Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A factory class for creating DataONE system metadata for Metacat documents
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2011-11-29 12:33:35 -0800 (Tue, 29 Nov 2011) $'
10
 * '$Revision: 6706 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.IOException;
29
import java.io.InputStream;
30
import java.math.BigInteger;
31
import java.security.NoSuchAlgorithmException;
32
import java.sql.SQLException;
33
import java.text.ParseException;
34
import java.text.SimpleDateFormat;
35
import java.util.Date;
36
import java.util.Hashtable;
37
import java.util.TimeZone;
38

    
39
import javax.xml.parsers.ParserConfigurationException;
40
import javax.xml.xpath.XPathExpressionException;
41

    
42
import org.apache.log4j.Logger;
43
import org.dataone.client.ObjectFormatCache;
44
import org.dataone.service.exceptions.BaseException;
45
import org.dataone.service.exceptions.NotFound;
46
import org.dataone.service.types.v1.Checksum;
47
import org.dataone.service.types.v1.Identifier;
48
import org.dataone.service.types.v1.NodeReference;
49
import org.dataone.service.types.v1.ObjectFormatIdentifier;
50
import org.dataone.service.types.v1.Subject;
51
import org.dataone.service.types.v1.SystemMetadata;
52
import org.dataone.service.types.v1.util.ChecksumUtil;
53
import org.ecoinformatics.datamanager.DataManager;
54
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
55
import org.ecoinformatics.datamanager.parser.DataPackage;
56
import org.jibx.runtime.JiBXException;
57
import org.xml.sax.SAXException;
58

    
59
import edu.ucsb.nceas.metacat.AccessionNumber;
60
import edu.ucsb.nceas.metacat.AccessionNumberException;
61
import edu.ucsb.nceas.metacat.IdentifierManager;
62
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
63
import edu.ucsb.nceas.metacat.McdbException;
64
import edu.ucsb.nceas.metacat.MetacatHandler;
65
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
66
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
67
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
68
import edu.ucsb.nceas.metacat.properties.PropertyService;
69
import edu.ucsb.nceas.metacat.util.DocumentUtil;
70
import edu.ucsb.nceas.utilities.ParseLSIDException;
71
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
72

    
73
public class SystemMetadataFactory {
74

    
75
	private static Logger logMetacat = Logger
76
			.getLogger(SystemMetadataFactory.class);
77

    
78
	/**
79
	 * Creates a system metadata object for insertion into metacat
80
	 * 
81
	 * @param localId
82
	 *            The local document identifier
83
	 * @param user
84
	 *            The user submitting the system metadata document
85
	 * @param groups
86
	 *            The groups the user belongs to
87
	 * 
88
	 * @return sysMeta The system metadata object created
89
	 */
90
	public static SystemMetadata createSystemMetadata(String localId)
91
			throws McdbException, McdbDocNotFoundException, SQLException,
92
			IOException, AccessionNumberException, ClassNotFoundException,
93
			InsufficientKarmaException, ParseLSIDException,
94
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
95
			JiBXException {
96
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
97
		logMetacat.debug("provided localId: " + localId);
98

    
99
		// create system metadata for the document
100
		SystemMetadata sysMeta = new SystemMetadata();
101
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
102
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(
103
				localId);
104
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
105
		String guid = null;
106
		try {
107
			// get the guid if it exists
108
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(),
109
					rev);
110
		} catch (McdbDocNotFoundException dnfe) {
111
			// otherwise create the mapping
112
			logMetacat
113
					.debug("There was a problem getting the guid from "
114
							+ "the given localId (docid and revision). The error message was: "
115
							+ dnfe.getMessage());
116
			logMetacat.debug("No guid in the identifier table.  adding it for "
117
					+ localId);
118
			IdentifierManager.getInstance().createMapping(localId, localId);
119
			logMetacat.debug("Mapping created for " + localId);
120
			logMetacat.debug("accessionNumber: " + accNum);
121
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(),
122
					rev);
123
		}
124
		Identifier identifier = new Identifier();
125
		identifier.setValue(guid);
126

    
127
		// set the id
128
		sysMeta.setIdentifier(identifier);
129

    
130
		// get the data or metadata object
131
		InputStream inputStream;
132
		try {
133
			inputStream = MetacatHandler.read(localId);
134
		} catch (ParseLSIDException ple) {
135
			logMetacat.debug("There was a problem parsing the LSID from "
136
					+ localId + ". The error message was: " + ple.getMessage());
137
			throw ple;
138

    
139
		} catch (PropertyNotFoundException pnfe) {
140
			logMetacat.debug("There was a problem finding a property. "
141
					+ "The error message was: " + pnfe.getMessage());
142
			throw pnfe;
143

    
144
		} catch (McdbException me) {
145
			logMetacat.debug("There was a Metacat problem. "
146
					+ "The error message was: " + me.getMessage());
147
			throw me;
148

    
149
		} catch (SQLException sqle) {
150
			logMetacat.debug("There was a SQL problem. "
151
					+ "The error message was: " + sqle.getMessage());
152
			throw sqle;
153

    
154
		} catch (ClassNotFoundException cnfe) {
155
			logMetacat.debug("There was a problem finding a class. "
156
					+ "The error message was: " + cnfe.getMessage());
157
			throw cnfe;
158

    
159
		} catch (IOException ioe) {
160
			logMetacat.debug("There was an I/O exception. "
161
					+ "The error message was: " + ioe.getMessage());
162
			throw ioe;
163

    
164
		} // end try()
165

    
166
		// get additional docinfo
167
		Hashtable<String, Object> docInfo = IdentifierManager.getInstance()
168
				.getDocumentInfo(localId);
169
		// set the default object format
170
		String doctype = (String) docInfo.get("doctype");
171
		ObjectFormatIdentifier fmtid = null;
172

    
173
		// set the object format, fall back to defaults
174
		try {
175
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype)
176
					.getFormatId();
177

    
178
		} catch (NotFound nfe) {
179

    
180
			try {
181
				// format is not registered, use default
182
				if (doctype.trim().equals("BIN")) {
183
					fmtid = ObjectFormatCache.getInstance().getFormat(
184
							"application/octet-stream").getFormatId();
185

    
186
				} else {
187
					fmtid = ObjectFormatCache.getInstance().getFormat(
188
							"text/plain").getFormatId();
189
				}
190

    
191
			} catch (NotFound nf) {
192
				logMetacat
193
						.error("There was a problem getting the default format "
194
								+ "from the ObjectFormatCache: "
195
								+ nf.getMessage());
196
				throw nf;
197
			}
198

    
199
		}
200

    
201
		sysMeta.setFormatId(fmtid);
202
		logMetacat.debug("The ObjectFormat for " + localId + " is "
203
				+ fmtid.getValue());
204

    
205
		// further parse EML documents to get data object format,
206
		// describes and describedBy information
207
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
208
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
209
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
210
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
211
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
212
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
213
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
214
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
215

    
216
			try {
217

    
218
				DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory
219
						.getDatabaseConnectionPoolInterface();
220
				DataManager dataManager = DataManager.getInstance(
221
						connectionPool, connectionPool.getDBAdapterName());
222
				DataPackage dataPackage = dataManager
223
						.parseMetadata(inputStream);
224

    
225
				// iterate through the data objects in the EML doc and add
226
				// sysmeta
227
				logMetacat
228
						.debug("In createSystemMetadata() the number of data "
229
								+ "entities is: "
230
								+ dataPackage.getEntityNumber());
231

    
232
				// iterate through data objects described by the EML
233
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
234

    
235
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
236
					String dataDocMimeType = dataPackage.getEntityList()[j]
237
							.getDataFormat();
238
					// default to binary
239
					if (dataDocMimeType == null) {
240
						dataDocMimeType = ObjectFormatCache.getInstance()
241
								.getFormat("application/octet-stream")
242
								.getFormatId().getValue();
243
					}
244
					String dataDocLocalId = "";
245
					logMetacat.debug("Data local ID: " + dataDocLocalId);
246
					logMetacat.debug("Data URL     : " + dataDocUrl);
247
					logMetacat.debug("Data mime    : " + dataDocMimeType);
248

    
249
					// we only handle ecogrid urls right now
250
					String ecogridPrefix = "ecogrid://knb/";
251
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
252
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
253
								.indexOf(ecogridPrefix)
254
								+ ecogridPrefix.length());
255

    
256
						// look up the guid for the data
257
						String dataDocid = DocumentUtil
258
								.getSmartDocId(dataDocLocalId);
259
						int dataRev = DocumentUtil
260
								.getRevisionFromAccessionNumber(dataDocLocalId);
261

    
262
						SystemMetadata dataSysMeta = null;
263
						// check if data system metadata exists
264
						String dataGuidString = null;
265
						try {
266
							dataGuidString = IdentifierManager.getInstance()
267
									.getGUID(dataDocid, dataRev);
268
							dataSysMeta = IdentifierManager.getInstance()
269
									.getSystemMetadata(dataGuidString);
270
						} catch (McdbDocNotFoundException nf) {
271
							// System metadata for data doesn't exist yet, so
272
							// create it
273
							logMetacat
274
									.debug("There was not an existing system metadata "
275
											+ "document for " + dataDocLocalId);
276
							try {
277
								logMetacat.debug("Creating a system metadata "
278
										+ "document for " + dataDocLocalId);
279
								dataSysMeta = createSystemMetadata(dataDocLocalId);
280

    
281
								// now look it up again
282
								dataGuidString = IdentifierManager
283
										.getInstance().getGUID(dataDocid,
284
												dataRev);
285

    
286
								// set the guid
287
								Identifier dataGuid = new Identifier();
288
								dataGuid.setValue(dataGuidString);
289

    
290
								// set object format
291
								logMetacat
292
										.debug("Updating system metadata for "
293
												+ dataGuid.getValue() + " to "
294
												+ dataDocMimeType);
295
								try {
296
									ObjectFormatIdentifier fmt = ObjectFormatCache
297
											.getInstance().getFormat(
298
													dataDocMimeType)
299
											.getFormatId();
300
									dataSysMeta.setFormatId(fmt);
301

    
302
								} catch (NotFound nfe) {
303
									logMetacat
304
											.debug("Couldn't find format identifier for: "
305
													+ dataDocMimeType
306
													+ ". Setting it to application/octet-stream.");
307
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
308
									newFmtid
309
											.setValue("application/octet-stream");
310

    
311
								}
312

    
313
								// update the values
314
								HazelcastService.getInstance()
315
										.getSystemMetadataMap().put(
316
												dataSysMeta.getIdentifier(),
317
												dataSysMeta);
318

    
319
							} catch (McdbDocNotFoundException mdnf) {
320
								mdnf.printStackTrace();
321
								throw mdnf;
322
							} catch (NumberFormatException nfe) {
323
								nfe.printStackTrace();
324
								throw nfe;
325
							} catch (AccessionNumberException ane) {
326
								ane.printStackTrace();
327
								throw ane;
328
							} catch (SQLException sqle) {
329
								sqle.printStackTrace();
330
								throw sqle;
331
							} catch (NoSuchAlgorithmException nsae) {
332
								nsae.printStackTrace();
333
								throw nsae;
334
							} catch (IOException ioe) {
335
								ioe.printStackTrace();
336
								throw ioe;
337
							} catch (PropertyNotFoundException pnfe) {
338
								pnfe.printStackTrace();
339
								throw pnfe;
340
							} catch (BaseException be) {
341
								be.printStackTrace();
342
								throw be;
343
							}
344
						}
345

    
346
					} // end if()
347

    
348
				} // end for()
349

    
350
			} catch (ParserConfigurationException pce) {
351
				logMetacat
352
						.debug("There was a problem parsing the EML document. "
353
								+ "The error message was: " + pce.getMessage());
354

    
355
			} catch (SAXException saxe) {
356
				logMetacat
357
						.debug("There was a problem traversing the EML document. "
358
								+ "The error message was: " + saxe.getMessage());
359

    
360
			} catch (XPathExpressionException xpee) {
361
				logMetacat
362
						.debug("There was a problem searching the EML document. "
363
								+ "The error message was: " + xpee.getMessage());
364
			} catch (Exception e) {
365
				logMetacat
366
						.debug("There was a problem creating System Metadata. "
367
								+ "The error message was: " + e.getMessage());
368
			} // end try()
369

    
370
		} // end if()
371

    
372
		// create the checksum
373
		inputStream = MetacatHandler.read(localId);
374
		String algorithm = "MD5";
375
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
376
		sysMeta.setChecksum(checksum);
377

    
378
		// set the size
379
		inputStream = MetacatHandler.read(localId);
380
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
381
		sysMeta.setSize(new BigInteger(sizeStr));
382

    
383
		// submitter
384
		Subject subject = new Subject();
385
		subject.setValue((String) docInfo.get("user_owner"));
386
		sysMeta.setSubmitter(subject);
387
		sysMeta.setRightsHolder(subject);
388

    
389
		try {
390
			SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-DD");
391
			dateFormat.setTimeZone(TimeZone.getDefault());
392

    
393
			Date dateCreated = dateFormat.parse((String) docInfo
394
					.get("date_created"));
395
			sysMeta.setDateUploaded(dateCreated);
396
			Date dateUpdated = dateFormat.parse((String) docInfo
397
					.get("date_updated"));
398
			sysMeta.setDateSysMetadataModified(dateUpdated);
399

    
400
		} catch (ParseException pe) {
401
			logMetacat.debug("There was a problem parsing a Metacat date. The "
402
					+ "error message was: " + pe.getMessage());
403
			Date dateCreated = new Date();
404
			sysMeta.setDateUploaded(dateCreated);
405
			Date dateUpdated = new Date();
406
			sysMeta.setDateSysMetadataModified(dateUpdated);
407

    
408
		}
409
		NodeReference nr = new NodeReference();
410
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
411
		sysMeta.setOriginMemberNode(nr);
412
		sysMeta.setAuthoritativeMemberNode(nr);
413

    
414
		return sysMeta;
415
	}
416

    
417
	/*
418
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
419
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
420
	 * evaluated.
421
	 * 
422
	 * @param is The InputStream of bytes
423
	 * 
424
	 * @return size The size in bytes of the input stream as a long
425
	 * 
426
	 * @throws IOException
427
	 */
428
	private static long sizeOfStream(InputStream is) throws IOException {
429

    
430
		long size = 0;
431
		byte[] b = new byte[1024];
432
		int numread = is.read(b, 0, 1024);
433
		while (numread != -1) {
434
			size += numread;
435
			numread = is.read(b, 0, 1024);
436
		}
437
		return size;
438

    
439
	}
440
}
(5-5/5)