Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A factory class for creating DataONE system metadata for Metacat documents
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2011-11-29 13:39:31 -0800 (Tue, 29 Nov 2011) $'
10
 * '$Revision: 6709 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.IOException;
29
import java.io.InputStream;
30
import java.math.BigInteger;
31
import java.security.NoSuchAlgorithmException;
32
import java.sql.SQLException;
33
import java.text.ParseException;
34
import java.text.SimpleDateFormat;
35
import java.util.Date;
36
import java.util.Hashtable;
37
import java.util.TimeZone;
38
import java.util.Vector;
39

    
40
import javax.xml.parsers.ParserConfigurationException;
41
import javax.xml.xpath.XPathExpressionException;
42

    
43
import org.apache.log4j.Logger;
44
import org.dataone.client.ObjectFormatCache;
45
import org.dataone.service.exceptions.BaseException;
46
import org.dataone.service.exceptions.NotFound;
47
import org.dataone.service.types.v1.Checksum;
48
import org.dataone.service.types.v1.Identifier;
49
import org.dataone.service.types.v1.NodeReference;
50
import org.dataone.service.types.v1.ObjectFormatIdentifier;
51
import org.dataone.service.types.v1.Subject;
52
import org.dataone.service.types.v1.SystemMetadata;
53
import org.dataone.service.types.v1.util.ChecksumUtil;
54
import org.dataone.service.util.DateTimeMarshaller;
55
import org.ecoinformatics.datamanager.DataManager;
56
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
57
import org.ecoinformatics.datamanager.parser.DataPackage;
58
import org.jibx.runtime.JiBXException;
59
import org.xml.sax.SAXException;
60

    
61
import edu.ucsb.nceas.metacat.AccessionNumber;
62
import edu.ucsb.nceas.metacat.AccessionNumberException;
63
import edu.ucsb.nceas.metacat.DBUtil;
64
import edu.ucsb.nceas.metacat.IdentifierManager;
65
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
66
import edu.ucsb.nceas.metacat.McdbException;
67
import edu.ucsb.nceas.metacat.MetacatHandler;
68
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
69
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
70
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
71
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
72
import edu.ucsb.nceas.metacat.properties.PropertyService;
73
import edu.ucsb.nceas.metacat.replication.ReplicationService;
74
import edu.ucsb.nceas.metacat.shared.HandlerException;
75
import edu.ucsb.nceas.metacat.util.DocumentUtil;
76
import edu.ucsb.nceas.utilities.ParseLSIDException;
77
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
78

    
79
public class SystemMetadataFactory {
80

    
81
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
82

    
83
	/**
84
	 * Creates a system metadata object for insertion into metacat
85
	 * 
86
	 * @param localId
87
	 *            The local document identifier
88
	 * @param user
89
	 *            The user submitting the system metadata document
90
	 * @param groups
91
	 *            The groups the user belongs to
92
	 * 
93
	 * @return sysMeta The system metadata object created
94
	 * @throws SAXException 
95
	 * @throws HandlerException 
96
	 * @throws AccessControlException 
97
	 */
98
	public static SystemMetadata createSystemMetadata(String localId)
99
			throws McdbException, McdbDocNotFoundException, SQLException,
100
			IOException, AccessionNumberException, ClassNotFoundException,
101
			InsufficientKarmaException, ParseLSIDException,
102
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
103
			JiBXException, AccessControlException, HandlerException, SAXException {
104
		
105
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
106
		logMetacat.debug("provided localId: " + localId);
107

    
108
		// create system metadata for the document
109
		SystemMetadata sysMeta = new SystemMetadata();
110
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
111
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
112
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
113
		String guid = null;
114
		try {
115
			// get the guid if it exists
116
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
117
		} catch (McdbDocNotFoundException dnfe) {
118
			// otherwise create the mapping
119
			logMetacat.debug("There was a problem getting the guid from "
120
							+ "the given localId (docid and revision). The error message was: "
121
							+ dnfe.getMessage());
122
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
123
			IdentifierManager.getInstance().createMapping(localId, localId);
124
			logMetacat.debug("Mapping created for " + localId);
125
			logMetacat.debug("accessionNumber: " + accNum);
126
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
127
		}
128
		Identifier identifier = new Identifier();
129
		identifier.setValue(guid);
130

    
131
		// set the id
132
		sysMeta.setIdentifier(identifier);
133

    
134
		// get the data or metadata object
135
		InputStream inputStream;
136
		try {
137
			inputStream = MetacatHandler.read(localId);
138
		} catch (ParseLSIDException ple) {
139
			logMetacat.debug("There was a problem parsing the LSID from "
140
					+ localId + ". The error message was: " + ple.getMessage());
141
			throw ple;
142

    
143
		} catch (PropertyNotFoundException pnfe) {
144
			logMetacat.debug("There was a problem finding a property. "
145
					+ "The error message was: " + pnfe.getMessage());
146
			throw pnfe;
147

    
148
		} catch (McdbException me) {
149
			logMetacat.debug("There was a Metacat problem. "
150
					+ "The error message was: " + me.getMessage());
151
			throw me;
152

    
153
		} catch (SQLException sqle) {
154
			logMetacat.debug("There was a SQL problem. "
155
					+ "The error message was: " + sqle.getMessage());
156
			throw sqle;
157

    
158
		} catch (ClassNotFoundException cnfe) {
159
			logMetacat.debug("There was a problem finding a class. "
160
					+ "The error message was: " + cnfe.getMessage());
161
			throw cnfe;
162

    
163
		} catch (IOException ioe) {
164
			logMetacat.debug("There was an I/O exception. "
165
					+ "The error message was: " + ioe.getMessage());
166
			throw ioe;
167

    
168
		} // end try()
169

    
170
		// get additional docinfo
171
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
172
		// set the default object format
173
		String doctype = docInfo.get("doctype");
174
		ObjectFormatIdentifier fmtid = null;
175

    
176
		// set the object format, fall back to defaults
177
		try {
178
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
179
		} catch (NotFound nfe) {
180

    
181
			try {
182
				// format is not registered, use default
183
				if (doctype.trim().equals("BIN")) {
184
					fmtid = ObjectFormatCache.getInstance().getFormat(
185
							"application/octet-stream").getFormatId();
186

    
187
				} else {
188
					fmtid = ObjectFormatCache.getInstance().getFormat(
189
							"text/plain").getFormatId();
190
				}
191

    
192
			} catch (NotFound nf) {
193
				logMetacat.error("There was a problem getting the default format "
194
								+ "from the ObjectFormatCache: "
195
								+ nf.getMessage());
196
				throw nf;
197
			}
198

    
199
		}
200

    
201
		sysMeta.setFormatId(fmtid);
202
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
203

    
204
		// further parse EML documents to get data object format,
205
		// describes and describedBy information
206
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
207
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
208
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
209
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
210
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
211
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
212
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
213
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
214

    
215
			try {
216

    
217
				DatabaseConnectionPoolInterface connectionPool = 
218
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
219
				DataManager dataManager = 
220
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
221
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
222

    
223
				// iterate through the data objects in the EML doc and add
224
				// sysmeta
225
				logMetacat.debug("In createSystemMetadata() the number of data "
226
								+ "entities is: "
227
								+ dataPackage.getEntityNumber());
228

    
229
				// iterate through data objects described by the EML
230
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
231

    
232
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
233
					String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
234
					// default to binary
235
					if (dataDocMimeType == null) {
236
						dataDocMimeType = ObjectFormatCache.getInstance()
237
								.getFormat("application/octet-stream")
238
								.getFormatId().getValue();
239
					}
240
					String dataDocLocalId = "";
241
					logMetacat.debug("Data local ID: " + dataDocLocalId);
242
					logMetacat.debug("Data URL     : " + dataDocUrl);
243
					logMetacat.debug("Data mime    : " + dataDocMimeType);
244

    
245
					// we only handle ecogrid urls right now
246
					String ecogridPrefix = "ecogrid://knb/";
247
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
248
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
249
								.indexOf(ecogridPrefix)
250
								+ ecogridPrefix.length());
251

    
252
						// look up the guid for the data
253
						String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
254
						int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
255

    
256
						SystemMetadata dataSysMeta = null;
257
						// check if data system metadata exists
258
						String dataGuidString = null;
259
						try {
260
							dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
261
							dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
262
						} catch (McdbDocNotFoundException nf) {
263
							// System metadata for data doesn't exist yet, so
264
							// create it
265
							logMetacat.debug("There was not an existing system metadata "
266
											+ "document for " + dataDocLocalId);
267
							try {
268
								logMetacat.debug("Creating a system metadata "
269
										+ "document for " + dataDocLocalId);
270
								dataSysMeta = createSystemMetadata(dataDocLocalId);
271

    
272
								// now look it up again
273
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
274

    
275
								// set the guid
276
								Identifier dataGuid = new Identifier();
277
								dataGuid.setValue(dataGuidString);
278

    
279
								// set object format
280
								logMetacat.debug("Updating system metadata for "
281
												+ dataGuid.getValue() + " to "
282
												+ dataDocMimeType);
283
								try {
284
									ObjectFormatIdentifier fmt = 
285
										ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
286
									dataSysMeta.setFormatId(fmt);
287
								} catch (NotFound nfe) {
288
									logMetacat.debug("Couldn't find format identifier for: "
289
													+ dataDocMimeType
290
													+ ". Setting it to application/octet-stream.");
291
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
292
									newFmtid.setValue("application/octet-stream");
293
								}
294

    
295
								// update the values
296
								HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
297

    
298
							} catch (McdbDocNotFoundException mdnf) {
299
								mdnf.printStackTrace();
300
								throw mdnf;
301
							} catch (NumberFormatException nfe) {
302
								nfe.printStackTrace();
303
								throw nfe;
304
							} catch (AccessionNumberException ane) {
305
								ane.printStackTrace();
306
								throw ane;
307
							} catch (SQLException sqle) {
308
								sqle.printStackTrace();
309
								throw sqle;
310
							} catch (NoSuchAlgorithmException nsae) {
311
								nsae.printStackTrace();
312
								throw nsae;
313
							} catch (IOException ioe) {
314
								ioe.printStackTrace();
315
								throw ioe;
316
							} catch (PropertyNotFoundException pnfe) {
317
								pnfe.printStackTrace();
318
								throw pnfe;
319
							} catch (BaseException be) {
320
								be.printStackTrace();
321
								throw be;
322
							}
323
						}
324

    
325
					} // end if()
326

    
327
				} // end for()
328

    
329
			} catch (ParserConfigurationException pce) {
330
				logMetacat.debug("There was a problem parsing the EML document. "
331
								+ "The error message was: " + pce.getMessage());
332

    
333
			} catch (SAXException saxe) {
334
				logMetacat.debug("There was a problem traversing the EML document. "
335
								+ "The error message was: " + saxe.getMessage());
336

    
337
			} catch (XPathExpressionException xpee) {
338
				logMetacat.debug("There was a problem searching the EML document. "
339
								+ "The error message was: " + xpee.getMessage());
340
			} catch (Exception e) {
341
				logMetacat.debug("There was a problem creating System Metadata. "
342
								+ "The error message was: " + e.getMessage());
343
			} // end try()
344

    
345
		} // end if()
346

    
347
		// create the checksum
348
		inputStream = MetacatHandler.read(localId);
349
		String algorithm = "MD5";
350
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
351
		sysMeta.setChecksum(checksum);
352

    
353
		// set the size
354
		inputStream = MetacatHandler.read(localId);
355
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
356
		sysMeta.setSize(new BigInteger(sizeStr));
357

    
358
		// submitter
359
		Subject submitter = new Subject();
360
		submitter.setValue((String) docInfo.get("user_updated"));
361
		sysMeta.setSubmitter(submitter);
362
		
363
		// rights holder
364
		Subject owner = new Subject();
365
		owner.setValue((String) docInfo.get("user_owner"));
366
		sysMeta.setRightsHolder(owner);
367

    
368
		// dates
369
		String createdDateString = docInfo.get("date_created");
370
		String updatedDateString = docInfo.get("date_updated");
371
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
372
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
373
		sysMeta.setDateUploaded(createdDate);
374
		sysMeta.setDateSysMetadataModified(updatedDate);
375
		
376
		// set the revision history
377
		String docidWithoutRev = accNum.getDocid();
378
		Identifier obsoletedBy = new Identifier();
379
		Identifier obsoletes = new Identifier();
380
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
381
		for (int existingRev: revisions) {
382
			// use the docid+rev as the guid
383
			String existingPid = docidWithoutRev + "." + existingRev;
384
			if (existingRev < rev) {
385
				// it's the old docid, until it's not
386
				obsoletes.setValue(existingPid );
387
			}
388
			if (existingRev > rev) {
389
				// it's the newer docid
390
				obsoletedBy.setValue(existingPid);
391
				// only want the version just after it
392
				break;
393
			}
394
		}
395
		// set them
396
		sysMeta.setObsoletedBy(obsoletedBy);
397
		sysMeta.setObsoletes(obsoletes);
398
		
399
		// TODO: access control?
400
		// I believe the access control will be fine since we use the same storage mechanism for Metacat/D1
401
		
402
		// authoritative node
403
		NodeReference nr = new NodeReference();
404
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
405
		sysMeta.setOriginMemberNode(nr);
406
		sysMeta.setAuthoritativeMemberNode(nr);
407

    
408
		return sysMeta;
409
	}
410

    
411
	/**
412
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
413
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
414
	 * evaluated.
415
	 * 
416
	 * @param is The InputStream of bytes
417
	 * 
418
	 * @return size The size in bytes of the input stream as a long
419
	 * 
420
	 * @throws IOException
421
	 */
422
	private static long sizeOfStream(InputStream is) throws IOException {
423

    
424
		long size = 0;
425
		byte[] b = new byte[1024];
426
		int numread = is.read(b, 0, 1024);
427
		while (numread != -1) {
428
			size += numread;
429
			numread = is.read(b, 0, 1024);
430
		}
431
		return size;
432

    
433
	}
434
}
(5-5/5)