Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class for creating DataONE system metadata for Metacat documents
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2011-11-29 13:41:41 -0800 (Tue, 29 Nov 2011) $'
10
 * '$Revision: 6710 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.IOException;
29
import java.io.InputStream;
30
import java.math.BigInteger;
31
import java.security.NoSuchAlgorithmException;
32
import java.sql.SQLException;
33
import java.util.Date;
34
import java.util.Hashtable;
35
import java.util.Vector;
36

    
37
import javax.xml.parsers.ParserConfigurationException;
38
import javax.xml.xpath.XPathExpressionException;
39

    
40
import org.apache.log4j.Logger;
41
import org.dataone.client.ObjectFormatCache;
42
import org.dataone.service.exceptions.BaseException;
43
import org.dataone.service.exceptions.NotFound;
44
import org.dataone.service.types.v1.Checksum;
45
import org.dataone.service.types.v1.Identifier;
46
import org.dataone.service.types.v1.NodeReference;
47
import org.dataone.service.types.v1.ObjectFormatIdentifier;
48
import org.dataone.service.types.v1.Subject;
49
import org.dataone.service.types.v1.SystemMetadata;
50
import org.dataone.service.types.v1.util.ChecksumUtil;
51
import org.dataone.service.util.DateTimeMarshaller;
52
import org.ecoinformatics.datamanager.DataManager;
53
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
54
import org.ecoinformatics.datamanager.parser.DataPackage;
55
import org.jibx.runtime.JiBXException;
56
import org.xml.sax.SAXException;
57

    
58
import edu.ucsb.nceas.metacat.AccessionNumber;
59
import edu.ucsb.nceas.metacat.AccessionNumberException;
60
import edu.ucsb.nceas.metacat.DBUtil;
61
import edu.ucsb.nceas.metacat.IdentifierManager;
62
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
63
import edu.ucsb.nceas.metacat.McdbException;
64
import edu.ucsb.nceas.metacat.MetacatHandler;
65
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
66
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
67
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
68
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
69
import edu.ucsb.nceas.metacat.properties.PropertyService;
70
import edu.ucsb.nceas.metacat.replication.ReplicationService;
71
import edu.ucsb.nceas.metacat.shared.HandlerException;
72
import edu.ucsb.nceas.metacat.util.DocumentUtil;
73
import edu.ucsb.nceas.utilities.ParseLSIDException;
74
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
75

    
76
public class SystemMetadataFactory {
77

    
78
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
79

    
80
	/**
81
	 * Creates a system metadata object for insertion into metacat
82
	 * 
83
	 * @param localId
84
	 *            The local document identifier
85
	 * @param user
86
	 *            The user submitting the system metadata document
87
	 * @param groups
88
	 *            The groups the user belongs to
89
	 * 
90
	 * @return sysMeta The system metadata object created
91
	 * @throws SAXException 
92
	 * @throws HandlerException 
93
	 * @throws AccessControlException 
94
	 */
95
	public static SystemMetadata createSystemMetadata(String localId)
96
			throws McdbException, McdbDocNotFoundException, SQLException,
97
			IOException, AccessionNumberException, ClassNotFoundException,
98
			InsufficientKarmaException, ParseLSIDException,
99
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
100
			JiBXException, AccessControlException, HandlerException, SAXException {
101
		
102
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
103
		logMetacat.debug("provided localId: " + localId);
104

    
105
		// create system metadata for the document
106
		SystemMetadata sysMeta = new SystemMetadata();
107
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
108
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
109
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
110
		String guid = null;
111
		try {
112
			// get the guid if it exists
113
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
114
		} catch (McdbDocNotFoundException dnfe) {
115
			// otherwise create the mapping
116
			logMetacat.debug("There was a problem getting the guid from "
117
							+ "the given localId (docid and revision). The error message was: "
118
							+ dnfe.getMessage());
119
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
120
			IdentifierManager.getInstance().createMapping(localId, localId);
121
			logMetacat.debug("Mapping created for " + localId);
122
			logMetacat.debug("accessionNumber: " + accNum);
123
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
124
		}
125
		Identifier identifier = new Identifier();
126
		identifier.setValue(guid);
127

    
128
		// set the id
129
		sysMeta.setIdentifier(identifier);
130

    
131
		// get the data or metadata object
132
		InputStream inputStream;
133
		try {
134
			inputStream = MetacatHandler.read(localId);
135
		} catch (ParseLSIDException ple) {
136
			logMetacat.debug("There was a problem parsing the LSID from "
137
					+ localId + ". The error message was: " + ple.getMessage());
138
			throw ple;
139

    
140
		} catch (PropertyNotFoundException pnfe) {
141
			logMetacat.debug("There was a problem finding a property. "
142
					+ "The error message was: " + pnfe.getMessage());
143
			throw pnfe;
144

    
145
		} catch (McdbException me) {
146
			logMetacat.debug("There was a Metacat problem. "
147
					+ "The error message was: " + me.getMessage());
148
			throw me;
149

    
150
		} catch (SQLException sqle) {
151
			logMetacat.debug("There was a SQL problem. "
152
					+ "The error message was: " + sqle.getMessage());
153
			throw sqle;
154

    
155
		} catch (ClassNotFoundException cnfe) {
156
			logMetacat.debug("There was a problem finding a class. "
157
					+ "The error message was: " + cnfe.getMessage());
158
			throw cnfe;
159

    
160
		} catch (IOException ioe) {
161
			logMetacat.debug("There was an I/O exception. "
162
					+ "The error message was: " + ioe.getMessage());
163
			throw ioe;
164

    
165
		} // end try()
166

    
167
		// get additional docinfo
168
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
169
		// set the default object format
170
		String doctype = docInfo.get("doctype");
171
		ObjectFormatIdentifier fmtid = null;
172

    
173
		// set the object format, fall back to defaults
174
		try {
175
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
176
		} catch (NotFound nfe) {
177

    
178
			try {
179
				// format is not registered, use default
180
				if (doctype.trim().equals("BIN")) {
181
					fmtid = ObjectFormatCache.getInstance().getFormat(
182
							"application/octet-stream").getFormatId();
183

    
184
				} else {
185
					fmtid = ObjectFormatCache.getInstance().getFormat(
186
							"text/plain").getFormatId();
187
				}
188

    
189
			} catch (NotFound nf) {
190
				logMetacat.error("There was a problem getting the default format "
191
								+ "from the ObjectFormatCache: "
192
								+ nf.getMessage());
193
				throw nf;
194
			}
195

    
196
		}
197

    
198
		sysMeta.setFormatId(fmtid);
199
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
200

    
201
		// further parse EML documents to get data object format,
202
		// describes and describedBy information
203
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
204
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
205
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
206
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
207
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
208
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
209
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
210
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
211

    
212
			try {
213

    
214
				DatabaseConnectionPoolInterface connectionPool = 
215
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
216
				DataManager dataManager = 
217
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
218
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
219

    
220
				// iterate through the data objects in the EML doc and add
221
				// sysmeta
222
				logMetacat.debug("In createSystemMetadata() the number of data "
223
								+ "entities is: "
224
								+ dataPackage.getEntityNumber());
225

    
226
				// iterate through data objects described by the EML
227
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
228

    
229
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
230
					String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
231
					// default to binary
232
					if (dataDocMimeType == null) {
233
						dataDocMimeType = ObjectFormatCache.getInstance()
234
								.getFormat("application/octet-stream")
235
								.getFormatId().getValue();
236
					}
237
					String dataDocLocalId = "";
238
					logMetacat.debug("Data local ID: " + dataDocLocalId);
239
					logMetacat.debug("Data URL     : " + dataDocUrl);
240
					logMetacat.debug("Data mime    : " + dataDocMimeType);
241

    
242
					// we only handle ecogrid urls right now
243
					String ecogridPrefix = "ecogrid://knb/";
244
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
245
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
246
								.indexOf(ecogridPrefix)
247
								+ ecogridPrefix.length());
248

    
249
						// look up the guid for the data
250
						String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
251
						int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
252

    
253
						SystemMetadata dataSysMeta = null;
254
						// check if data system metadata exists
255
						String dataGuidString = null;
256
						try {
257
							dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
258
							dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
259
						} catch (McdbDocNotFoundException nf) {
260
							// System metadata for data doesn't exist yet, so
261
							// create it
262
							logMetacat.debug("There was not an existing system metadata "
263
											+ "document for " + dataDocLocalId);
264
							try {
265
								logMetacat.debug("Creating a system metadata "
266
										+ "document for " + dataDocLocalId);
267
								dataSysMeta = createSystemMetadata(dataDocLocalId);
268

    
269
								// now look it up again
270
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
271

    
272
								// set the guid
273
								Identifier dataGuid = new Identifier();
274
								dataGuid.setValue(dataGuidString);
275

    
276
								// set object format
277
								logMetacat.debug("Updating system metadata for "
278
												+ dataGuid.getValue() + " to "
279
												+ dataDocMimeType);
280
								try {
281
									ObjectFormatIdentifier fmt = 
282
										ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
283
									dataSysMeta.setFormatId(fmt);
284
								} catch (NotFound nfe) {
285
									logMetacat.debug("Couldn't find format identifier for: "
286
													+ dataDocMimeType
287
													+ ". Setting it to application/octet-stream.");
288
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
289
									newFmtid.setValue("application/octet-stream");
290
								}
291

    
292
								// update the values
293
								HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
294

    
295
							} catch (McdbDocNotFoundException mdnf) {
296
								mdnf.printStackTrace();
297
								throw mdnf;
298
							} catch (NumberFormatException nfe) {
299
								nfe.printStackTrace();
300
								throw nfe;
301
							} catch (AccessionNumberException ane) {
302
								ane.printStackTrace();
303
								throw ane;
304
							} catch (SQLException sqle) {
305
								sqle.printStackTrace();
306
								throw sqle;
307
							} catch (NoSuchAlgorithmException nsae) {
308
								nsae.printStackTrace();
309
								throw nsae;
310
							} catch (IOException ioe) {
311
								ioe.printStackTrace();
312
								throw ioe;
313
							} catch (PropertyNotFoundException pnfe) {
314
								pnfe.printStackTrace();
315
								throw pnfe;
316
							} catch (BaseException be) {
317
								be.printStackTrace();
318
								throw be;
319
							}
320
						}
321

    
322
					} // end if()
323

    
324
				} // end for()
325

    
326
			} catch (ParserConfigurationException pce) {
327
				logMetacat.debug("There was a problem parsing the EML document. "
328
								+ "The error message was: " + pce.getMessage());
329

    
330
			} catch (SAXException saxe) {
331
				logMetacat.debug("There was a problem traversing the EML document. "
332
								+ "The error message was: " + saxe.getMessage());
333

    
334
			} catch (XPathExpressionException xpee) {
335
				logMetacat.debug("There was a problem searching the EML document. "
336
								+ "The error message was: " + xpee.getMessage());
337
			} catch (Exception e) {
338
				logMetacat.debug("There was a problem creating System Metadata. "
339
								+ "The error message was: " + e.getMessage());
340
			} // end try()
341

    
342
		} // end if()
343

    
344
		// create the checksum
345
		inputStream = MetacatHandler.read(localId);
346
		String algorithm = "MD5";
347
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
348
		sysMeta.setChecksum(checksum);
349

    
350
		// set the size
351
		inputStream = MetacatHandler.read(localId);
352
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
353
		sysMeta.setSize(new BigInteger(sizeStr));
354

    
355
		// submitter
356
		Subject submitter = new Subject();
357
		submitter.setValue(docInfo.get("user_updated"));
358
		sysMeta.setSubmitter(submitter);
359
		
360
		// rights holder
361
		Subject owner = new Subject();
362
		owner.setValue(docInfo.get("user_owner"));
363
		sysMeta.setRightsHolder(owner);
364

    
365
		// dates
366
		String createdDateString = docInfo.get("date_created");
367
		String updatedDateString = docInfo.get("date_updated");
368
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
369
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
370
		sysMeta.setDateUploaded(createdDate);
371
		sysMeta.setDateSysMetadataModified(updatedDate);
372
		
373
		// set the revision history
374
		String docidWithoutRev = accNum.getDocid();
375
		Identifier obsoletedBy = new Identifier();
376
		Identifier obsoletes = new Identifier();
377
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
378
		for (int existingRev: revisions) {
379
			// use the docid+rev as the guid
380
			String existingPid = docidWithoutRev + "." + existingRev;
381
			if (existingRev < rev) {
382
				// it's the old docid, until it's not
383
				obsoletes.setValue(existingPid);
384
			}
385
			if (existingRev > rev) {
386
				// it's the newer docid
387
				obsoletedBy.setValue(existingPid);
388
				// only want the version just after it
389
				break;
390
			}
391
		}
392
		// set them
393
		sysMeta.setObsoletedBy(obsoletedBy);
394
		sysMeta.setObsoletes(obsoletes);
395
		
396
		// TODO: access control?
397
		// I believe the access control will be fine since we use the same storage mechanism for Metacat/D1
398
		
399
		// authoritative node
400
		NodeReference nr = new NodeReference();
401
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
402
		sysMeta.setOriginMemberNode(nr);
403
		sysMeta.setAuthoritativeMemberNode(nr);
404

    
405
		return sysMeta;
406
	}
407

    
408
	/**
409
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
410
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
411
	 * evaluated.
412
	 * 
413
	 * @param is The InputStream of bytes
414
	 * 
415
	 * @return size The size in bytes of the input stream as a long
416
	 * 
417
	 * @throws IOException
418
	 */
419
	private static long sizeOfStream(InputStream is) throws IOException {
420

    
421
		long size = 0;
422
		byte[] b = new byte[1024];
423
		int numread = is.read(b, 0, 1024);
424
		while (numread != -1) {
425
			size += numread;
426
			numread = is.read(b, 0, 1024);
427
		}
428
		return size;
429

    
430
	}
431
}
(5-5/5)