/**
 *  '$RCSfile$'
 *    Purpose: A class for generating DataONE system metadata for
 *             objects stored in Metacat
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author$'
 *     '$Date$'
 * '$Revision$'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.dataone;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.dataone.client.ObjectFormatCache;
import org.dataone.ore.ResourceMapFactory;
import org.dataone.service.exceptions.BaseException;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.types.v1.Checksum;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.NodeReference;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.Subject;
import org.dataone.service.types.v1.SystemMetadata;
import org.dataone.service.types.v1.util.ChecksumUtil;
import org.dataone.service.util.DateTimeMarshaller;
import org.dspace.foresite.ResourceMap;
import org.ecoinformatics.datamanager.DataManager;
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
import org.ecoinformatics.datamanager.parser.DataPackage;
import org.jibx.runtime.JiBXException;
import org.xml.sax.SAXException;

import edu.ucsb.nceas.metacat.AccessionNumber;
import edu.ucsb.nceas.metacat.AccessionNumberException;
import edu.ucsb.nceas.metacat.DBUtil;
import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
import edu.ucsb.nceas.metacat.McdbException;
import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.MetacatHandler;
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.replication.ReplicationService;
import edu.ucsb.nceas.metacat.shared.HandlerException;
import edu.ucsb.nceas.metacat.util.DocumentUtil;
import edu.ucsb.nceas.utilities.ParseLSIDException;
import edu.ucsb.nceas.utilities.PropertyNotFoundException;

public class SystemMetadataFactory {
87 6706 leinfelder
88 6707 leinfelder
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
89 6712 leinfelder
90 6705 leinfelder
	/**
91 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
92
	 *
93
	 * @param localId
94
	 *            The local document identifier
95
	 * @param user
96
	 *            The user submitting the system metadata document
97
	 * @param groups
98
	 *            The groups the user belongs to
99
	 *
100
	 * @return sysMeta The system metadata object created
101 6708 leinfelder
	 * @throws SAXException
102
	 * @throws HandlerException
103
	 * @throws AccessControlException
104 6706 leinfelder
	 */
105 6712 leinfelder
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE)
106 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
107
			IOException, AccessionNumberException, ClassNotFoundException,
108
			InsufficientKarmaException, ParseLSIDException,
109
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
110 6708 leinfelder
			JiBXException, AccessControlException, HandlerException, SAXException {
111 6707 leinfelder
112 6706 leinfelder
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
113
		logMetacat.debug("provided localId: " + localId);
114 6705 leinfelder
115 6706 leinfelder
		// create system metadata for the document
116
		SystemMetadata sysMeta = new SystemMetadata();
117
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
118 6707 leinfelder
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
119 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
120
		String guid = null;
121
		try {
122
			// get the guid if it exists
123 6707 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
124 6706 leinfelder
		} catch (McdbDocNotFoundException dnfe) {
125
			// otherwise create the mapping
126 6707 leinfelder
			logMetacat.debug("There was a problem getting the guid from "
127 6706 leinfelder
							+ "the given localId (docid and revision). The error message was: "
128
							+ dnfe.getMessage());
129 6707 leinfelder
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
130 6706 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
131
			logMetacat.debug("Mapping created for " + localId);
132
			logMetacat.debug("accessionNumber: " + accNum);
133 6707 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
134 6706 leinfelder
		}
135
		Identifier identifier = new Identifier();
136
		identifier.setValue(guid);
137 6705 leinfelder
138 6706 leinfelder
		// set the id
139
		sysMeta.setIdentifier(identifier);
140 6705 leinfelder
141 6706 leinfelder
		// get the data or metadata object
142
		InputStream inputStream;
143
		try {
144
			inputStream = MetacatHandler.read(localId);
145
		} catch (ParseLSIDException ple) {
146
			logMetacat.debug("There was a problem parsing the LSID from "
147
					+ localId + ". The error message was: " + ple.getMessage());
148
			throw ple;
149 6705 leinfelder
150 6706 leinfelder
		} catch (PropertyNotFoundException pnfe) {
151
			logMetacat.debug("There was a problem finding a property. "
152
					+ "The error message was: " + pnfe.getMessage());
153
			throw pnfe;
154
155
		} catch (McdbException me) {
156
			logMetacat.debug("There was a Metacat problem. "
157
					+ "The error message was: " + me.getMessage());
158
			throw me;
159
160
		} catch (SQLException sqle) {
161
			logMetacat.debug("There was a SQL problem. "
162
					+ "The error message was: " + sqle.getMessage());
163
			throw sqle;
164
165
		} catch (ClassNotFoundException cnfe) {
166
			logMetacat.debug("There was a problem finding a class. "
167
					+ "The error message was: " + cnfe.getMessage());
168
			throw cnfe;
169
170
		} catch (IOException ioe) {
171
			logMetacat.debug("There was an I/O exception. "
172
					+ "The error message was: " + ioe.getMessage());
173
			throw ioe;
174
175
		} // end try()
176
177
		// get additional docinfo
178 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
179 6706 leinfelder
		// set the default object format
180 6708 leinfelder
		String doctype = docInfo.get("doctype");
181 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
182
183
		// set the object format, fall back to defaults
184
		try {
185 6707 leinfelder
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
186 6706 leinfelder
		} catch (NotFound nfe) {
187
188
			try {
189
				// format is not registered, use default
190
				if (doctype.trim().equals("BIN")) {
191
					fmtid = ObjectFormatCache.getInstance().getFormat(
192
							"application/octet-stream").getFormatId();
193
194
				} else {
195
					fmtid = ObjectFormatCache.getInstance().getFormat(
196
							"text/plain").getFormatId();
197
				}
198
199
			} catch (NotFound nf) {
200 6707 leinfelder
				logMetacat.error("There was a problem getting the default format "
201 6706 leinfelder
								+ "from the ObjectFormatCache: "
202
								+ nf.getMessage());
203
				throw nf;
204
			}
205
206
		}
207
208
		sysMeta.setFormatId(fmtid);
209 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
210 6706 leinfelder
211
		// further parse EML documents to get data object format,
212
		// describes and describedBy information
213
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
214
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
215
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
216
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
217
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
218
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
219
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
220
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
221
222
			try {
223
224 6707 leinfelder
				DatabaseConnectionPoolInterface connectionPool =
225
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
226
				DataManager dataManager =
227
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
228
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
229 6706 leinfelder
230
				// iterate through the data objects in the EML doc and add
231
				// sysmeta
232 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
233 6706 leinfelder
								+ "entities is: "
234
								+ dataPackage.getEntityNumber());
235
236 6712 leinfelder
				// for generating the ORE map
237
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
238
	            List<Identifier> dataIds = new ArrayList<Identifier>();
239
240 6706 leinfelder
				// iterate through data objects described by the EML
241
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
242
243
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
244 6707 leinfelder
					String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
245 6706 leinfelder
					// default to binary
246
					if (dataDocMimeType == null) {
247
						dataDocMimeType = ObjectFormatCache.getInstance()
248
								.getFormat("application/octet-stream")
249
								.getFormatId().getValue();
250
					}
251
					String dataDocLocalId = "";
252
					logMetacat.debug("Data local ID: " + dataDocLocalId);
253
					logMetacat.debug("Data URL     : " + dataDocUrl);
254
					logMetacat.debug("Data mime    : " + dataDocMimeType);
255
256
					// we only handle ecogrid urls right now
257
					String ecogridPrefix = "ecogrid://knb/";
258
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
259
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
260
								.indexOf(ecogridPrefix)
261
								+ ecogridPrefix.length());
262
263
						// look up the guid for the data
264 6707 leinfelder
						String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
265
						int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
266 6706 leinfelder
267
						SystemMetadata dataSysMeta = null;
268
						// check if data system metadata exists
269
						String dataGuidString = null;
270
						try {
271 6707 leinfelder
							dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
272
							dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
273 6706 leinfelder
						} catch (McdbDocNotFoundException nf) {
274
							// System metadata for data doesn't exist yet, so
275
							// create it
276 6707 leinfelder
							logMetacat.debug("There was not an existing system metadata "
277 6706 leinfelder
											+ "document for " + dataDocLocalId);
278
							try {
279
								logMetacat.debug("Creating a system metadata "
280
										+ "document for " + dataDocLocalId);
281 6712 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE);
282 6706 leinfelder
283
								// now look it up again
284 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
285 6706 leinfelder
286
								// set the guid
287
								Identifier dataGuid = new Identifier();
288
								dataGuid.setValue(dataGuidString);
289
290
								// set object format
291 6707 leinfelder
								logMetacat.debug("Updating system metadata for "
292 6706 leinfelder
												+ dataGuid.getValue() + " to "
293
												+ dataDocMimeType);
294
								try {
295 6707 leinfelder
									ObjectFormatIdentifier fmt =
296
										ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
297 6706 leinfelder
									dataSysMeta.setFormatId(fmt);
298
								} catch (NotFound nfe) {
299 6707 leinfelder
									logMetacat.debug("Couldn't find format identifier for: "
300 6706 leinfelder
													+ dataDocMimeType
301
													+ ". Setting it to application/octet-stream.");
302
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
303 6707 leinfelder
									newFmtid.setValue("application/octet-stream");
304 6706 leinfelder
								}
305
306
								// update the values
307 6707 leinfelder
								HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
308 6712 leinfelder
309
								// part of the ORE package
310
								dataIds.add(dataGuid);
311 6706 leinfelder
312
							} catch (McdbDocNotFoundException mdnf) {
313
								mdnf.printStackTrace();
314
								throw mdnf;
315
							} catch (NumberFormatException nfe) {
316
								nfe.printStackTrace();
317
								throw nfe;
318
							} catch (AccessionNumberException ane) {
319
								ane.printStackTrace();
320
								throw ane;
321
							} catch (SQLException sqle) {
322
								sqle.printStackTrace();
323
								throw sqle;
324
							} catch (NoSuchAlgorithmException nsae) {
325
								nsae.printStackTrace();
326
								throw nsae;
327
							} catch (IOException ioe) {
328
								ioe.printStackTrace();
329
								throw ioe;
330
							} catch (PropertyNotFoundException pnfe) {
331
								pnfe.printStackTrace();
332
								throw pnfe;
333
							} catch (BaseException be) {
334
								be.printStackTrace();
335
								throw be;
336
							}
337
						}
338
339
					} // end if()
340
341 6712 leinfelder
				} // end for (data entities)
342
343
				// ORE map
344 6713 leinfelder
				if (includeORE) {
345
			        if (!dataIds.isEmpty()) {
346
			            // generate the ORE map for this datapackage
347
			            Identifier resourceMapId = new Identifier();
348
			            resourceMapId.setValue("resourceMap_" + sysMeta.getIdentifier().getValue());
349
			            idMap.put(sysMeta.getIdentifier(), dataIds);
350
			            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
351
			            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
352
			            // copy most of the same system metadata as the packaging metadata
353
			            SystemMetadata resourceMapSysMeta = new SystemMetadata();
354
			            BeanUtils.copyProperties(resourceMapXML, sysMeta);
355
			            resourceMapSysMeta.setIdentifier(resourceMapId);
356
			            Checksum checksum = ChecksumUtil.checksum(new BufferedInputStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING)), "MD5");
357
						resourceMapSysMeta.setChecksum(checksum);
358
			            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
359
						resourceMapSysMeta.setFormatId(formatId);
360
			            // TODO: other fields to update?
361
362
						// save it locally
363
						MNodeService.getInstance(null).create(
364
								null,
365
								resourceMapId,
366
								IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING),
367
								resourceMapSysMeta);
368
			        }
369
				}
370 6706 leinfelder
371
			} catch (ParserConfigurationException pce) {
372 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
373 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
374
375
			} catch (SAXException saxe) {
376 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
377 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
378
379
			} catch (XPathExpressionException xpee) {
380 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
381 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
382
			} catch (Exception e) {
383 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
384 6706 leinfelder
								+ "The error message was: " + e.getMessage());
385
			} // end try()
386
387
		} // end if()
388 6712 leinfelder
389 6706 leinfelder
390
		// create the checksum
391
		inputStream = MetacatHandler.read(localId);
392
		String algorithm = "MD5";
393
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
394
		sysMeta.setChecksum(checksum);
395
396
		// set the size
397
		inputStream = MetacatHandler.read(localId);
398
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
399
		sysMeta.setSize(new BigInteger(sizeStr));
400
401
		// submitter
402 6708 leinfelder
		Subject submitter = new Subject();
403 6710 leinfelder
		submitter.setValue(docInfo.get("user_updated"));
404 6708 leinfelder
		sysMeta.setSubmitter(submitter);
405 6709 leinfelder
406 6708 leinfelder
		// rights holder
407
		Subject owner = new Subject();
408 6710 leinfelder
		owner.setValue(docInfo.get("user_owner"));
409 6708 leinfelder
		sysMeta.setRightsHolder(owner);
410 6706 leinfelder
411 6709 leinfelder
		// dates
412
		String createdDateString = docInfo.get("date_created");
413
		String updatedDateString = docInfo.get("date_updated");
414
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
415
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
416
		sysMeta.setDateUploaded(createdDate);
417
		sysMeta.setDateSysMetadataModified(updatedDate);
418
419
		// set the revision history
420
		String docidWithoutRev = accNum.getDocid();
421
		Identifier obsoletedBy = new Identifier();
422
		Identifier obsoletes = new Identifier();
423
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
424
		for (int existingRev: revisions) {
425
			// use the docid+rev as the guid
426
			String existingPid = docidWithoutRev + "." + existingRev;
427
			if (existingRev < rev) {
428
				// it's the old docid, until it's not
429 6710 leinfelder
				obsoletes.setValue(existingPid);
430 6709 leinfelder
			}
431
			if (existingRev > rev) {
432
				// it's the newer docid
433
				obsoletedBy.setValue(existingPid);
434
				// only want the version just after it
435
				break;
436
			}
437 6706 leinfelder
		}
438 6709 leinfelder
		// set them
439
		sysMeta.setObsoletedBy(obsoletedBy);
440
		sysMeta.setObsoletes(obsoletes);
441
442
		// TODO: access control?
443
		// I believe the access control will be fine since we use the same storage mechanism for Metacat/D1
444
445
		// authoritative node
446 6706 leinfelder
		NodeReference nr = new NodeReference();
447
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
448
		sysMeta.setOriginMemberNode(nr);
449
		sysMeta.setAuthoritativeMemberNode(nr);
450
451
		return sysMeta;
452
	}
453
454 6707 leinfelder
	/**
455 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
456
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
457
	 * evaluated.
458
	 *
459
	 * @param is The InputStream of bytes
460
	 *
461
	 * @return size The size in bytes of the input stream as a long
462
	 *
463
	 * @throws IOException
464
	 */
465
	private static long sizeOfStream(InputStream is) throws IOException {
466
467
		long size = 0;
468
		byte[] b = new byte[1024];
469
		int numread = is.read(b, 0, 1024);
470
		while (numread != -1) {
471
			size += numread;
472
			numread = is.read(b, 0, 1024);
473
		}
474
		return size;
475
476
	}
477 6705 leinfelder
}