/**
 *  '$RCSfile$'
 *    Purpose: A Class that creates DataONE system metadata for Metacat documents
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-12-07 12:18:24 -0800 (Wed, 07 Dec 2011) $'
 * '$Revision: 6744 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.BufferedInputStream;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.security.NoSuchAlgorithmException;
33
import java.sql.SQLException;
34
import java.util.ArrayList;
35
import java.util.Collections;
36
import java.util.Date;
37
import java.util.HashMap;
38
import java.util.Hashtable;
39
import java.util.List;
40
import java.util.Map;
41
import java.util.Vector;
42

    
43
import javax.xml.parsers.ParserConfigurationException;
44
import javax.xml.xpath.XPathExpressionException;
45

    
46
import org.apache.commons.beanutils.BeanUtils;
47
import org.apache.commons.io.IOUtils;
48
import org.apache.log4j.Logger;
49
import org.apache.wicket.protocol.http.MockHttpServletRequest;
50
import org.dataone.client.ObjectFormatCache;
51
import org.dataone.ore.ResourceMapFactory;
52
import org.dataone.service.exceptions.BaseException;
53
import org.dataone.service.exceptions.NotFound;
54
import org.dataone.service.types.v1.AccessPolicy;
55
import org.dataone.service.types.v1.Checksum;
56
import org.dataone.service.types.v1.Identifier;
57
import org.dataone.service.types.v1.NodeReference;
58
import org.dataone.service.types.v1.ObjectFormatIdentifier;
59
import org.dataone.service.types.v1.Session;
60
import org.dataone.service.types.v1.Subject;
61
import org.dataone.service.types.v1.SystemMetadata;
62
import org.dataone.service.types.v1.util.ChecksumUtil;
63
import org.dataone.service.util.DateTimeMarshaller;
64
import org.dspace.foresite.ResourceMap;
65
import org.ecoinformatics.datamanager.DataManager;
66
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
67
import org.ecoinformatics.datamanager.parser.DataPackage;
68
import org.jibx.runtime.JiBXException;
69
import org.xml.sax.SAXException;
70

    
71
import edu.ucsb.nceas.metacat.AccessionNumber;
72
import edu.ucsb.nceas.metacat.AccessionNumberException;
73
import edu.ucsb.nceas.metacat.DBUtil;
74
import edu.ucsb.nceas.metacat.IdentifierManager;
75
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
76
import edu.ucsb.nceas.metacat.McdbException;
77
import edu.ucsb.nceas.metacat.MetaCatServlet;
78
import edu.ucsb.nceas.metacat.MetacatHandler;
79
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
80
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
81
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
82
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
83
import edu.ucsb.nceas.metacat.properties.PropertyService;
84
import edu.ucsb.nceas.metacat.replication.ReplicationService;
85
import edu.ucsb.nceas.metacat.shared.AccessException;
86
import edu.ucsb.nceas.metacat.shared.HandlerException;
87
import edu.ucsb.nceas.metacat.util.DocumentUtil;
88
import edu.ucsb.nceas.utilities.ParseLSIDException;
89
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
90

    
91
public class SystemMetadataFactory {
92

    
93
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
94
	
95
	/**
96
	 * Creates a system metadata object for insertion into metacat
97
	 * 
98
	 * @param localId
99
	 *            The local document identifier
100
	 * @param user
101
	 *            The user submitting the system metadata document
102
	 * @param groups
103
	 *            The groups the user belongs to
104
	 * 
105
	 * @return sysMeta The system metadata object created
106
	 * @throws SAXException 
107
	 * @throws HandlerException 
108
	 * @throws AccessControlException 
109
	 * @throws AccessException 
110
	 */
111
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE)
112
			throws McdbException, McdbDocNotFoundException, SQLException,
113
			IOException, AccessionNumberException, ClassNotFoundException,
114
			InsufficientKarmaException, ParseLSIDException,
115
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
116
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
117
		
118
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
119
		logMetacat.debug("provided localId: " + localId);
120

    
121
		// create system metadata for the document
122
		SystemMetadata sysMeta = new SystemMetadata();
123
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
124
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
125
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
126
		String guid = null;
127
		try {
128
			// get the guid if it exists
129
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
130
		} catch (McdbDocNotFoundException dnfe) {
131
			// otherwise create the mapping
132
			logMetacat.debug("There was a problem getting the guid from "
133
							+ "the given localId (docid and revision). The error message was: "
134
							+ dnfe.getMessage());
135
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
136
			IdentifierManager.getInstance().createMapping(localId, localId);
137
			logMetacat.debug("Mapping created for " + localId);
138
			logMetacat.debug("accessionNumber: " + accNum);
139
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
140
		}
141
		Identifier identifier = new Identifier();
142
		identifier.setValue(guid);
143

    
144
		// set the id
145
		sysMeta.setIdentifier(identifier);
146

    
147
		// get the data or metadata object
148
		InputStream inputStream;
149
		try {
150
			inputStream = MetacatHandler.read(localId);
151
		} catch (ParseLSIDException ple) {
152
			logMetacat.debug("There was a problem parsing the LSID from "
153
					+ localId + ". The error message was: " + ple.getMessage());
154
			throw ple;
155

    
156
		} catch (PropertyNotFoundException pnfe) {
157
			logMetacat.debug("There was a problem finding a property. "
158
					+ "The error message was: " + pnfe.getMessage());
159
			throw pnfe;
160

    
161
		} catch (McdbException me) {
162
			logMetacat.debug("There was a Metacat problem. "
163
					+ "The error message was: " + me.getMessage());
164
			throw me;
165

    
166
		} catch (SQLException sqle) {
167
			logMetacat.debug("There was a SQL problem. "
168
					+ "The error message was: " + sqle.getMessage());
169
			throw sqle;
170

    
171
		} catch (ClassNotFoundException cnfe) {
172
			logMetacat.debug("There was a problem finding a class. "
173
					+ "The error message was: " + cnfe.getMessage());
174
			throw cnfe;
175

    
176
		} catch (IOException ioe) {
177
			logMetacat.debug("There was an I/O exception. "
178
					+ "The error message was: " + ioe.getMessage());
179
			throw ioe;
180

    
181
		} // end try()
182

    
183
		// get additional docinfo
184
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
185
		// set the default object format
186
		String doctype = docInfo.get("doctype");
187
		ObjectFormatIdentifier fmtid = null;
188

    
189
		// set the object format, fall back to defaults
190
		try {
191
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
192
		} catch (NotFound nfe) {
193

    
194
			try {
195
				// format is not registered, use default
196
				if (doctype.trim().equals("BIN")) {
197
					fmtid = ObjectFormatCache.getInstance().getFormat(
198
							"application/octet-stream").getFormatId();
199

    
200
				} else {
201
					fmtid = ObjectFormatCache.getInstance().getFormat(
202
							"text/plain").getFormatId();
203
				}
204

    
205
			} catch (NotFound nf) {
206
				logMetacat.error("There was a problem getting the default format "
207
								+ "from the ObjectFormatCache: "
208
								+ nf.getMessage());
209
				throw nf;
210
			}
211

    
212
		}
213

    
214
		sysMeta.setFormatId(fmtid);
215
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
216

    
217
		// create the checksum
218
		inputStream = MetacatHandler.read(localId);
219
		String algorithm = "MD5";
220
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
221
		sysMeta.setChecksum(checksum);
222
		
223
		// set the size
224
		inputStream = MetacatHandler.read(localId);
225
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
226
		sysMeta.setSize(new BigInteger(sizeStr));
227
		
228
		// submitter
229
		Subject submitter = new Subject();
230
		submitter.setValue(docInfo.get("user_updated"));
231
		sysMeta.setSubmitter(submitter);
232
		
233
		// rights holder
234
		Subject owner = new Subject();
235
		owner.setValue(docInfo.get("user_owner"));
236
		sysMeta.setRightsHolder(owner);
237

    
238
		// dates
239
		String createdDateString = docInfo.get("date_created");
240
		String updatedDateString = docInfo.get("date_updated");
241
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
242
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
243
		sysMeta.setDateUploaded(createdDate);
244
		sysMeta.setDateSysMetadataModified(updatedDate);
245
		
246
		// set the revision history
247
		String docidWithoutRev = accNum.getDocid();
248
		Identifier obsoletedBy = null;
249
		Identifier obsoletes = null;
250
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
251
		// ensure this ordering since processing depends on it
252
		Collections.sort(revisions);
253
		for (int existingRev: revisions) {
254
			// use the docid+rev as the guid
255
			String existingPid = docidWithoutRev + "." + existingRev;
256
			if (existingRev < rev) {
257
				// it's the old docid, until it's not
258
				obsoletes = new Identifier();
259
				obsoletes.setValue(existingPid);
260
			}
261
			if (existingRev > rev) {
262
				// it's the newer docid
263
				obsoletedBy = new Identifier();
264
				obsoletedBy.setValue(existingPid);
265
				// only want the version just after it
266
				break;
267
			}
268
		}
269
		// set them on our object
270
		sysMeta.setObsoletedBy(obsoletedBy);
271
		sysMeta.setObsoletes(obsoletes);
272
		
273
		// update the system metadata for the object[s] we are revising
274
		if (obsoletedBy != null) {
275
			SystemMetadata obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
276
			if (obsoletedBySysMeta != null) {
277
				obsoletedBySysMeta.setObsoletes(identifier);
278
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
279
			}
280
		}
281
		if (obsoletes != null) {
282
			SystemMetadata obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
283
			if (obsoletesSysMeta != null) {
284
				obsoletesSysMeta.setObsoletedBy(identifier);
285
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
286
			}
287
		}
288
		
289
		// look up the access control policy we have in metacat
290
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
291
		sysMeta.setAccessPolicy(accessPolicy);
292
		
293
		// authoritative node
294
		NodeReference nr = new NodeReference();
295
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
296
		sysMeta.setOriginMemberNode(nr);
297
		sysMeta.setAuthoritativeMemberNode(nr);
298
		
299
		// further parse EML documents to get data object format,
300
		// describes and describedBy information
301
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
302
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
303
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
304
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
305
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
306
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
307
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
308
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
309

    
310
			try {
311
				inputStream = MetacatHandler.read(localId);
312
				DatabaseConnectionPoolInterface connectionPool = 
313
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
314
				DataManager dataManager = 
315
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
316
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
317

    
318
				// iterate through the data objects in the EML doc and add sysmeta
319
				logMetacat.debug("In createSystemMetadata() the number of data "
320
								+ "entities is: "
321
								+ dataPackage.getEntityNumber());
322

    
323
				// for generating the ORE map
324
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
325
	            List<Identifier> dataIds = new ArrayList<Identifier>();
326
				
327
				// iterate through data objects described by the EML
328
	            if (dataPackage.getEntityList() != null) {
329
					for (int j = 0; j < dataPackage.getEntityList().length; j++) {
330
	
331
						String dataDocUrl = dataPackage.getEntityList()[j].getURL();
332
						String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
333
						// default to binary
334
						if (dataDocMimeType == null) {
335
							dataDocMimeType = ObjectFormatCache.getInstance()
336
									.getFormat("application/octet-stream")
337
									.getFormatId().getValue();
338
						}
339
						String dataDocLocalId = "";
340
						logMetacat.debug("Data local ID: " + dataDocLocalId);
341
						logMetacat.debug("Data URL     : " + dataDocUrl);
342
						logMetacat.debug("Data mime    : " + dataDocMimeType);
343
	
344
						// we only handle ecogrid urls right now
345
						String ecogridPrefix = "ecogrid://knb/";
346
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
347
							dataDocLocalId = dataDocUrl.substring(dataDocUrl
348
									.indexOf(ecogridPrefix)
349
									+ ecogridPrefix.length());
350
	
351
							// look up the guid for the data
352
							String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
353
							int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
354
	
355
							// check if data system metadata exists already
356
							SystemMetadata dataSysMeta = null;
357
							String dataGuidString = null;
358
							Identifier dataGuid = new Identifier();
359
							try {
360
								// look for the identifier
361
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
362
								// set it
363
								dataGuid.setValue(dataGuidString);
364
								// look up the system metadata
365
								try {
366
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
367
								} catch (Exception e) {
368
									// probably not in the system
369
									dataSysMeta = null;
370
								}
371
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
372
							} catch (McdbDocNotFoundException nf) {
373
								// we didn't find it
374
								dataSysMeta = null;
375
							}
376
								
377
							// we'll have to generate it	
378
							if (dataSysMeta == null) {
379
								// System metadata for data doesn't exist yet, so create it
380
								logMetacat.debug("There was not an existing system metadata document for " + dataDocLocalId);
381
								try {
382
									logMetacat.debug("Creating a system metadata " + "document for " + dataDocLocalId);
383
									dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE);
384
	
385
									// now look it up again
386
									dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
387
	
388
									// set the guid
389
									dataGuid.setValue(dataGuidString);
390
	
391
									// set object format
392
									logMetacat.debug("Updating system metadata for "
393
													+ dataGuid.getValue() + " to "
394
													+ dataDocMimeType);
395
									try {
396
										ObjectFormatIdentifier fmt = 
397
											ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
398
										dataSysMeta.setFormatId(fmt);
399
									} catch (NotFound nfe) {
400
										logMetacat.debug("Couldn't find format identifier for: "
401
														+ dataDocMimeType
402
														+ ". Setting it to application/octet-stream.");
403
										ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
404
										newFmtid.setValue("application/octet-stream");
405
									}
406
	
407
									// update the values
408
									HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
409
									
410
	
411
								} catch (McdbDocNotFoundException mdnf) {
412
									mdnf.printStackTrace();
413
									throw mdnf;
414
								} catch (NumberFormatException nfe) {
415
									nfe.printStackTrace();
416
									throw nfe;
417
								} catch (AccessionNumberException ane) {
418
									ane.printStackTrace();
419
									throw ane;
420
								} catch (SQLException sqle) {
421
									sqle.printStackTrace();
422
									throw sqle;
423
								} catch (NoSuchAlgorithmException nsae) {
424
									nsae.printStackTrace();
425
									throw nsae;
426
								} catch (IOException ioe) {
427
									ioe.printStackTrace();
428
									throw ioe;
429
								} catch (PropertyNotFoundException pnfe) {
430
									pnfe.printStackTrace();
431
									throw pnfe;
432
								} catch (BaseException be) {
433
									be.printStackTrace();
434
									throw be;
435
								}	
436
								
437
							}
438
							
439
							// part of the ORE package
440
							dataIds.add(dataGuid);
441
	
442
						} // end if (EML package)
443
	
444
					} // end for (data entities)
445
					
446
	            } // data entities not null
447
	            
448
				// ORE map
449
				if (includeORE) {
450
			        if (!dataIds.isEmpty()) {
451
			            // generate the ORE map for this datapackage
452
			            Identifier resourceMapId = new Identifier();
453
			            // want to be able to run this over and over again for now
454
			            resourceMapId.setValue("resourceMap_" + sysMeta.getIdentifier().getValue() + "_" + System.currentTimeMillis());
455
			            idMap.put(sysMeta.getIdentifier(), dataIds);
456
			            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
457
			            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
458
			            // copy most of the same system metadata as the packaging metadata
459
			            SystemMetadata resourceMapSysMeta = new SystemMetadata();
460
			            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
461
			            resourceMapSysMeta.setIdentifier(resourceMapId);
462
			            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
463
						resourceMapSysMeta.setChecksum(oreChecksum);
464
			            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
465
						resourceMapSysMeta.setFormatId(formatId);
466
			            // TODO: other fields to update?
467
						resourceMapSysMeta.setObsoletes(null);
468
						resourceMapSysMeta.setObsoletedBy(null);
469
						resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
470
			            
471
						// save it locally
472
						Session session = new Session();
473
						session.setSubject(submitter);
474
						MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
475
						MNodeService.getInstance(request).create(
476
								session, 
477
								resourceMapId, 
478
								IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), 
479
								resourceMapSysMeta);
480
			        }
481
				}
482

    
483
			} catch (ParserConfigurationException pce) {
484
				logMetacat.debug("There was a problem parsing the EML document. "
485
								+ "The error message was: " + pce.getMessage());
486

    
487
			} catch (SAXException saxe) {
488
				logMetacat.debug("There was a problem traversing the EML document. "
489
								+ "The error message was: " + saxe.getMessage());
490

    
491
			} catch (XPathExpressionException xpee) {
492
				logMetacat.debug("There was a problem searching the EML document. "
493
								+ "The error message was: " + xpee.getMessage());
494
			} catch (Exception e) {
495
				logMetacat.debug("There was a problem creating System Metadata. "
496
								+ "The error message was: " + e.getMessage());
497
				e.printStackTrace();
498
			} // end try()
499

    
500
		} // end if()
501

    
502
		return sysMeta;
503
	}
504

    
505
	/**
506
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
507
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
508
	 * evaluated.
509
	 * 
510
	 * @param is The InputStream of bytes
511
	 * 
512
	 * @return size The size in bytes of the input stream as a long
513
	 * 
514
	 * @throws IOException
515
	 */
516
	private static long sizeOfStream(InputStream is) throws IOException {
517

    
518
		long size = 0;
519
		byte[] b = new byte[1024];
520
		int numread = is.read(b, 0, 1024);
521
		while (numread != -1) {
522
			size += numread;
523
			numread = is.read(b, 0, 1024);
524
		}
525
		return size;
526

    
527
	}
528
}