/**
 *  '$RCSfile$'
 *    Purpose: A Class that generates DataONE system metadata for objects
 *             stored in Metacat
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-12-07 17:05:17 -0800 (Wed, 07 Dec 2011) $'
 * '$Revision: 6747 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.dataone;

import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.wicket.protocol.http.MockHttpServletRequest;
import org.dataone.client.ObjectFormatCache;
import org.dataone.ore.ResourceMapFactory;
import org.dataone.service.exceptions.BaseException;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.types.v1.AccessPolicy;
import org.dataone.service.types.v1.Checksum;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.NodeReference;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.Session;
import org.dataone.service.types.v1.Subject;
import org.dataone.service.types.v1.SystemMetadata;
import org.dataone.service.types.v1.util.ChecksumUtil;
import org.dataone.service.util.DateTimeMarshaller;
import org.dspace.foresite.ResourceMap;
import org.ecoinformatics.datamanager.DataManager;
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
import org.ecoinformatics.datamanager.parser.DataPackage;
import org.jibx.runtime.JiBXException;
import org.xml.sax.SAXException;

import edu.ucsb.nceas.metacat.AccessionNumber;
import edu.ucsb.nceas.metacat.AccessionNumberException;
import edu.ucsb.nceas.metacat.DBUtil;
import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
import edu.ucsb.nceas.metacat.McdbException;
import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.MetacatHandler;
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.replication.ReplicationService;
import edu.ucsb.nceas.metacat.shared.AccessException;
import edu.ucsb.nceas.metacat.shared.HandlerException;
import edu.ucsb.nceas.metacat.util.DocumentUtil;
import edu.ucsb.nceas.utilities.ParseLSIDException;
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
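
/**
 * Builds DataONE SystemMetadata for objects already stored in Metacat,
 * optionally cascading to the data entities described by an EML document and
 * generating an ORE resource map for the package. A minimal usage sketch
 * (the docid "sample.1.1" is hypothetical, for illustration only):
 *
 * <pre>
 * SystemMetadata sm = SystemMetadataFactory.createSystemMetadata("sample.1.1", true);
 * String pid = sm.getIdentifier().getValue();
 * </pre>
 */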
public class SystemMetadataFactory {

	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);

	/**
	 * Creates a system metadata object for insertion into metacat
	 * 
	 * @param localId
	 *            The local document identifier
	 * @param includeORE
	 *            Whether to also generate and save an ORE resource map for
	 *            the data package described by the document
	 * 
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {

		logMetacat.debug("SystemMetadataFactory.createSystemMetadata() called.");
		logMetacat.debug("provided localId: " + localId);

		// create system metadata for the document
		SystemMetadata sysMeta = new SystemMetadata();
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping
			logMetacat.debug("There was a problem getting the guid from "
							+ "the given localId (docid and revision). The error message was: "
							+ dnfe.getMessage());
			logMetacat.debug("No guid in the identifier table. Adding it for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			logMetacat.debug("Mapping created for " + localId);
			logMetacat.debug("accessionNumber: " + accNum);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		}
		Identifier identifier = new Identifier();
		identifier.setValue(guid);

		// set the id
		sysMeta.setIdentifier(identifier);

		// get the data or metadata object
		InputStream inputStream;
		try {
			inputStream = MetacatHandler.read(localId);
		} catch (ParseLSIDException ple) {
			logMetacat.debug("There was a problem parsing the LSID from "
					+ localId + ". The error message was: " + ple.getMessage());
			throw ple;

		} catch (PropertyNotFoundException pnfe) {
			logMetacat.debug("There was a problem finding a property. "
					+ "The error message was: " + pnfe.getMessage());
			throw pnfe;

		} catch (McdbException me) {
			logMetacat.debug("There was a Metacat problem. "
					+ "The error message was: " + me.getMessage());
			throw me;

		} catch (SQLException sqle) {
			logMetacat.debug("There was a SQL problem. "
					+ "The error message was: " + sqle.getMessage());
			throw sqle;

		} catch (ClassNotFoundException cnfe) {
			logMetacat.debug("There was a problem finding a class. "
					+ "The error message was: " + cnfe.getMessage());
			throw cnfe;

		} catch (IOException ioe) {
			logMetacat.debug("There was an I/O exception. "
					+ "The error message was: " + ioe.getMessage());
			throw ioe;

		} // end try()

		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// the doctype is used to look up the object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		try {
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
		} catch (NotFound nfe) {

			try {
				// format is not registered, use default
				if (doctype.trim().equals("BIN")) {
					fmtid = ObjectFormatCache.getInstance().getFormat(
							"application/octet-stream").getFormatId();

				} else {
					fmtid = ObjectFormatCache.getInstance().getFormat(
							"text/plain").getFormatId();
				}

			} catch (NotFound nf) {
				logMetacat.error("There was a problem getting the default format "
								+ "from the ObjectFormatCache: "
								+ nf.getMessage());
				throw nf;
			}

		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

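		// Note: the object is re-read from storage for each pass below
		// (once for the checksum, once for the size) because each pass
		// consumes the stream; a mark/reset-capable stream could avoid this.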
		// create the checksum
		inputStream = MetacatHandler.read(localId);
		String algorithm = "MD5";
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		sysMeta.setChecksum(checksum);

		// set the size
		inputStream = MetacatHandler.read(localId);
		String sizeStr = Long.toString(sizeOfStream(inputStream));
		sysMeta.setSize(new BigInteger(sizeStr));

		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);

		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
		sysMeta.setDateUploaded(createdDate);
		sysMeta.setDateSysMetadataModified(updatedDate);

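		// The obsolescence chain links revisions of the same docid. For a
		// hypothetical docid "foo" with revisions 1, 2 and 3, the system
		// metadata for foo.2 obsoletes foo.1 and is obsoletedBy foo.3; the
		// loop below finds the nearest older and newer revisions of this one.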
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev : revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			if (existingRev < rev) {
				// it's the old docid, until it's not
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);

		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			//SystemMetadata obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
			SystemMetadata obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			//SystemMetadata obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
			SystemMetadata obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}

		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		sysMeta.setAccessPolicy(accessPolicy);

		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);

		// further parse EML documents to get data object format,
		// describes and describedBy information
		// (compare the format IDs by value rather than by object identity)
		String fmtidValue = fmtid.getValue();
		if (fmtidValue.equals("eml://ecoinformatics.org/eml-2.0.0")
				|| fmtidValue.equals("eml://ecoinformatics.org/eml-2.0.1")
				|| fmtidValue.equals("eml://ecoinformatics.org/eml-2.1.0")
				|| fmtidValue.equals("eml://ecoinformatics.org/eml-2.1.1")) {

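			// Parse the EML with the ecoinformatics Data Manager library to
			// enumerate the data entities the document describes; each entity
			// may need its own system metadata (and membership in the ORE map).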
			try {
				inputStream = MetacatHandler.read(localId);
				DatabaseConnectionPoolInterface connectionPool = 
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
				DataManager dataManager = 
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);

				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ dataPackage.getEntityNumber());

				// for generating the ORE map
				Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
				List<Identifier> dataIds = new ArrayList<Identifier>();

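				// idMap holds the ORE aggregation: one entry mapping this
				// metadata pid to the list of data pids it documents,
				// e.g. { emlPid -> [dataPid1, dataPid2, ...] }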
				// iterate through data objects described by the EML
				if (dataPackage.getEntityList() != null) {
					for (int j = 0; j < dataPackage.getEntityList().length; j++) {

						String dataDocUrl = dataPackage.getEntityList()[j].getURL();
						String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = ObjectFormatCache.getInstance()
									.getFormat("application/octet-stream")
									.getFormatId().getValue();
						}
						String dataDocLocalId = "";
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);

						// we only handle ecogrid urls right now
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							dataDocLocalId = dataDocUrl.substring(dataDocUrl
									.indexOf(ecogridPrefix)
									+ ecogridPrefix.length());

							// look up the guid for the data
							String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
							int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);

							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							Identifier dataGuid = new Identifier();
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}

							// we'll have to generate it
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								logMetacat.debug("There was not an existing system metadata document for " + dataDocLocalId);
								try {
									logMetacat.debug("Creating a system metadata document for " + dataDocLocalId);
									dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE);

									// now look it up again
									dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

									// set the guid
									dataGuid.setValue(dataGuidString);

									// set object format
									logMetacat.debug("Updating system metadata for "
													+ dataGuid.getValue() + " to "
													+ dataDocMimeType);
									try {
										ObjectFormatIdentifier fmt = 
											ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
										dataSysMeta.setFormatId(fmt);
									} catch (NotFound nfe) {
										logMetacat.debug("Couldn't find format identifier for: "
														+ dataDocMimeType
														+ ". Setting it to application/octet-stream.");
										ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
										newFmtid.setValue("application/octet-stream");
										// apply the fallback format
										dataSysMeta.setFormatId(newFmtid);
									}

									// update the values
									HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);

								} catch (McdbDocNotFoundException mdnf) {
									mdnf.printStackTrace();
									throw mdnf;
								} catch (NumberFormatException nfe) {
									nfe.printStackTrace();
									throw nfe;
								} catch (AccessionNumberException ane) {
									ane.printStackTrace();
									throw ane;
								} catch (SQLException sqle) {
									sqle.printStackTrace();
									throw sqle;
								} catch (NoSuchAlgorithmException nsae) {
									nsae.printStackTrace();
									throw nsae;
								} catch (IOException ioe) {
									ioe.printStackTrace();
									throw ioe;
								} catch (PropertyNotFoundException pnfe) {
									pnfe.printStackTrace();
									throw pnfe;
								} catch (BaseException be) {
									be.printStackTrace();
									throw be;
								}

							}

							// part of the ORE package
							dataIds.add(dataGuid);

						} // end if (ecogrid url)

					} // end for (data entities)

				} // data entities not null

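				// A resource map is a small RDF document relating the science
				// metadata to its data objects as one package. The timestamped
				// identifier below mints a fresh map on every run instead of
				// updating an existing one.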
				// ORE map
				if (includeORE) {
					if (!dataIds.isEmpty()) {
						// generate the ORE map for this datapackage
						Identifier resourceMapId = new Identifier();
						// want to be able to run this over and over again for now
						resourceMapId.setValue("resourceMap_" + sysMeta.getIdentifier().getValue() + "_" + System.currentTimeMillis());
						idMap.put(sysMeta.getIdentifier(), dataIds);
						ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
						String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
						// copy most of the same system metadata as the packaging metadata
						SystemMetadata resourceMapSysMeta = new SystemMetadata();
						BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
						resourceMapSysMeta.setIdentifier(resourceMapId);
						Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
						resourceMapSysMeta.setChecksum(oreChecksum);
						ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
						resourceMapSysMeta.setFormatId(formatId);
						// TODO: other fields to update?
						resourceMapSysMeta.setObsoletes(null);
						resourceMapSysMeta.setObsoletedBy(null);
						resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));

						// save it locally
						Session session = new Session();
						session.setSubject(submitter);
						MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
						MNodeService.getInstance(request).create(
								session, 
								resourceMapId, 
								IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), 
								resourceMapSysMeta);
					}
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}

	/**
	 * Find the size (in bytes) of a stream. Note: this should be refactored
	 * into a shared utility class when stream I/O in Metacat is evaluated.
	 * 
	 * @param is The InputStream of bytes
	 * 
	 * @return size The size in bytes of the input stream as a long
	 * 
	 * @throws IOException
	 */
	private static long sizeOfStream(InputStream is) throws IOException {

		long size = 0;
		byte[] b = new byte[1024];
		// read in 1 KB chunks, accumulating the byte count until EOF
		int numread = is.read(b, 0, 1024);
		while (numread != -1) {
			size += numread;
			numread = is.read(b, 0, 1024);
		}
		return size;

	}
}