/**
 *  '$RCSfile$'
 *    Purpose: A Class for generating DataONE SystemMetadata for existing
 *             Metacat documents
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-12-01 13:30:17 -0800 (Thu, 01 Dec 2011) $'
 * '$Revision: 6727 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
26
package edu.ucsb.nceas.metacat.dataone;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.wicket.protocol.http.MockHttpServletRequest;
import org.dataone.client.ObjectFormatCache;
import org.dataone.ore.ResourceMapFactory;
import org.dataone.service.exceptions.BaseException;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.types.v1.AccessPolicy;
import org.dataone.service.types.v1.Checksum;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.NodeReference;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.Session;
import org.dataone.service.types.v1.Subject;
import org.dataone.service.types.v1.SystemMetadata;
import org.dataone.service.types.v1.util.ChecksumUtil;
import org.dataone.service.util.DateTimeMarshaller;
import org.dspace.foresite.ResourceMap;
import org.ecoinformatics.datamanager.DataManager;
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
import org.ecoinformatics.datamanager.parser.DataPackage;
import org.jibx.runtime.JiBXException;
import org.xml.sax.SAXException;

import edu.ucsb.nceas.metacat.AccessionNumber;
import edu.ucsb.nceas.metacat.AccessionNumberException;
import edu.ucsb.nceas.metacat.DBUtil;
import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
import edu.ucsb.nceas.metacat.McdbException;
import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.MetacatHandler;
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.replication.ReplicationService;
import edu.ucsb.nceas.metacat.shared.AccessException;
import edu.ucsb.nceas.metacat.shared.HandlerException;
import edu.ucsb.nceas.metacat.util.DocumentUtil;
import edu.ucsb.nceas.utilities.ParseLSIDException;
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
91
public class SystemMetadataFactory {
92

    
93
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
94
	
95
	/**
96
	 * Creates a system metadata object for insertion into metacat
97
	 * 
98
	 * @param localId
99
	 *            The local document identifier
100
	 * @param user
101
	 *            The user submitting the system metadata document
102
	 * @param groups
103
	 *            The groups the user belongs to
104
	 * 
105
	 * @return sysMeta The system metadata object created
106
	 * @throws SAXException 
107
	 * @throws HandlerException 
108
	 * @throws AccessControlException 
109
	 * @throws AccessException 
110
	 */
111
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE)
112
			throws McdbException, McdbDocNotFoundException, SQLException,
113
			IOException, AccessionNumberException, ClassNotFoundException,
114
			InsufficientKarmaException, ParseLSIDException,
115
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
116
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
117
		
118
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
119
		logMetacat.debug("provided localId: " + localId);
120

    
121
		// create system metadata for the document
122
		SystemMetadata sysMeta = new SystemMetadata();
123
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
124
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
125
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
126
		String guid = null;
127
		try {
128
			// get the guid if it exists
129
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
130
		} catch (McdbDocNotFoundException dnfe) {
131
			// otherwise create the mapping
132
			logMetacat.debug("There was a problem getting the guid from "
133
							+ "the given localId (docid and revision). The error message was: "
134
							+ dnfe.getMessage());
135
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
136
			IdentifierManager.getInstance().createMapping(localId, localId);
137
			logMetacat.debug("Mapping created for " + localId);
138
			logMetacat.debug("accessionNumber: " + accNum);
139
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
140
		}
141
		Identifier identifier = new Identifier();
142
		identifier.setValue(guid);
143

    
144
		// set the id
145
		sysMeta.setIdentifier(identifier);
146

    
147
		// get the data or metadata object
148
		InputStream inputStream;
149
		try {
150
			inputStream = MetacatHandler.read(localId);
151
		} catch (ParseLSIDException ple) {
152
			logMetacat.debug("There was a problem parsing the LSID from "
153
					+ localId + ". The error message was: " + ple.getMessage());
154
			throw ple;
155

    
156
		} catch (PropertyNotFoundException pnfe) {
157
			logMetacat.debug("There was a problem finding a property. "
158
					+ "The error message was: " + pnfe.getMessage());
159
			throw pnfe;
160

    
161
		} catch (McdbException me) {
162
			logMetacat.debug("There was a Metacat problem. "
163
					+ "The error message was: " + me.getMessage());
164
			throw me;
165

    
166
		} catch (SQLException sqle) {
167
			logMetacat.debug("There was a SQL problem. "
168
					+ "The error message was: " + sqle.getMessage());
169
			throw sqle;
170

    
171
		} catch (ClassNotFoundException cnfe) {
172
			logMetacat.debug("There was a problem finding a class. "
173
					+ "The error message was: " + cnfe.getMessage());
174
			throw cnfe;
175

    
176
		} catch (IOException ioe) {
177
			logMetacat.debug("There was an I/O exception. "
178
					+ "The error message was: " + ioe.getMessage());
179
			throw ioe;
180

    
181
		} // end try()
182

    
183
		// get additional docinfo
184
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
185
		// set the default object format
186
		String doctype = docInfo.get("doctype");
187
		ObjectFormatIdentifier fmtid = null;
188

    
189
		// set the object format, fall back to defaults
190
		try {
191
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
192
		} catch (NotFound nfe) {
193

    
194
			try {
195
				// format is not registered, use default
196
				if (doctype.trim().equals("BIN")) {
197
					fmtid = ObjectFormatCache.getInstance().getFormat(
198
							"application/octet-stream").getFormatId();
199

    
200
				} else {
201
					fmtid = ObjectFormatCache.getInstance().getFormat(
202
							"text/plain").getFormatId();
203
				}
204

    
205
			} catch (NotFound nf) {
206
				logMetacat.error("There was a problem getting the default format "
207
								+ "from the ObjectFormatCache: "
208
								+ nf.getMessage());
209
				throw nf;
210
			}
211

    
212
		}
213

    
214
		sysMeta.setFormatId(fmtid);
215
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
216

    
217
		// create the checksum
218
		inputStream = MetacatHandler.read(localId);
219
		String algorithm = "MD5";
220
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
221
		sysMeta.setChecksum(checksum);
222
		
223
		// set the size
224
		inputStream = MetacatHandler.read(localId);
225
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
226
		sysMeta.setSize(new BigInteger(sizeStr));
227
		
228
		// submitter
229
		Subject submitter = new Subject();
230
		submitter.setValue(docInfo.get("user_updated"));
231
		sysMeta.setSubmitter(submitter);
232
		
233
		// rights holder
234
		Subject owner = new Subject();
235
		owner.setValue(docInfo.get("user_owner"));
236
		sysMeta.setRightsHolder(owner);
237

    
238
		// dates
239
		String createdDateString = docInfo.get("date_created");
240
		String updatedDateString = docInfo.get("date_updated");
241
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
242
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
243
		sysMeta.setDateUploaded(createdDate);
244
		sysMeta.setDateSysMetadataModified(updatedDate);
245
		
246
		// set the revision history
247
		String docidWithoutRev = accNum.getDocid();
248
		Identifier obsoletedBy = null;
249
		Identifier obsoletes = null;
250
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
251
		// ensure this ordering since processing depends on it
252
		Collections.sort(revisions);
253
		for (int existingRev: revisions) {
254
			// use the docid+rev as the guid
255
			String existingPid = docidWithoutRev + "." + existingRev;
256
			if (existingRev < rev) {
257
				// it's the old docid, until it's not
258
				obsoletes = new Identifier();
259
				obsoletes.setValue(existingPid);
260
			}
261
			if (existingRev > rev) {
262
				// it's the newer docid
263
				obsoletedBy = new Identifier();
264
				obsoletedBy.setValue(existingPid);
265
				// only want the version just after it
266
				break;
267
			}
268
		}
269
		// set them on our object
270
		sysMeta.setObsoletedBy(obsoletedBy);
271
		sysMeta.setObsoletes(obsoletes);
272
		
273
		// update the system metadata for the object[s] we are revising
274
		if (obsoletedBy != null) {
275
			SystemMetadata obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
276
			if (obsoletedBySysMeta != null) {
277
				obsoletedBySysMeta.setObsoletes(identifier);
278
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
279
			}
280
		}
281
		if (obsoletes != null) {
282
			SystemMetadata obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
283
			if (obsoletesSysMeta != null) {
284
				obsoletesSysMeta.setObsoletedBy(identifier);
285
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
286
			}
287
		}
288
		
289
		// look up the access control policy we have in metacat already to make sure it is in the SM
290
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicyByLocalId(docidWithoutRev);
291
		sysMeta.setAccessPolicy(accessPolicy);
292
		
293
		// authoritative node
294
		NodeReference nr = new NodeReference();
295
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
296
		sysMeta.setOriginMemberNode(nr);
297
		sysMeta.setAuthoritativeMemberNode(nr);
298
		
299
		// further parse EML documents to get data object format,
300
		// describes and describedBy information
301
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
302
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
303
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
304
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
305
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
306
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
307
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
308
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
309

    
310
			try {
311
				inputStream = MetacatHandler.read(localId);
312
				DatabaseConnectionPoolInterface connectionPool = 
313
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
314
				DataManager dataManager = 
315
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
316
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
317

    
318
				// iterate through the data objects in the EML doc and add sysmeta
319
				logMetacat.debug("In createSystemMetadata() the number of data "
320
								+ "entities is: "
321
								+ dataPackage.getEntityNumber());
322

    
323
				// for generating the ORE map
324
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
325
	            List<Identifier> dataIds = new ArrayList<Identifier>();
326
				
327
				// iterate through data objects described by the EML
328
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
329

    
330
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
331
					String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
332
					// default to binary
333
					if (dataDocMimeType == null) {
334
						dataDocMimeType = ObjectFormatCache.getInstance()
335
								.getFormat("application/octet-stream")
336
								.getFormatId().getValue();
337
					}
338
					String dataDocLocalId = "";
339
					logMetacat.debug("Data local ID: " + dataDocLocalId);
340
					logMetacat.debug("Data URL     : " + dataDocUrl);
341
					logMetacat.debug("Data mime    : " + dataDocMimeType);
342

    
343
					// we only handle ecogrid urls right now
344
					String ecogridPrefix = "ecogrid://knb/";
345
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
346
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
347
								.indexOf(ecogridPrefix)
348
								+ ecogridPrefix.length());
349

    
350
						// look up the guid for the data
351
						String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
352
						int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
353

    
354
						// check if data system metadata exists already
355
						SystemMetadata dataSysMeta = null;
356
						String dataGuidString = null;
357
						Identifier dataGuid = new Identifier();
358
						try {
359
							// look for the identifier
360
							dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
361
							// set it
362
							dataGuid.setValue(dataGuidString);
363
							// look up the system metadata
364
							try {
365
								dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
366
							} catch (Exception e) {
367
								// probably not in the system
368
								dataSysMeta = null;
369
							}
370
							//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
371
						} catch (McdbDocNotFoundException nf) {
372
							// we didn't find it
373
							dataSysMeta = null;
374
						}
375
							
376
						// we'll have to generate it	
377
						if (dataSysMeta == null) {
378
							// System metadata for data doesn't exist yet, so create it
379
							logMetacat.debug("There was not an existing system metadata document for " + dataDocLocalId);
380
							try {
381
								logMetacat.debug("Creating a system metadata " + "document for " + dataDocLocalId);
382
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE);
383

    
384
								// now look it up again
385
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
386

    
387
								// set the guid
388
								dataGuid.setValue(dataGuidString);
389

    
390
								// set object format
391
								logMetacat.debug("Updating system metadata for "
392
												+ dataGuid.getValue() + " to "
393
												+ dataDocMimeType);
394
								try {
395
									ObjectFormatIdentifier fmt = 
396
										ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
397
									dataSysMeta.setFormatId(fmt);
398
								} catch (NotFound nfe) {
399
									logMetacat.debug("Couldn't find format identifier for: "
400
													+ dataDocMimeType
401
													+ ". Setting it to application/octet-stream.");
402
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
403
									newFmtid.setValue("application/octet-stream");
404
								}
405

    
406
								// update the values
407
								HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
408
								
409

    
410
							} catch (McdbDocNotFoundException mdnf) {
411
								mdnf.printStackTrace();
412
								throw mdnf;
413
							} catch (NumberFormatException nfe) {
414
								nfe.printStackTrace();
415
								throw nfe;
416
							} catch (AccessionNumberException ane) {
417
								ane.printStackTrace();
418
								throw ane;
419
							} catch (SQLException sqle) {
420
								sqle.printStackTrace();
421
								throw sqle;
422
							} catch (NoSuchAlgorithmException nsae) {
423
								nsae.printStackTrace();
424
								throw nsae;
425
							} catch (IOException ioe) {
426
								ioe.printStackTrace();
427
								throw ioe;
428
							} catch (PropertyNotFoundException pnfe) {
429
								pnfe.printStackTrace();
430
								throw pnfe;
431
							} catch (BaseException be) {
432
								be.printStackTrace();
433
								throw be;
434
							}	
435
							
436
						}
437
						
438
						// part of the ORE package
439
						dataIds.add(dataGuid);
440

    
441
					} // end if (EML package)
442

    
443
				} // end for (data entities)
444
				
445
				// ORE map
446
				if (includeORE) {
447
			        if (!dataIds.isEmpty()) {
448
			            // generate the ORE map for this datapackage
449
			            Identifier resourceMapId = new Identifier();
450
			            // want to be able to run this over and over again for now
451
			            resourceMapId.setValue("resourceMap_" + sysMeta.getIdentifier().getValue() + "_" + System.currentTimeMillis());
452
			            idMap.put(sysMeta.getIdentifier(), dataIds);
453
			            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
454
			            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
455
			            // copy most of the same system metadata as the packaging metadata
456
			            SystemMetadata resourceMapSysMeta = new SystemMetadata();
457
			            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
458
			            resourceMapSysMeta.setIdentifier(resourceMapId);
459
			            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
460
						resourceMapSysMeta.setChecksum(oreChecksum);
461
			            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
462
						resourceMapSysMeta.setFormatId(formatId);
463
			            // TODO: other fields to update?
464
						resourceMapSysMeta.setObsoletes(null);
465
						resourceMapSysMeta.setObsoletedBy(null);
466
						resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
467
			            
468
						// save it locally
469
						Session session = new Session();
470
						session.setSubject(submitter);
471
						MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
472
						MNodeService.getInstance(request).create(
473
								session, 
474
								resourceMapId, 
475
								IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), 
476
								resourceMapSysMeta);
477
			        }
478
				}
479

    
480
			} catch (ParserConfigurationException pce) {
481
				logMetacat.debug("There was a problem parsing the EML document. "
482
								+ "The error message was: " + pce.getMessage());
483

    
484
			} catch (SAXException saxe) {
485
				logMetacat.debug("There was a problem traversing the EML document. "
486
								+ "The error message was: " + saxe.getMessage());
487

    
488
			} catch (XPathExpressionException xpee) {
489
				logMetacat.debug("There was a problem searching the EML document. "
490
								+ "The error message was: " + xpee.getMessage());
491
			} catch (Exception e) {
492
				logMetacat.debug("There was a problem creating System Metadata. "
493
								+ "The error message was: " + e.getMessage());
494
				e.printStackTrace();
495
			} // end try()
496

    
497
		} // end if()
498

    
499
		return sysMeta;
500
	}
501

    
502
	/**
503
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
504
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
505
	 * evaluated.
506
	 * 
507
	 * @param is The InputStream of bytes
508
	 * 
509
	 * @return size The size in bytes of the input stream as a long
510
	 * 
511
	 * @throws IOException
512
	 */
513
	private static long sizeOfStream(InputStream is) throws IOException {
514

    
515
		long size = 0;
516
		byte[] b = new byte[1024];
517
		int numread = is.read(b, 0, 1024);
518
		while (numread != -1) {
519
			size += numread;
520
			numread = is.read(b, 0, 1024);
521
		}
522
		return size;
523

    
524
	}
525
}
(5-5/5)