/**
 *  '$RCSfile$'
 *    Purpose: A Class for generating DataONE SystemMetadata for Metacat documents
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-11-29 15:41:10 -0800 (Tue, 29 Nov 2011) $'
 * '$Revision: 6713 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.dataone;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.dataone.client.ObjectFormatCache;
import org.dataone.ore.ResourceMapFactory;
import org.dataone.service.exceptions.BaseException;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.types.v1.Checksum;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.NodeReference;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.Subject;
import org.dataone.service.types.v1.SystemMetadata;
import org.dataone.service.types.v1.util.ChecksumUtil;
import org.dataone.service.util.DateTimeMarshaller;
import org.dspace.foresite.ResourceMap;
import org.ecoinformatics.datamanager.DataManager;
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
import org.ecoinformatics.datamanager.parser.DataPackage;
import org.jibx.runtime.JiBXException;
import org.xml.sax.SAXException;

import edu.ucsb.nceas.metacat.AccessionNumber;
import edu.ucsb.nceas.metacat.AccessionNumberException;
import edu.ucsb.nceas.metacat.DBUtil;
import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
import edu.ucsb.nceas.metacat.McdbException;
import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.MetacatHandler;
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.replication.ReplicationService;
import edu.ucsb.nceas.metacat.shared.HandlerException;
import edu.ucsb.nceas.metacat.util.DocumentUtil;
import edu.ucsb.nceas.utilities.ParseLSIDException;
import edu.ucsb.nceas.utilities.PropertyNotFoundException;

public class SystemMetadataFactory {
87

    
88
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
89
	
90
	/**
91
	 * Creates a system metadata object for insertion into metacat
92
	 * 
93
	 * @param localId
94
	 *            The local document identifier
95
	 * @param user
96
	 *            The user submitting the system metadata document
97
	 * @param groups
98
	 *            The groups the user belongs to
99
	 * 
100
	 * @return sysMeta The system metadata object created
101
	 * @throws SAXException 
102
	 * @throws HandlerException 
103
	 * @throws AccessControlException 
104
	 */
105
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE)
106
			throws McdbException, McdbDocNotFoundException, SQLException,
107
			IOException, AccessionNumberException, ClassNotFoundException,
108
			InsufficientKarmaException, ParseLSIDException,
109
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
110
			JiBXException, AccessControlException, HandlerException, SAXException {
111
		
112
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
113
		logMetacat.debug("provided localId: " + localId);
114

    
115
		// create system metadata for the document
116
		SystemMetadata sysMeta = new SystemMetadata();
117
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
118
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
119
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
120
		String guid = null;
121
		try {
122
			// get the guid if it exists
123
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
124
		} catch (McdbDocNotFoundException dnfe) {
125
			// otherwise create the mapping
126
			logMetacat.debug("There was a problem getting the guid from "
127
							+ "the given localId (docid and revision). The error message was: "
128
							+ dnfe.getMessage());
129
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
130
			IdentifierManager.getInstance().createMapping(localId, localId);
131
			logMetacat.debug("Mapping created for " + localId);
132
			logMetacat.debug("accessionNumber: " + accNum);
133
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
134
		}
135
		Identifier identifier = new Identifier();
136
		identifier.setValue(guid);
137

    
138
		// set the id
139
		sysMeta.setIdentifier(identifier);
140

    
141
		// get the data or metadata object
142
		InputStream inputStream;
143
		try {
144
			inputStream = MetacatHandler.read(localId);
145
		} catch (ParseLSIDException ple) {
146
			logMetacat.debug("There was a problem parsing the LSID from "
147
					+ localId + ". The error message was: " + ple.getMessage());
148
			throw ple;
149

    
150
		} catch (PropertyNotFoundException pnfe) {
151
			logMetacat.debug("There was a problem finding a property. "
152
					+ "The error message was: " + pnfe.getMessage());
153
			throw pnfe;
154

    
155
		} catch (McdbException me) {
156
			logMetacat.debug("There was a Metacat problem. "
157
					+ "The error message was: " + me.getMessage());
158
			throw me;
159

    
160
		} catch (SQLException sqle) {
161
			logMetacat.debug("There was a SQL problem. "
162
					+ "The error message was: " + sqle.getMessage());
163
			throw sqle;
164

    
165
		} catch (ClassNotFoundException cnfe) {
166
			logMetacat.debug("There was a problem finding a class. "
167
					+ "The error message was: " + cnfe.getMessage());
168
			throw cnfe;
169

    
170
		} catch (IOException ioe) {
171
			logMetacat.debug("There was an I/O exception. "
172
					+ "The error message was: " + ioe.getMessage());
173
			throw ioe;
174

    
175
		} // end try()
176

    
177
		// get additional docinfo
178
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
179
		// set the default object format
180
		String doctype = docInfo.get("doctype");
181
		ObjectFormatIdentifier fmtid = null;
182

    
183
		// set the object format, fall back to defaults
184
		try {
185
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
186
		} catch (NotFound nfe) {
187

    
188
			try {
189
				// format is not registered, use default
190
				if (doctype.trim().equals("BIN")) {
191
					fmtid = ObjectFormatCache.getInstance().getFormat(
192
							"application/octet-stream").getFormatId();
193

    
194
				} else {
195
					fmtid = ObjectFormatCache.getInstance().getFormat(
196
							"text/plain").getFormatId();
197
				}
198

    
199
			} catch (NotFound nf) {
200
				logMetacat.error("There was a problem getting the default format "
201
								+ "from the ObjectFormatCache: "
202
								+ nf.getMessage());
203
				throw nf;
204
			}
205

    
206
		}
207

    
208
		sysMeta.setFormatId(fmtid);
209
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
210

    
211
		// further parse EML documents to get data object format,
212
		// describes and describedBy information
213
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
214
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
215
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
216
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
217
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
218
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
219
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
220
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
221

    
222
			try {
223

    
224
				DatabaseConnectionPoolInterface connectionPool = 
225
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
226
				DataManager dataManager = 
227
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
228
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
229

    
230
				// iterate through the data objects in the EML doc and add
231
				// sysmeta
232
				logMetacat.debug("In createSystemMetadata() the number of data "
233
								+ "entities is: "
234
								+ dataPackage.getEntityNumber());
235

    
236
				// for generating the ORE map
237
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
238
	            List<Identifier> dataIds = new ArrayList<Identifier>();
239
				
240
				// iterate through data objects described by the EML
241
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
242

    
243
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
244
					String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
245
					// default to binary
246
					if (dataDocMimeType == null) {
247
						dataDocMimeType = ObjectFormatCache.getInstance()
248
								.getFormat("application/octet-stream")
249
								.getFormatId().getValue();
250
					}
251
					String dataDocLocalId = "";
252
					logMetacat.debug("Data local ID: " + dataDocLocalId);
253
					logMetacat.debug("Data URL     : " + dataDocUrl);
254
					logMetacat.debug("Data mime    : " + dataDocMimeType);
255

    
256
					// we only handle ecogrid urls right now
257
					String ecogridPrefix = "ecogrid://knb/";
258
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
259
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
260
								.indexOf(ecogridPrefix)
261
								+ ecogridPrefix.length());
262

    
263
						// look up the guid for the data
264
						String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
265
						int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
266

    
267
						SystemMetadata dataSysMeta = null;
268
						// check if data system metadata exists
269
						String dataGuidString = null;
270
						try {
271
							dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
272
							dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
273
						} catch (McdbDocNotFoundException nf) {
274
							// System metadata for data doesn't exist yet, so
275
							// create it
276
							logMetacat.debug("There was not an existing system metadata "
277
											+ "document for " + dataDocLocalId);
278
							try {
279
								logMetacat.debug("Creating a system metadata "
280
										+ "document for " + dataDocLocalId);
281
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE);
282

    
283
								// now look it up again
284
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
285

    
286
								// set the guid
287
								Identifier dataGuid = new Identifier();
288
								dataGuid.setValue(dataGuidString);
289

    
290
								// set object format
291
								logMetacat.debug("Updating system metadata for "
292
												+ dataGuid.getValue() + " to "
293
												+ dataDocMimeType);
294
								try {
295
									ObjectFormatIdentifier fmt = 
296
										ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
297
									dataSysMeta.setFormatId(fmt);
298
								} catch (NotFound nfe) {
299
									logMetacat.debug("Couldn't find format identifier for: "
300
													+ dataDocMimeType
301
													+ ". Setting it to application/octet-stream.");
302
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
303
									newFmtid.setValue("application/octet-stream");
304
								}
305

    
306
								// update the values
307
								HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
308
								
309
								// part of the ORE package
310
								dataIds.add(dataGuid);
311

    
312
							} catch (McdbDocNotFoundException mdnf) {
313
								mdnf.printStackTrace();
314
								throw mdnf;
315
							} catch (NumberFormatException nfe) {
316
								nfe.printStackTrace();
317
								throw nfe;
318
							} catch (AccessionNumberException ane) {
319
								ane.printStackTrace();
320
								throw ane;
321
							} catch (SQLException sqle) {
322
								sqle.printStackTrace();
323
								throw sqle;
324
							} catch (NoSuchAlgorithmException nsae) {
325
								nsae.printStackTrace();
326
								throw nsae;
327
							} catch (IOException ioe) {
328
								ioe.printStackTrace();
329
								throw ioe;
330
							} catch (PropertyNotFoundException pnfe) {
331
								pnfe.printStackTrace();
332
								throw pnfe;
333
							} catch (BaseException be) {
334
								be.printStackTrace();
335
								throw be;
336
							}
337
						}
338

    
339
					} // end if()
340

    
341
				} // end for (data entities)
342
				
343
				// ORE map
344
				if (includeORE) {
345
			        if (!dataIds.isEmpty()) {
346
			            // generate the ORE map for this datapackage
347
			            Identifier resourceMapId = new Identifier();
348
			            resourceMapId.setValue("resourceMap_" + sysMeta.getIdentifier().getValue());
349
			            idMap.put(sysMeta.getIdentifier(), dataIds);
350
			            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
351
			            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
352
			            // copy most of the same system metadata as the packaging metadata
353
			            SystemMetadata resourceMapSysMeta = new SystemMetadata();
354
			            BeanUtils.copyProperties(resourceMapXML, sysMeta);
355
			            resourceMapSysMeta.setIdentifier(resourceMapId);
356
			            Checksum checksum = ChecksumUtil.checksum(new BufferedInputStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING)), "MD5");
357
						resourceMapSysMeta.setChecksum(checksum);
358
			            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
359
						resourceMapSysMeta.setFormatId(formatId);
360
			            // TODO: other fields to update?
361
			            
362
						// save it locally
363
						MNodeService.getInstance(null).create(
364
								null, 
365
								resourceMapId, 
366
								IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), 
367
								resourceMapSysMeta);
368
			        }
369
				}
370

    
371
			} catch (ParserConfigurationException pce) {
372
				logMetacat.debug("There was a problem parsing the EML document. "
373
								+ "The error message was: " + pce.getMessage());
374

    
375
			} catch (SAXException saxe) {
376
				logMetacat.debug("There was a problem traversing the EML document. "
377
								+ "The error message was: " + saxe.getMessage());
378

    
379
			} catch (XPathExpressionException xpee) {
380
				logMetacat.debug("There was a problem searching the EML document. "
381
								+ "The error message was: " + xpee.getMessage());
382
			} catch (Exception e) {
383
				logMetacat.debug("There was a problem creating System Metadata. "
384
								+ "The error message was: " + e.getMessage());
385
			} // end try()
386

    
387
		} // end if()
388
		
389

    
390
		// create the checksum
391
		inputStream = MetacatHandler.read(localId);
392
		String algorithm = "MD5";
393
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
394
		sysMeta.setChecksum(checksum);
395

    
396
		// set the size
397
		inputStream = MetacatHandler.read(localId);
398
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
399
		sysMeta.setSize(new BigInteger(sizeStr));
400

    
401
		// submitter
402
		Subject submitter = new Subject();
403
		submitter.setValue(docInfo.get("user_updated"));
404
		sysMeta.setSubmitter(submitter);
405
		
406
		// rights holder
407
		Subject owner = new Subject();
408
		owner.setValue(docInfo.get("user_owner"));
409
		sysMeta.setRightsHolder(owner);
410

    
411
		// dates
412
		String createdDateString = docInfo.get("date_created");
413
		String updatedDateString = docInfo.get("date_updated");
414
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
415
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
416
		sysMeta.setDateUploaded(createdDate);
417
		sysMeta.setDateSysMetadataModified(updatedDate);
418
		
419
		// set the revision history
420
		String docidWithoutRev = accNum.getDocid();
421
		Identifier obsoletedBy = new Identifier();
422
		Identifier obsoletes = new Identifier();
423
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
424
		for (int existingRev: revisions) {
425
			// use the docid+rev as the guid
426
			String existingPid = docidWithoutRev + "." + existingRev;
427
			if (existingRev < rev) {
428
				// it's the old docid, until it's not
429
				obsoletes.setValue(existingPid);
430
			}
431
			if (existingRev > rev) {
432
				// it's the newer docid
433
				obsoletedBy.setValue(existingPid);
434
				// only want the version just after it
435
				break;
436
			}
437
		}
438
		// set them
439
		sysMeta.setObsoletedBy(obsoletedBy);
440
		sysMeta.setObsoletes(obsoletes);
441
		
442
		// TODO: access control?
443
		// I believe the access control will be fine since we use the same storage mechanism for Metacat/D1
444
		
445
		// authoritative node
446
		NodeReference nr = new NodeReference();
447
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
448
		sysMeta.setOriginMemberNode(nr);
449
		sysMeta.setAuthoritativeMemberNode(nr);
450

    
451
		return sysMeta;
452
	}
453

    
454
	/**
455
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
456
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
457
	 * evaluated.
458
	 * 
459
	 * @param is The InputStream of bytes
460
	 * 
461
	 * @return size The size in bytes of the input stream as a long
462
	 * 
463
	 * @throws IOException
464
	 */
465
	private static long sizeOfStream(InputStream is) throws IOException {
466

    
467
		long size = 0;
468
		byte[] b = new byte[1024];
469
		int numread = is.read(b, 0, 1024);
470
		while (numread != -1) {
471
			size += numread;
472
			numread = is.read(b, 0, 1024);
473
		}
474
		return size;
475

    
476
	}
477
}