/**
 *  '$RCSfile$'
 *    Purpose: A Class for upgrading the database to version 1.5
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-11-29 14:15:41 -0800 (Tue, 29 Nov 2011) $'
 * '$Revision: 6712 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.BufferedInputStream;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.security.NoSuchAlgorithmException;
33
import java.sql.SQLException;
34
import java.util.ArrayList;
35
import java.util.Date;
36
import java.util.HashMap;
37
import java.util.Hashtable;
38
import java.util.List;
39
import java.util.Map;
40
import java.util.Vector;
41

    
42
import javax.xml.parsers.ParserConfigurationException;
43
import javax.xml.xpath.XPathExpressionException;
44

    
45
import org.apache.commons.beanutils.BeanUtils;
46
import org.apache.commons.io.IOUtils;
47
import org.apache.log4j.Logger;
48
import org.dataone.client.ObjectFormatCache;
49
import org.dataone.ore.ResourceMapFactory;
50
import org.dataone.service.exceptions.BaseException;
51
import org.dataone.service.exceptions.NotFound;
52
import org.dataone.service.types.v1.Checksum;
53
import org.dataone.service.types.v1.Identifier;
54
import org.dataone.service.types.v1.NodeReference;
55
import org.dataone.service.types.v1.ObjectFormatIdentifier;
56
import org.dataone.service.types.v1.Subject;
57
import org.dataone.service.types.v1.SystemMetadata;
58
import org.dataone.service.types.v1.util.ChecksumUtil;
59
import org.dataone.service.util.DateTimeMarshaller;
60
import org.dspace.foresite.ResourceMap;
61
import org.ecoinformatics.datamanager.DataManager;
62
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
63
import org.ecoinformatics.datamanager.parser.DataPackage;
64
import org.jibx.runtime.JiBXException;
65
import org.xml.sax.SAXException;
66

    
67
import edu.ucsb.nceas.metacat.AccessionNumber;
68
import edu.ucsb.nceas.metacat.AccessionNumberException;
69
import edu.ucsb.nceas.metacat.DBUtil;
70
import edu.ucsb.nceas.metacat.IdentifierManager;
71
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
72
import edu.ucsb.nceas.metacat.McdbException;
73
import edu.ucsb.nceas.metacat.MetaCatServlet;
74
import edu.ucsb.nceas.metacat.MetacatHandler;
75
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
76
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
77
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
78
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
79
import edu.ucsb.nceas.metacat.properties.PropertyService;
80
import edu.ucsb.nceas.metacat.replication.ReplicationService;
81
import edu.ucsb.nceas.metacat.shared.HandlerException;
82
import edu.ucsb.nceas.metacat.util.DocumentUtil;
83
import edu.ucsb.nceas.utilities.ParseLSIDException;
84
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
85

    
86
public class SystemMetadataFactory {
87

    
88
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
89
	
90
	/**
91
	 * Creates a system metadata object for insertion into metacat
92
	 * 
93
	 * @param localId
94
	 *            The local document identifier
95
	 * @param user
96
	 *            The user submitting the system metadata document
97
	 * @param groups
98
	 *            The groups the user belongs to
99
	 * 
100
	 * @return sysMeta The system metadata object created
101
	 * @throws SAXException 
102
	 * @throws HandlerException 
103
	 * @throws AccessControlException 
104
	 */
105
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE)
106
			throws McdbException, McdbDocNotFoundException, SQLException,
107
			IOException, AccessionNumberException, ClassNotFoundException,
108
			InsufficientKarmaException, ParseLSIDException,
109
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
110
			JiBXException, AccessControlException, HandlerException, SAXException {
111
		
112
		logMetacat.debug("MetacatHandler.createSystemMetadata() called.");
113
		logMetacat.debug("provided localId: " + localId);
114

    
115
		// create system metadata for the document
116
		SystemMetadata sysMeta = new SystemMetadata();
117
		sysMeta.setSerialVersion(BigInteger.valueOf(1));
118
		int rev = IdentifierManager.getInstance().getLatestRevForLocalId(localId);
119
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
120
		String guid = null;
121
		try {
122
			// get the guid if it exists
123
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
124
		} catch (McdbDocNotFoundException dnfe) {
125
			// otherwise create the mapping
126
			logMetacat.debug("There was a problem getting the guid from "
127
							+ "the given localId (docid and revision). The error message was: "
128
							+ dnfe.getMessage());
129
			logMetacat.debug("No guid in the identifier table.  adding it for " + localId);
130
			IdentifierManager.getInstance().createMapping(localId, localId);
131
			logMetacat.debug("Mapping created for " + localId);
132
			logMetacat.debug("accessionNumber: " + accNum);
133
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
134
		}
135
		Identifier identifier = new Identifier();
136
		identifier.setValue(guid);
137

    
138
		// set the id
139
		sysMeta.setIdentifier(identifier);
140

    
141
		// get the data or metadata object
142
		InputStream inputStream;
143
		try {
144
			inputStream = MetacatHandler.read(localId);
145
		} catch (ParseLSIDException ple) {
146
			logMetacat.debug("There was a problem parsing the LSID from "
147
					+ localId + ". The error message was: " + ple.getMessage());
148
			throw ple;
149

    
150
		} catch (PropertyNotFoundException pnfe) {
151
			logMetacat.debug("There was a problem finding a property. "
152
					+ "The error message was: " + pnfe.getMessage());
153
			throw pnfe;
154

    
155
		} catch (McdbException me) {
156
			logMetacat.debug("There was a Metacat problem. "
157
					+ "The error message was: " + me.getMessage());
158
			throw me;
159

    
160
		} catch (SQLException sqle) {
161
			logMetacat.debug("There was a SQL problem. "
162
					+ "The error message was: " + sqle.getMessage());
163
			throw sqle;
164

    
165
		} catch (ClassNotFoundException cnfe) {
166
			logMetacat.debug("There was a problem finding a class. "
167
					+ "The error message was: " + cnfe.getMessage());
168
			throw cnfe;
169

    
170
		} catch (IOException ioe) {
171
			logMetacat.debug("There was an I/O exception. "
172
					+ "The error message was: " + ioe.getMessage());
173
			throw ioe;
174

    
175
		} // end try()
176

    
177
		// get additional docinfo
178
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
179
		// set the default object format
180
		String doctype = docInfo.get("doctype");
181
		ObjectFormatIdentifier fmtid = null;
182

    
183
		// set the object format, fall back to defaults
184
		try {
185
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
186
		} catch (NotFound nfe) {
187

    
188
			try {
189
				// format is not registered, use default
190
				if (doctype.trim().equals("BIN")) {
191
					fmtid = ObjectFormatCache.getInstance().getFormat(
192
							"application/octet-stream").getFormatId();
193

    
194
				} else {
195
					fmtid = ObjectFormatCache.getInstance().getFormat(
196
							"text/plain").getFormatId();
197
				}
198

    
199
			} catch (NotFound nf) {
200
				logMetacat.error("There was a problem getting the default format "
201
								+ "from the ObjectFormatCache: "
202
								+ nf.getMessage());
203
				throw nf;
204
			}
205

    
206
		}
207

    
208
		sysMeta.setFormatId(fmtid);
209
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
210

    
211
		// further parse EML documents to get data object format,
212
		// describes and describedBy information
213
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
214
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
215
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
216
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
217
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
218
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
219
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
220
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
221

    
222
			try {
223

    
224
				DatabaseConnectionPoolInterface connectionPool = 
225
					MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
226
				DataManager dataManager = 
227
					DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
228
				DataPackage dataPackage = dataManager.parseMetadata(inputStream);
229

    
230
				// iterate through the data objects in the EML doc and add
231
				// sysmeta
232
				logMetacat.debug("In createSystemMetadata() the number of data "
233
								+ "entities is: "
234
								+ dataPackage.getEntityNumber());
235

    
236
				// for generating the ORE map
237
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
238
	            List<Identifier> dataIds = new ArrayList<Identifier>();
239
				
240
				// iterate through data objects described by the EML
241
				for (int j = 0; j < dataPackage.getEntityList().length; j++) {
242

    
243
					String dataDocUrl = dataPackage.getEntityList()[j].getURL();
244
					String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
245
					// default to binary
246
					if (dataDocMimeType == null) {
247
						dataDocMimeType = ObjectFormatCache.getInstance()
248
								.getFormat("application/octet-stream")
249
								.getFormatId().getValue();
250
					}
251
					String dataDocLocalId = "";
252
					logMetacat.debug("Data local ID: " + dataDocLocalId);
253
					logMetacat.debug("Data URL     : " + dataDocUrl);
254
					logMetacat.debug("Data mime    : " + dataDocMimeType);
255

    
256
					// we only handle ecogrid urls right now
257
					String ecogridPrefix = "ecogrid://knb/";
258
					if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
259
						dataDocLocalId = dataDocUrl.substring(dataDocUrl
260
								.indexOf(ecogridPrefix)
261
								+ ecogridPrefix.length());
262

    
263
						// look up the guid for the data
264
						String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
265
						int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
266

    
267
						SystemMetadata dataSysMeta = null;
268
						// check if data system metadata exists
269
						String dataGuidString = null;
270
						try {
271
							dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
272
							dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
273
						} catch (McdbDocNotFoundException nf) {
274
							// System metadata for data doesn't exist yet, so
275
							// create it
276
							logMetacat.debug("There was not an existing system metadata "
277
											+ "document for " + dataDocLocalId);
278
							try {
279
								logMetacat.debug("Creating a system metadata "
280
										+ "document for " + dataDocLocalId);
281
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE);
282

    
283
								// now look it up again
284
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
285

    
286
								// set the guid
287
								Identifier dataGuid = new Identifier();
288
								dataGuid.setValue(dataGuidString);
289

    
290
								// set object format
291
								logMetacat.debug("Updating system metadata for "
292
												+ dataGuid.getValue() + " to "
293
												+ dataDocMimeType);
294
								try {
295
									ObjectFormatIdentifier fmt = 
296
										ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
297
									dataSysMeta.setFormatId(fmt);
298
								} catch (NotFound nfe) {
299
									logMetacat.debug("Couldn't find format identifier for: "
300
													+ dataDocMimeType
301
													+ ". Setting it to application/octet-stream.");
302
									ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
303
									newFmtid.setValue("application/octet-stream");
304
								}
305

    
306
								// update the values
307
								HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
308
								
309
								// part of the ORE package
310
								dataIds.add(dataGuid);
311

    
312
							} catch (McdbDocNotFoundException mdnf) {
313
								mdnf.printStackTrace();
314
								throw mdnf;
315
							} catch (NumberFormatException nfe) {
316
								nfe.printStackTrace();
317
								throw nfe;
318
							} catch (AccessionNumberException ane) {
319
								ane.printStackTrace();
320
								throw ane;
321
							} catch (SQLException sqle) {
322
								sqle.printStackTrace();
323
								throw sqle;
324
							} catch (NoSuchAlgorithmException nsae) {
325
								nsae.printStackTrace();
326
								throw nsae;
327
							} catch (IOException ioe) {
328
								ioe.printStackTrace();
329
								throw ioe;
330
							} catch (PropertyNotFoundException pnfe) {
331
								pnfe.printStackTrace();
332
								throw pnfe;
333
							} catch (BaseException be) {
334
								be.printStackTrace();
335
								throw be;
336
							}
337
						}
338

    
339
					} // end if()
340

    
341
				} // end for (data entities)
342
				
343
				// ORE map
344
		        if (!dataIds.isEmpty()) {
345
		            // generate the ORE map for this datapackage
346
		            Identifier resourceMapId = new Identifier();
347
		            resourceMapId.setValue("resourceMap_" + sysMeta.getIdentifier().getValue());
348
		            idMap.put(sysMeta.getIdentifier(), dataIds);
349
		            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
350
		            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
351
		            // copy most of the same system metadata as the packaging metadata
352
		            SystemMetadata resourceMapSysMeta = new SystemMetadata();
353
		            BeanUtils.copyProperties(resourceMapXML, sysMeta);
354
		            resourceMapSysMeta.setIdentifier(resourceMapId);
355
		            Checksum checksum = ChecksumUtil.checksum(new BufferedInputStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING)), "MD5");
356
					resourceMapSysMeta.setChecksum(checksum);
357
		            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
358
					resourceMapSysMeta.setFormatId(formatId);
359
		            // TODO: other fields to update?
360
		            
361
					// save it locally
362
					MNodeService.getInstance(null).create(null, resourceMapId, IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapSysMeta);
363
		            
364
		        }
365

    
366
			} catch (ParserConfigurationException pce) {
367
				logMetacat.debug("There was a problem parsing the EML document. "
368
								+ "The error message was: " + pce.getMessage());
369

    
370
			} catch (SAXException saxe) {
371
				logMetacat.debug("There was a problem traversing the EML document. "
372
								+ "The error message was: " + saxe.getMessage());
373

    
374
			} catch (XPathExpressionException xpee) {
375
				logMetacat.debug("There was a problem searching the EML document. "
376
								+ "The error message was: " + xpee.getMessage());
377
			} catch (Exception e) {
378
				logMetacat.debug("There was a problem creating System Metadata. "
379
								+ "The error message was: " + e.getMessage());
380
			} // end try()
381

    
382
		} // end if()
383
		
384

    
385
		// create the checksum
386
		inputStream = MetacatHandler.read(localId);
387
		String algorithm = "MD5";
388
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
389
		sysMeta.setChecksum(checksum);
390

    
391
		// set the size
392
		inputStream = MetacatHandler.read(localId);
393
		String sizeStr = new Long(sizeOfStream(inputStream)).toString();
394
		sysMeta.setSize(new BigInteger(sizeStr));
395

    
396
		// submitter
397
		Subject submitter = new Subject();
398
		submitter.setValue(docInfo.get("user_updated"));
399
		sysMeta.setSubmitter(submitter);
400
		
401
		// rights holder
402
		Subject owner = new Subject();
403
		owner.setValue(docInfo.get("user_owner"));
404
		sysMeta.setRightsHolder(owner);
405

    
406
		// dates
407
		String createdDateString = docInfo.get("date_created");
408
		String updatedDateString = docInfo.get("date_updated");
409
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
410
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
411
		sysMeta.setDateUploaded(createdDate);
412
		sysMeta.setDateSysMetadataModified(updatedDate);
413
		
414
		// set the revision history
415
		String docidWithoutRev = accNum.getDocid();
416
		Identifier obsoletedBy = new Identifier();
417
		Identifier obsoletes = new Identifier();
418
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
419
		for (int existingRev: revisions) {
420
			// use the docid+rev as the guid
421
			String existingPid = docidWithoutRev + "." + existingRev;
422
			if (existingRev < rev) {
423
				// it's the old docid, until it's not
424
				obsoletes.setValue(existingPid);
425
			}
426
			if (existingRev > rev) {
427
				// it's the newer docid
428
				obsoletedBy.setValue(existingPid);
429
				// only want the version just after it
430
				break;
431
			}
432
		}
433
		// set them
434
		sysMeta.setObsoletedBy(obsoletedBy);
435
		sysMeta.setObsoletes(obsoletes);
436
		
437
		// TODO: access control?
438
		// I believe the access control will be fine since we use the same storage mechanism for Metacat/D1
439
		
440
		// authoritative node
441
		NodeReference nr = new NodeReference();
442
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
443
		sysMeta.setOriginMemberNode(nr);
444
		sysMeta.setAuthoritativeMemberNode(nr);
445

    
446
		return sysMeta;
447
	}
448

    
449
	/**
450
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
451
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
452
	 * evaluated.
453
	 * 
454
	 * @param is The InputStream of bytes
455
	 * 
456
	 * @return size The size in bytes of the input stream as a long
457
	 * 
458
	 * @throws IOException
459
	 */
460
	private static long sizeOfStream(InputStream is) throws IOException {
461

    
462
		long size = 0;
463
		byte[] b = new byte[1024];
464
		int numread = is.read(b, 0, 1024);
465
		while (numread != -1) {
466
			size += numread;
467
			numread = is.read(b, 0, 1024);
468
		}
469
		return size;
470

    
471
	}
472
}
(5-5/5)