Project

General

Profile

1 6705 leinfelder
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class for generating DataONE system metadata for objects stored in Metacat
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author$'
9
 *     '$Date$'
10
 * '$Revision$'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27
28 6962 leinfelder
import java.io.File;
29 6705 leinfelder
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32 6852 leinfelder
import java.net.URL;
33 6873 leinfelder
import java.net.URLConnection;
34 6705 leinfelder
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36 6712 leinfelder
import java.util.ArrayList;
37 6727 leinfelder
import java.util.Collections;
38 6705 leinfelder
import java.util.Date;
39 6712 leinfelder
import java.util.HashMap;
40 6705 leinfelder
import java.util.Hashtable;
41 6712 leinfelder
import java.util.List;
42
import java.util.Map;
43 6709 leinfelder
import java.util.Vector;
44 6705 leinfelder
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47
48 6712 leinfelder
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50 6705 leinfelder
import org.apache.log4j.Logger;
51 6721 leinfelder
import org.apache.wicket.protocol.http.MockHttpServletRequest;
52 6705 leinfelder
import org.dataone.client.ObjectFormatCache;
53 6960 leinfelder
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56 6712 leinfelder
import org.dataone.ore.ResourceMapFactory;
57 6705 leinfelder
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59 6721 leinfelder
import org.dataone.service.types.v1.AccessPolicy;
60 6705 leinfelder
import org.dataone.service.types.v1.Checksum;
61
import org.dataone.service.types.v1.Identifier;
62
import org.dataone.service.types.v1.NodeReference;
63
import org.dataone.service.types.v1.ObjectFormatIdentifier;
64 6721 leinfelder
import org.dataone.service.types.v1.Session;
65 6705 leinfelder
import org.dataone.service.types.v1.Subject;
66
import org.dataone.service.types.v1.SystemMetadata;
67
import org.dataone.service.types.v1.util.ChecksumUtil;
68 6709 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
69 6712 leinfelder
import org.dspace.foresite.ResourceMap;
70 6705 leinfelder
import org.jibx.runtime.JiBXException;
71
import org.xml.sax.SAXException;
72
73
import edu.ucsb.nceas.metacat.AccessionNumber;
74
import edu.ucsb.nceas.metacat.AccessionNumberException;
75 6709 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
76 6962 leinfelder
import edu.ucsb.nceas.metacat.DocumentImpl;
77 6705 leinfelder
import edu.ucsb.nceas.metacat.IdentifierManager;
78
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
79
import edu.ucsb.nceas.metacat.McdbException;
80 6712 leinfelder
import edu.ucsb.nceas.metacat.MetaCatServlet;
81 6705 leinfelder
import edu.ucsb.nceas.metacat.MetacatHandler;
82 6708 leinfelder
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
83 6705 leinfelder
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
84
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
85
import edu.ucsb.nceas.metacat.properties.PropertyService;
86 6708 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
87 6721 leinfelder
import edu.ucsb.nceas.metacat.shared.AccessException;
88 6708 leinfelder
import edu.ucsb.nceas.metacat.shared.HandlerException;
89 6705 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
90
import edu.ucsb.nceas.utilities.ParseLSIDException;
91
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
92
93
public class SystemMetadataFactory {
94 6706 leinfelder
95 6800 leinfelder
	private static final String resourceMapPrefix = "resourceMap_";
96 6707 leinfelder
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
97 6961 leinfelder
	/**
98
	 * use this flag if you want to update any existing system metadata values with generated content
99
	 */
100
	private static boolean updateExisting = true;
101 6712 leinfelder
102 6705 leinfelder
	/**
103 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
104
	 *
105
	 * @param localId
106
	 *            The local document identifier
107
	 * @param user
108
	 *            The user submitting the system metadata document
109
	 * @param groups
110
	 *            The groups the user belongs to
111
	 *
112
	 * @return sysMeta The system metadata object created
113 6708 leinfelder
	 * @throws SAXException
114
	 * @throws HandlerException
115
	 * @throws AccessControlException
116 6721 leinfelder
	 * @throws AccessException
117 6706 leinfelder
	 */
118 6852 leinfelder
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
119 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
120
			IOException, AccessionNumberException, ClassNotFoundException,
121
			InsufficientKarmaException, ParseLSIDException,
122
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
123 6721 leinfelder
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
124 6707 leinfelder
125 6964 leinfelder
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
126 6705 leinfelder
127 6961 leinfelder
		// check for system metadata
128
		SystemMetadata sysMeta = null;
129
130 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
131 6808 leinfelder
		int rev = Integer.valueOf(accNum.getRev());
132 6961 leinfelder
133
		// get/make the guid
134
		String guid = null;
135
		try {
136
			// get the guid if it exists
137
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
138
		} catch (McdbDocNotFoundException dnfe) {
139
			// otherwise create the mapping
140 6964 leinfelder
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
141 6961 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
142 6964 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
143 6961 leinfelder
		}
144
145
		// look up existing system metadata if it exists
146
		Identifier identifier = new Identifier();
147
		identifier.setValue(guid);
148
		try {
149 6964 leinfelder
			logMetacat.debug("Using hazelcast to get system metadata");
150 6961 leinfelder
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
151
			// TODO: if this is the case, we could return here -- what else do we gain?
152
			if (!updateExisting ) {
153
				return sysMeta;
154
			}
155
		} catch (Exception e) {
156 6964 leinfelder
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
157
158 6970 leinfelder
		}
159
160
		if (sysMeta == null) {
161 6961 leinfelder
			// create system metadata
162
			sysMeta = new SystemMetadata();
163
			sysMeta.setIdentifier(identifier);
164
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
165
			sysMeta.setArchived(false);
166
		}
167 6962 leinfelder
168 6706 leinfelder
		// get additional docinfo
169 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
170 6706 leinfelder
		// set the default object format
171 6708 leinfelder
		String doctype = docInfo.get("doctype");
172 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
173
174
		// set the object format, fall back to defaults
175
		try {
176 6707 leinfelder
			fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
177 6706 leinfelder
		} catch (NotFound nfe) {
178 6964 leinfelder
			// format is not registered, use default
179
			if (doctype.trim().equals("BIN")) {
180
				fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
181
			} else {
182
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
183 6706 leinfelder
			}
184
		}
185
186
		sysMeta.setFormatId(fmtid);
187 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
188 6706 leinfelder
189 6962 leinfelder
		// for retrieving the actual object
190
		InputStream inputStream = null;
191
		inputStream = MetacatHandler.read(localId);
192
193 6721 leinfelder
		// create the checksum
194
		String algorithm = "MD5";
195
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
196
		sysMeta.setChecksum(checksum);
197
198 6962 leinfelder
		// set the size from file on disk, don't read bytes again
199
		File fileOnDisk = getFileOnDisk(localId);
200
		long fileSize = 0;
201
		if (fileOnDisk.exists()) {
202
			fileSize = fileOnDisk.length();
203
		}
204
		sysMeta.setSize(BigInteger.valueOf(fileSize));
205 6721 leinfelder
206
		// submitter
207
		Subject submitter = new Subject();
208
		submitter.setValue(docInfo.get("user_updated"));
209
		sysMeta.setSubmitter(submitter);
210
211
		// rights holder
212
		Subject owner = new Subject();
213
		owner.setValue(docInfo.get("user_owner"));
214
		sysMeta.setRightsHolder(owner);
215
216
		// dates
217
		String createdDateString = docInfo.get("date_created");
218
		String updatedDateString = docInfo.get("date_updated");
219
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
220
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
221
		sysMeta.setDateUploaded(createdDate);
222
		sysMeta.setDateSysMetadataModified(updatedDate);
223
224
		// set the revision history
225
		String docidWithoutRev = accNum.getDocid();
226
		Identifier obsoletedBy = null;
227
		Identifier obsoletes = null;
228
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
229 6727 leinfelder
		// ensure this ordering since processing depends on it
230
		Collections.sort(revisions);
231 6721 leinfelder
		for (int existingRev: revisions) {
232
			// use the docid+rev as the guid
233
			String existingPid = docidWithoutRev + "." + existingRev;
234
			if (existingRev < rev) {
235
				// it's the old docid, until it's not
236
				obsoletes = new Identifier();
237
				obsoletes.setValue(existingPid);
238
			}
239
			if (existingRev > rev) {
240
				// it's the newer docid
241
				obsoletedBy = new Identifier();
242
				obsoletedBy.setValue(existingPid);
243
				// only want the version just after it
244
				break;
245
			}
246
		}
247 6725 leinfelder
		// set them on our object
248 6721 leinfelder
		sysMeta.setObsoletedBy(obsoletedBy);
249
		sysMeta.setObsoletes(obsoletes);
250
251 6725 leinfelder
		// update the system metadata for the object[s] we are revising
252
		if (obsoletedBy != null) {
253 6971 leinfelder
			SystemMetadata obsoletedBySysMeta = null;
254
			try {
255
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
256
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
257
			} catch (McdbDocNotFoundException e) {
258
				// ignore
259
			}
260 6725 leinfelder
			if (obsoletedBySysMeta != null) {
261
				obsoletedBySysMeta.setObsoletes(identifier);
262
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
263
			}
264
		}
265
		if (obsoletes != null) {
266 6971 leinfelder
			SystemMetadata obsoletesSysMeta = null;
267
			try {
268
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
269
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
270
			} catch (McdbDocNotFoundException e) {
271
				// ignore
272
			}
273 6725 leinfelder
			if (obsoletesSysMeta != null) {
274
				obsoletesSysMeta.setObsoletedBy(identifier);
275 6911 leinfelder
				obsoletesSysMeta.setArchived(true);
276
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
277 6725 leinfelder
			}
278
		}
279
280 6744 leinfelder
		// look up the access control policy we have in metacat
281
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
282 6721 leinfelder
		sysMeta.setAccessPolicy(accessPolicy);
283
284
		// authoritative node
285
		NodeReference nr = new NodeReference();
286
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
287
		sysMeta.setOriginMemberNode(nr);
288
		sysMeta.setAuthoritativeMemberNode(nr);
289
290 6706 leinfelder
		// further parse EML documents to get data object format,
291
		// describes and describedBy information
292
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
293
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
294
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
295
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
296
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
297
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
298
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
299
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
300
301
			try {
302 6962 leinfelder
303
				// get it again to parse the document
304
				logMetacat.debug("Re-reading document inputStream");
305 6721 leinfelder
				inputStream = MetacatHandler.read(localId);
306 6960 leinfelder
307
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
308
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
309
310 6721 leinfelder
				// iterate through the data objects in the EML doc and add sysmeta
311 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
312 6706 leinfelder
								+ "entities is: "
313 6960 leinfelder
								+ emlDocument.distributionMetadata);
314 6706 leinfelder
315 6712 leinfelder
				// for generating the ORE map
316
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
317
	            List<Identifier> dataIds = new ArrayList<Identifier>();
318
319 6706 leinfelder
				// iterate through data objects described by the EML
320 6960 leinfelder
	            if (emlDocument.distributionMetadata != null) {
321
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
322 6744 leinfelder
323 6960 leinfelder
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
324
				        String dataDocUrl = distMetadata.url;
325
				        String dataDocMimeType = distMetadata.mimeType;
326 6744 leinfelder
						// default to binary
327
						if (dataDocMimeType == null) {
328
							dataDocMimeType = ObjectFormatCache.getInstance()
329
									.getFormat("application/octet-stream")
330
									.getFormatId().getValue();
331 6721 leinfelder
						}
332 6852 leinfelder
333
						// process the data
334 6855 leinfelder
						boolean remoteData = false;
335 6852 leinfelder
						String dataDocLocalId = null;
336
						Identifier dataGuid = new Identifier();
337
338
						// handle ecogrid, or downloadable data
339
						String ecogridPrefix = "ecogrid://knb/";
340
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
341
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
342
						} else {
343
							// should we try downloading the remote data?
344
							if (downloadData) {
345
								InputStream dataObject = null;
346
								try {
347
									// download the data from the URL
348
									URL dataURL = new URL(dataDocUrl);
349 6873 leinfelder
									URLConnection dataConnection = dataURL.openConnection();
350
351
									// default is to download the data
352
									dataObject = dataConnection.getInputStream();
353
354
									String detectedContentType = dataConnection.getContentType();
355
									logMetacat.info("Detected content type: " + detectedContentType);
356
357
									if (detectedContentType != null) {
358
										// seems to be HTML from the remote location
359
										if (detectedContentType.contains("html")) {
360
											// if we are not expecting it, we skip it
361
											if (!dataDocMimeType.contains("html")) {
362
												// set to null so we don't download it
363
												dataObject = null;
364
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
365
											}
366
										}
367
368
									} else {
369
										// if we don't know what it is, should we skip it?
370
										dataObject = null;
371
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
372
									}
373
374 6852 leinfelder
								} catch (Exception e) {
375
									// error with the download
376
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
377
								}
378
379
								if (dataObject != null) {
380
									// create the local version of it
381
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
382
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
383
									dataGuid.setValue(dataDocLocalId);
384
385
									// save it locally
386
									Session session = new Session();
387
									session.setSubject(submitter);
388
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
389
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
390 6855 leinfelder
391
									remoteData = true;
392 6852 leinfelder
								}
393
							}
394
395
						}
396
397 6744 leinfelder
						logMetacat.debug("Data local ID: " + dataDocLocalId);
398
						logMetacat.debug("Data URL     : " + dataDocUrl);
399
						logMetacat.debug("Data mime    : " + dataDocMimeType);
400 6852 leinfelder
401
						// now we have a local id for the data
402
						if (dataDocLocalId != null) {
403
404 6744 leinfelder
							// look up the guid for the data
405
							String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
406
							int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
407
408
							// check if data system metadata exists already
409
							SystemMetadata dataSysMeta = null;
410
							String dataGuidString = null;
411 6706 leinfelder
							try {
412 6744 leinfelder
								// look for the identifier
413 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
414 6744 leinfelder
								// set it
415 6706 leinfelder
								dataGuid.setValue(dataGuidString);
416 6744 leinfelder
								// look up the system metadata
417 6706 leinfelder
								try {
418 6744 leinfelder
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
419
								} catch (Exception e) {
420
									// probably not in the system
421
									dataSysMeta = null;
422 6706 leinfelder
								}
423 6744 leinfelder
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
424
							} catch (McdbDocNotFoundException nf) {
425
								// we didn't find it
426
								dataSysMeta = null;
427
							}
428 6712 leinfelder
429 6744 leinfelder
							// we'll have to generate it
430
							if (dataSysMeta == null) {
431
								// System metadata for data doesn't exist yet, so create it
432 6964 leinfelder
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
433 6961 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
434
435
								// now look it up again
436
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
437
438
								// set the guid
439
								dataGuid.setValue(dataGuidString);
440
441
								// inherit access rules from metadata, if we don't have our own
442
								if (remoteData) {
443
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
444
									// TODO: use access rules defined in EML, per data file
445
								}
446 6744 leinfelder
447
							}
448 6721 leinfelder
449 6961 leinfelder
							// set object format for the data file
450 6964 leinfelder
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
451 6961 leinfelder
							try {
452
								ObjectFormatIdentifier fmt =
453
									ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
454
								dataSysMeta.setFormatId(fmt);
455
							} catch (NotFound nfe) {
456
								logMetacat.debug("Couldn't find format identifier for: "
457
												+ dataDocMimeType
458
												+ ". Setting it to application/octet-stream.");
459
								ObjectFormatIdentifier newFmtid = new ObjectFormatIdentifier();
460
								newFmtid.setValue("application/octet-stream");
461
							}
462
463
							// update the values
464
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
465
466
							// include as part of the ORE package
467 6744 leinfelder
							dataIds.add(dataGuid);
468
469
						} // end if (EML package)
470
471
					} // end for (data entities)
472
473
	            } // data entities not null
474
475 6712 leinfelder
				// ORE map
476 6713 leinfelder
				if (includeORE) {
477 6800 leinfelder
					// can we generate them?
478 6713 leinfelder
			        if (!dataIds.isEmpty()) {
479 6800 leinfelder
			        	// it doesn't exist in the system?
480
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
481
482
				            // generate the ORE map for this datapackage
483
				            Identifier resourceMapId = new Identifier();
484
				            // want to be able to run this over and over again for now
485
				            resourceMapId.setValue(resourceMapPrefix + sysMeta.getIdentifier().getValue());
486
				            idMap.put(sysMeta.getIdentifier(), dataIds);
487
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
488
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
489
				            // copy most of the same system metadata as the packaging metadata
490
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
491
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
492
				            resourceMapSysMeta.setIdentifier(resourceMapId);
493
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
494
							resourceMapSysMeta.setChecksum(oreChecksum);
495
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
496
							resourceMapSysMeta.setFormatId(formatId);
497
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
498
499
							// set the revision graph
500
							resourceMapSysMeta.setObsoletes(null);
501
							resourceMapSysMeta.setObsoletedBy(null);
502
							// look up the resource map that this one obsoletes
503
							if (sysMeta.getObsoletes() != null) {
504
								Identifier resourceMapObsoletes = new Identifier();
505
								resourceMapObsoletes.setValue(resourceMapPrefix + sysMeta.getObsoletes().getValue());
506
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
507
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
508
								if (resourceMapObsoletesSystemMetadata != null) {
509
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
510
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
511
								}
512
							}
513
							// look up the resource map that this one is obsoletedBy
514
							if (sysMeta.getObsoletedBy() != null) {
515
								Identifier resourceMapObsoletedBy = new Identifier();
516
								resourceMapObsoletedBy.setValue(resourceMapPrefix + sysMeta.getObsoletedBy().getValue());
517
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
518
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
519
								if (resourceMapObsoletedBySystemMetadata != null) {
520
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
521
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
522
								}
523
							}
524
525 6907 leinfelder
							// save it locally, if it doesn't already exist
526
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
527
								Session session = new Session();
528
								session.setSubject(submitter);
529
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
530
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
531
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
532
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
533
							}
534 6800 leinfelder
			        	}
535 6713 leinfelder
			        }
536
				}
537 6706 leinfelder
538
			} catch (ParserConfigurationException pce) {
539 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
540 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
541
542
			} catch (SAXException saxe) {
543 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
544 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
545
546
			} catch (XPathExpressionException xpee) {
547 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
548 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
549
			} catch (Exception e) {
550 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
551 6706 leinfelder
								+ "The error message was: " + e.getMessage());
552 6721 leinfelder
				e.printStackTrace();
553 6706 leinfelder
			} // end try()
554
555
		} // end if()
556
557
		return sysMeta;
558
	}
559 6800 leinfelder
560
	/**
561 6911 leinfelder
     * Generate SystemMetadata for any object in the object store that does
562
     * not already have it.  SystemMetadata documents themselves, are, of course,
563
     * exempt.  This is a utility method for migration of existing object
564
     * stores to DataONE where SystemMetadata is required for all objects.
565
     * @param idList
566
     * @param includeOre
567
     * @param downloadData
568
     * @throws PropertyNotFoundException
569
     * @throws NoSuchAlgorithmException
570
     * @throws AccessionNumberException
571
     * @throws SQLException
572 6964 leinfelder
	 * @throws SAXException
573
	 * @throws HandlerException
574
	 * @throws JiBXException
575
	 * @throws BaseException
576
	 * @throws ParseLSIDException
577
	 * @throws InsufficientKarmaException
578
	 * @throws ClassNotFoundException
579
	 * @throws IOException
580
	 * @throws McdbException
581
	 * @throws AccessException
582
	 * @throws AccessControlException
583 6911 leinfelder
     */
584
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData)
585 6964 leinfelder
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException
586 6911 leinfelder
    {
587
588
        for (String localId : idList) {
589
            //for each id, add a system metadata doc
590 6964 leinfelder
        	generateSystemMetadata(localId, includeOre, downloadData);
591 6911 leinfelder
        }
592 6964 leinfelder
        logMetacat.info("done generating system metadata for given list");
593 6911 leinfelder
    }
594
595
596
    /**
597
     * Generate SystemMetadata for a particular object with identifier localId.
598
     * This is a utility method for migration of existing objects
599
     * to DataONE where SystemMetadata is required for all objects.
600
     * @param localId
601
     * @param includeOre
602
     * @param downloadData
603
     * @throws PropertyNotFoundException
604
     * @throws NoSuchAlgorithmException
605
     * @throws AccessionNumberException
606
     * @throws SQLException
607 6964 leinfelder
     * @throws SAXException
608
     * @throws HandlerException
609
     * @throws JiBXException
610
     * @throws BaseException
611
     * @throws ParseLSIDException
612
     * @throws InsufficientKarmaException
613
     * @throws ClassNotFoundException
614
     * @throws IOException
615
     * @throws McdbException
616
     * @throws AccessException
617
     * @throws AccessControlException
618 6911 leinfelder
     */
619
    protected static void generateSystemMetadata(String localId, boolean includeOre, boolean downloadData)
620 6964 leinfelder
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException
621 6911 leinfelder
    {
622
    	logMetacat.debug("Creating SystemMetadata for localId " + localId);
623
        SystemMetadata sm = null;
624
625
        //generate required system metadata fields from the document
626 6964 leinfelder
    	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
627
628 6911 leinfelder
        //insert the systemmetadata object or just update it as needed
629
        boolean exists = IdentifierManager.getInstance().systemMetadataExists(sm.getIdentifier().getValue());
630
        if (!exists) {
631
        	IdentifierManager.getInstance().createSystemMetadata(sm);
632
        	logMetacat.info("Generated SystemMetadata for " + localId);
633
        } else {
634
        	IdentifierManager.getInstance().updateSystemMetadata(sm);
635
        	logMetacat.info("Updated SystemMetadata for " + localId);
636
        }
637
    }
638
639
	/**
640 6800 leinfelder
	 * Determines if we already have registered an ORE map for this package
641
	 * @param guid of the EML/packaging object
642
	 * @return true if there is an ORE map for the given package
643
	 */
644
	private static boolean oreExistsFor(Identifier guid) {
645
		// TODO: implement call to CN.search()
646
		return false;
647
	}
648 6706 leinfelder
649 6707 leinfelder
	/**
650 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
651
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
652
	 * evaluated.
653
	 *
654
	 * @param is The InputStream of bytes
655
	 *
656
	 * @return size The size in bytes of the input stream as a long
657
	 *
658
	 * @throws IOException
659
	 */
660
	private static long sizeOfStream(InputStream is) throws IOException {
661
662
		long size = 0;
663
		byte[] b = new byte[1024];
664
		int numread = is.read(b, 0, 1024);
665
		while (numread != -1) {
666
			size += numread;
667
			numread = is.read(b, 0, 1024);
668
		}
669
		return size;
670
671
	}
672 6962 leinfelder
673
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
674
675
		DocumentImpl doc = new DocumentImpl(docid, false);
676
		String filepath = null;
677
		String filename = null;
678
679
		// deal with data or metadata cases
680
		if (doc.getRootNodeID() == 0) {
681
			// this is a data file
682
			filepath = PropertyService.getProperty("application.datafilepath");
683
		} else {
684
			filepath = PropertyService.getProperty("application.documentfilepath");
685
		}
686
		// ensure it is a directory path
687
		if (!(filepath.endsWith("/"))) {
688
			filepath += "/";
689
		}
690
		filename = filepath + docid;
691
		File documentFile = new File(filename);
692
693
		return documentFile;
694
	}
695 6705 leinfelder
}