/**
 *  '$RCSfile$'
 *    Purpose: A class for generating DataONE system metadata for
 *             objects stored in Metacat
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author$'
 *     '$Date$'
 * '$Revision$'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.dataone;
27
28 6962 leinfelder
import java.io.File;
29 6705 leinfelder
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32 6852 leinfelder
import java.net.URL;
33 6873 leinfelder
import java.net.URLConnection;
34 6705 leinfelder
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36 6712 leinfelder
import java.util.ArrayList;
37 6727 leinfelder
import java.util.Collections;
38 6705 leinfelder
import java.util.Date;
39 6712 leinfelder
import java.util.HashMap;
40 6705 leinfelder
import java.util.Hashtable;
41 6712 leinfelder
import java.util.List;
42
import java.util.Map;
43 6709 leinfelder
import java.util.Vector;
44 6705 leinfelder
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47
48 6712 leinfelder
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50 6705 leinfelder
import org.apache.log4j.Logger;
51 7622 leinfelder
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52 6705 leinfelder
import org.dataone.client.ObjectFormatCache;
53 6960 leinfelder
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56 6712 leinfelder
import org.dataone.ore.ResourceMapFactory;
57 6705 leinfelder
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59 6721 leinfelder
import org.dataone.service.types.v1.AccessPolicy;
60 7214 cjones
import org.dataone.service.types.v1.AccessRule;
61 6705 leinfelder
import org.dataone.service.types.v1.Checksum;
62
import org.dataone.service.types.v1.Identifier;
63
import org.dataone.service.types.v1.NodeReference;
64
import org.dataone.service.types.v1.ObjectFormatIdentifier;
65 6988 jones
import org.dataone.service.types.v1.ReplicationPolicy;
66 6721 leinfelder
import org.dataone.service.types.v1.Session;
67 6705 leinfelder
import org.dataone.service.types.v1.Subject;
68
import org.dataone.service.types.v1.SystemMetadata;
69
import org.dataone.service.types.v1.util.ChecksumUtil;
70 6709 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
71 6712 leinfelder
import org.dspace.foresite.ResourceMap;
72 6705 leinfelder
import org.jibx.runtime.JiBXException;
73
import org.xml.sax.SAXException;
74
75 7087 cjones
import java.util.Calendar;
76 7084 leinfelder
77 6705 leinfelder
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79 6709 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
80 6962 leinfelder
import edu.ucsb.nceas.metacat.DocumentImpl;
81 6705 leinfelder
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84 6712 leinfelder
import edu.ucsb.nceas.metacat.MetaCatServlet;
85 6705 leinfelder
import edu.ucsb.nceas.metacat.MetacatHandler;
86 6708 leinfelder
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87 6705 leinfelder
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.properties.PropertyService;
90 6708 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
91 6721 leinfelder
import edu.ucsb.nceas.metacat.shared.AccessException;
92 6708 leinfelder
import edu.ucsb.nceas.metacat.shared.HandlerException;
93 6705 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
94
import edu.ucsb.nceas.utilities.ParseLSIDException;
95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
96
97
public class SystemMetadataFactory {
98 6706 leinfelder
99 7849 leinfelder
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
100 6707 leinfelder
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
101 6961 leinfelder
	/**
102
	 * use this flag if you want to update any existing system metadata values with generated content
103
	 */
104
	private static boolean updateExisting = true;
105 6712 leinfelder
106 6705 leinfelder
	/**
107 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
108
	 *
109
	 * @param localId
110
	 *            The local document identifier
111
	 * @param user
112
	 *            The user submitting the system metadata document
113
	 * @param groups
114
	 *            The groups the user belongs to
115
	 *
116
	 * @return sysMeta The system metadata object created
117 6708 leinfelder
	 * @throws SAXException
118
	 * @throws HandlerException
119
	 * @throws AccessControlException
120 6721 leinfelder
	 * @throws AccessException
121 6706 leinfelder
	 */
122 6852 leinfelder
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
123 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
124
			IOException, AccessionNumberException, ClassNotFoundException,
125
			InsufficientKarmaException, ParseLSIDException,
126
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
127 6721 leinfelder
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
128 6707 leinfelder
129 6964 leinfelder
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
130 6705 leinfelder
131 6961 leinfelder
		// check for system metadata
132
		SystemMetadata sysMeta = null;
133
134 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
135 6808 leinfelder
		int rev = Integer.valueOf(accNum.getRev());
136 6961 leinfelder
137
		// get/make the guid
138
		String guid = null;
139
		try {
140
			// get the guid if it exists
141
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
142
		} catch (McdbDocNotFoundException dnfe) {
143
			// otherwise create the mapping
144 6964 leinfelder
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
145 6961 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
146 6964 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
147 6961 leinfelder
		}
148
149
		// look up existing system metadata if it exists
150
		Identifier identifier = new Identifier();
151
		identifier.setValue(guid);
152
		try {
153 6964 leinfelder
			logMetacat.debug("Using hazelcast to get system metadata");
154 6961 leinfelder
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
155
			// TODO: if this is the case, we could return here -- what else do we gain?
156
			if (!updateExisting ) {
157
				return sysMeta;
158
			}
159
		} catch (Exception e) {
160 6964 leinfelder
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
161
162 6970 leinfelder
		}
163
164
		if (sysMeta == null) {
165 6961 leinfelder
			// create system metadata
166
			sysMeta = new SystemMetadata();
167
			sysMeta.setIdentifier(identifier);
168
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
169
			sysMeta.setArchived(false);
170
		}
171 6962 leinfelder
172 6706 leinfelder
		// get additional docinfo
173 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
174 6706 leinfelder
		// set the default object format
175 6708 leinfelder
		String doctype = docInfo.get("doctype");
176 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
177
178
		// set the object format, fall back to defaults
179 6982 leinfelder
		if (doctype.trim().equals("BIN")) {
180
			// we don't know much about this file (yet)
181
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
182 8028 leinfelder
		} else if (doctype.trim().equals("metadata")) {
183
			// special ESRI FGDC format
184
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
185 6982 leinfelder
		} else {
186
			try {
187
				// do we know the given format?
188
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
189
			} catch (NotFound nfe) {
190
				// format is not registered, use default
191 6964 leinfelder
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
192 6706 leinfelder
			}
193
		}
194
195
		sysMeta.setFormatId(fmtid);
196 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
197 6706 leinfelder
198 6962 leinfelder
		// for retrieving the actual object
199
		InputStream inputStream = null;
200
		inputStream = MetacatHandler.read(localId);
201
202 6721 leinfelder
		// create the checksum
203 7222 leinfelder
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
204 6721 leinfelder
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
205 7084 leinfelder
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
206 6721 leinfelder
		sysMeta.setChecksum(checksum);
207
208 6962 leinfelder
		// set the size from file on disk, don't read bytes again
209
		File fileOnDisk = getFileOnDisk(localId);
210
		long fileSize = 0;
211
		if (fileOnDisk.exists()) {
212
			fileSize = fileOnDisk.length();
213
		}
214
		sysMeta.setSize(BigInteger.valueOf(fileSize));
215 6721 leinfelder
216
		// submitter
217
		Subject submitter = new Subject();
218
		submitter.setValue(docInfo.get("user_updated"));
219
		sysMeta.setSubmitter(submitter);
220
221
		// rights holder
222
		Subject owner = new Subject();
223
		owner.setValue(docInfo.get("user_owner"));
224
		sysMeta.setRightsHolder(owner);
225
226
		// dates
227
		String createdDateString = docInfo.get("date_created");
228
		String updatedDateString = docInfo.get("date_updated");
229
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
230
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
231
		sysMeta.setDateUploaded(createdDate);
232 7084 leinfelder
		//sysMeta.setDateSysMetadataModified(updatedDate);
233
		// use current datetime
234
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
235 6721 leinfelder
236
		// set the revision history
237
		String docidWithoutRev = accNum.getDocid();
238
		Identifier obsoletedBy = null;
239
		Identifier obsoletes = null;
240
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
241 6727 leinfelder
		// ensure this ordering since processing depends on it
242
		Collections.sort(revisions);
243 6721 leinfelder
		for (int existingRev: revisions) {
244
			// use the docid+rev as the guid
245
			String existingPid = docidWithoutRev + "." + existingRev;
246 7001 leinfelder
			try {
247
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
248
			} catch (McdbDocNotFoundException mdfe) {
249
				// we'll be defaulting to the local id
250
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
251
			}
252 6721 leinfelder
			if (existingRev < rev) {
253
				// it's the old docid, until it's not
254
				obsoletes = new Identifier();
255
				obsoletes.setValue(existingPid);
256
			}
257
			if (existingRev > rev) {
258
				// it's the newer docid
259
				obsoletedBy = new Identifier();
260
				obsoletedBy.setValue(existingPid);
261
				// only want the version just after it
262
				break;
263
			}
264
		}
265 6725 leinfelder
		// set them on our object
266 6721 leinfelder
		sysMeta.setObsoletedBy(obsoletedBy);
267
		sysMeta.setObsoletes(obsoletes);
268
269 6725 leinfelder
		// update the system metadata for the object[s] we are revising
270
		if (obsoletedBy != null) {
271 6971 leinfelder
			SystemMetadata obsoletedBySysMeta = null;
272
			try {
273
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
274
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
275
			} catch (McdbDocNotFoundException e) {
276
				// ignore
277
			}
278 6725 leinfelder
			if (obsoletedBySysMeta != null) {
279
				obsoletedBySysMeta.setObsoletes(identifier);
280 7297 leinfelder
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
281 6725 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
282
			}
283
		}
284
		if (obsoletes != null) {
285 6971 leinfelder
			SystemMetadata obsoletesSysMeta = null;
286
			try {
287
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
288
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
289
			} catch (McdbDocNotFoundException e) {
290
				// ignore
291
			}
292 6725 leinfelder
			if (obsoletesSysMeta != null) {
293
				obsoletesSysMeta.setObsoletedBy(identifier);
294 6911 leinfelder
				obsoletesSysMeta.setArchived(true);
295 7297 leinfelder
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
296 6911 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
297 6725 leinfelder
			}
298
		}
299
300 6744 leinfelder
		// look up the access control policy we have in metacat
301
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
302 7214 cjones
		try {
303
        List<AccessRule> allowList = accessPolicy.getAllowList();
304 7215 cjones
        int listSize = allowList.size();
305 7214 cjones
        sysMeta.setAccessPolicy(accessPolicy);
306
307
    } catch (NullPointerException npe) {
308
        logMetacat.info("The allow list is empty, can't include an empty " +
309
            "access policy in the system metadata for " + guid);
310
311
    }
312 6721 leinfelder
313
		// authoritative node
314
		NodeReference nr = new NodeReference();
315 7030 cjones
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
316 6721 leinfelder
		sysMeta.setOriginMemberNode(nr);
317
		sysMeta.setAuthoritativeMemberNode(nr);
318
319 6988 jones
		// Set a default replication policy
320
        ReplicationPolicy rp = getDefaultReplicationPolicy();
321
        if (rp != null) {
322
            sysMeta.setReplicationPolicy(rp);
323
        }
324
325 6706 leinfelder
		// further parse EML documents to get data object format,
326
		// describes and describedBy information
327
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
328
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
329
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
330
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
331
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
332
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
333
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
334
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
335
336
			try {
337 6962 leinfelder
338
				// get it again to parse the document
339
				logMetacat.debug("Re-reading document inputStream");
340 6721 leinfelder
				inputStream = MetacatHandler.read(localId);
341 6960 leinfelder
342
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
343
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
344
345 6721 leinfelder
				// iterate through the data objects in the EML doc and add sysmeta
346 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
347 6706 leinfelder
								+ "entities is: "
348 6960 leinfelder
								+ emlDocument.distributionMetadata);
349 6706 leinfelder
350 6712 leinfelder
				// for generating the ORE map
351
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
352
	            List<Identifier> dataIds = new ArrayList<Identifier>();
353
354 6706 leinfelder
				// iterate through data objects described by the EML
355 6960 leinfelder
	            if (emlDocument.distributionMetadata != null) {
356
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
357 6744 leinfelder
358 6960 leinfelder
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
359
				        String dataDocUrl = distMetadata.url;
360
				        String dataDocMimeType = distMetadata.mimeType;
361 6744 leinfelder
						// default to binary
362
						if (dataDocMimeType == null) {
363 6982 leinfelder
							dataDocMimeType = "application/octet-stream";
364 6721 leinfelder
						}
365 6852 leinfelder
366
						// process the data
367 6855 leinfelder
						boolean remoteData = false;
368 6852 leinfelder
						String dataDocLocalId = null;
369
						Identifier dataGuid = new Identifier();
370
371
						// handle ecogrid, or downloadable data
372
						String ecogridPrefix = "ecogrid://knb/";
373
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
374
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
375
						} else {
376
							// should we try downloading the remote data?
377
							if (downloadData) {
378
								InputStream dataObject = null;
379
								try {
380
									// download the data from the URL
381
									URL dataURL = new URL(dataDocUrl);
382 6873 leinfelder
									URLConnection dataConnection = dataURL.openConnection();
383
384
									// default is to download the data
385
									dataObject = dataConnection.getInputStream();
386
387
									String detectedContentType = dataConnection.getContentType();
388
									logMetacat.info("Detected content type: " + detectedContentType);
389
390
									if (detectedContentType != null) {
391
										// seems to be HTML from the remote location
392
										if (detectedContentType.contains("html")) {
393
											// if we are not expecting it, we skip it
394
											if (!dataDocMimeType.contains("html")) {
395
												// set to null so we don't download it
396
												dataObject = null;
397
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
398
											}
399
										}
400
401
									} else {
402
										// if we don't know what it is, should we skip it?
403
										dataObject = null;
404
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
405
									}
406
407 6852 leinfelder
								} catch (Exception e) {
408
									// error with the download
409
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
410
								}
411
412
								if (dataObject != null) {
413
									// create the local version of it
414
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
415
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
416
									dataGuid.setValue(dataDocLocalId);
417
418
									// save it locally
419
									Session session = new Session();
420
									session.setSubject(submitter);
421
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
422
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
423 6855 leinfelder
424
									remoteData = true;
425 6852 leinfelder
								}
426
							}
427
428
						}
429
430 6744 leinfelder
						logMetacat.debug("Data local ID: " + dataDocLocalId);
431
						logMetacat.debug("Data URL     : " + dataDocUrl);
432
						logMetacat.debug("Data mime    : " + dataDocMimeType);
433 6852 leinfelder
434 7112 leinfelder
						// check for valid docid.rev
435
						String dataDocid = null;
436
						int dataRev = 0;
437
						if (dataDocLocalId != null) {
438
							// look up the guid for the data
439
							try {
440
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
441
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
442
							} catch (Exception e) {
443
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
444
								dataDocLocalId = null;
445
							}
446
						}
447
448 6852 leinfelder
						// now we have a local id for the data
449
						if (dataDocLocalId != null) {
450 6744 leinfelder
451
							// check if data system metadata exists already
452
							SystemMetadata dataSysMeta = null;
453
							String dataGuidString = null;
454 6706 leinfelder
							try {
455 6744 leinfelder
								// look for the identifier
456 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
457 6744 leinfelder
								// set it
458 6706 leinfelder
								dataGuid.setValue(dataGuidString);
459 6744 leinfelder
								// look up the system metadata
460 6706 leinfelder
								try {
461 6744 leinfelder
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
462
								} catch (Exception e) {
463
									// probably not in the system
464
									dataSysMeta = null;
465 6706 leinfelder
								}
466 6744 leinfelder
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
467
							} catch (McdbDocNotFoundException nf) {
468
								// we didn't find it
469
								dataSysMeta = null;
470
							}
471 6712 leinfelder
472 6744 leinfelder
							// we'll have to generate it
473
							if (dataSysMeta == null) {
474
								// System metadata for data doesn't exist yet, so create it
475 6964 leinfelder
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
476 6961 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
477
478
								// now look it up again
479
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
480
481
								// set the guid
482
								dataGuid.setValue(dataGuidString);
483
484
								// inherit access rules from metadata, if we don't have our own
485
								if (remoteData) {
486
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
487
									// TODO: use access rules defined in EML, per data file
488
								}
489 6744 leinfelder
490
							}
491 6721 leinfelder
492 6961 leinfelder
							// set object format for the data file
493 6964 leinfelder
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
494 6982 leinfelder
							ObjectFormatIdentifier fmt = null;
495 6961 leinfelder
							try {
496 6982 leinfelder
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
497 6961 leinfelder
							} catch (NotFound nfe) {
498
								logMetacat.debug("Couldn't find format identifier for: "
499
												+ dataDocMimeType
500
												+ ". Setting it to application/octet-stream.");
501 6982 leinfelder
								fmt = new ObjectFormatIdentifier();
502
								fmt.setValue("application/octet-stream");
503 6961 leinfelder
							}
504 6982 leinfelder
							dataSysMeta.setFormatId(fmt);
505
506 6961 leinfelder
							// update the values
507
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
508
509
							// include as part of the ORE package
510 6744 leinfelder
							dataIds.add(dataGuid);
511
512
						} // end if (EML package)
513
514
					} // end for (data entities)
515
516
	            } // data entities not null
517
518 6712 leinfelder
				// ORE map
519 6713 leinfelder
				if (includeORE) {
520 6800 leinfelder
					// can we generate them?
521 6713 leinfelder
			        if (!dataIds.isEmpty()) {
522 6800 leinfelder
			        	// it doesn't exist in the system?
523
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
524
525
				            // generate the ORE map for this datapackage
526
				            Identifier resourceMapId = new Identifier();
527 7001 leinfelder
				            // use the local id, not the guid in case we have DOIs for them already
528 7849 leinfelder
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
529 6800 leinfelder
				            idMap.put(sysMeta.getIdentifier(), dataIds);
530
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
531
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
532
				            // copy most of the same system metadata as the packaging metadata
533
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
534
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
535
				            resourceMapSysMeta.setIdentifier(resourceMapId);
536 7222 leinfelder
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
537 6800 leinfelder
							resourceMapSysMeta.setChecksum(oreChecksum);
538
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
539
							resourceMapSysMeta.setFormatId(formatId);
540
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
541
542
							// set the revision graph
543
							resourceMapSysMeta.setObsoletes(null);
544
							resourceMapSysMeta.setObsoletedBy(null);
545
							// look up the resource map that this one obsoletes
546
							if (sysMeta.getObsoletes() != null) {
547 7273 leinfelder
								// use the localId in case we have a DOI
548
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
549 6800 leinfelder
								Identifier resourceMapObsoletes = new Identifier();
550 7849 leinfelder
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
551 6800 leinfelder
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
552
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
553
								if (resourceMapObsoletesSystemMetadata != null) {
554
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
555 7278 leinfelder
									resourceMapObsoletesSystemMetadata.setArchived(true);
556 6800 leinfelder
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
557
								}
558
							}
559
							// look up the resource map that this one is obsoletedBy
560
							if (sysMeta.getObsoletedBy() != null) {
561 7273 leinfelder
								// use the localId in case we have a DOI
562
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
563 6800 leinfelder
								Identifier resourceMapObsoletedBy = new Identifier();
564 7849 leinfelder
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
565 6800 leinfelder
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
566 7278 leinfelder
								resourceMapSysMeta.setArchived(true);
567 6800 leinfelder
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
568
								if (resourceMapObsoletedBySystemMetadata != null) {
569
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
570
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
571
								}
572
							}
573
574 6907 leinfelder
							// save it locally, if it doesn't already exist
575
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
576
								Session session = new Session();
577
								session.setSubject(submitter);
578
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
579
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
580
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
581
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
582
							}
583 6800 leinfelder
			        	}
584 6713 leinfelder
			        }
585
				}
586 6706 leinfelder
587
			} catch (ParserConfigurationException pce) {
588 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
589 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
590
591
			} catch (SAXException saxe) {
592 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
593 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
594
595
			} catch (XPathExpressionException xpee) {
596 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
597 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
598
			} catch (Exception e) {
599 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
600 6706 leinfelder
								+ "The error message was: " + e.getMessage());
601 6721 leinfelder
				e.printStackTrace();
602 6706 leinfelder
			} // end try()
603
604
		} // end if()
605
606
		return sysMeta;
607
	}
608 6988 jones
609 8190 leinfelder
	/**
610
	 * Checks for potential ORE object existence
611
	 * @param identifier
612
	 * @return
613
	 */
614
    public static boolean oreExistsFor(Identifier identifier) {
615
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
616 8200 leinfelder
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
617 8190 leinfelder
		return (ids != null && ids.size() > 0);
618
	}
619
620
	/**
621 6911 leinfelder
     * Generate SystemMetadata for any object in the object store that does
622
     * not already have it.  SystemMetadata documents themselves, are, of course,
623
     * exempt.  This is a utility method for migration of existing object
624
     * stores to DataONE where SystemMetadata is required for all objects.
625
     * @param idList
626
     * @param includeOre
627
     * @param downloadData
628
     * @throws PropertyNotFoundException
629
     * @throws NoSuchAlgorithmException
630
     * @throws AccessionNumberException
631
     * @throws SQLException
632 6964 leinfelder
	 * @throws SAXException
633
	 * @throws HandlerException
634
	 * @throws JiBXException
635
	 * @throws BaseException
636
	 * @throws ParseLSIDException
637
	 * @throws InsufficientKarmaException
638
	 * @throws ClassNotFoundException
639
	 * @throws IOException
640
	 * @throws McdbException
641
	 * @throws AccessException
642
	 * @throws AccessControlException
643 6911 leinfelder
     */
644
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData)
645 6964 leinfelder
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException
646 6911 leinfelder
    {
647
648
        for (String localId : idList) {
649 6998 leinfelder
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
650 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
651
652 6998 leinfelder
            SystemMetadata sm = null;
653
654
            //generate required system metadata fields from the document
655 7123 leinfelder
            try {
656
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
657
            } catch (Exception e) {
658
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
659
				continue;
660
			}
661
662 6998 leinfelder
            //insert the systemmetadata object or just update it as needed
663 7188 leinfelder
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
664
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
665
666 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
667
668 6911 leinfelder
        }
669 6964 leinfelder
        logMetacat.info("done generating system metadata for given list");
670 6911 leinfelder
    }
671 6706 leinfelder
672 6707 leinfelder
	/**
673 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
674
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
675
	 * evaluated.
676
	 *
677
	 * @param is The InputStream of bytes
678
	 *
679
	 * @return size The size in bytes of the input stream as a long
680
	 *
681
	 * @throws IOException
682
	 */
683 7322 leinfelder
	public static long sizeOfStream(InputStream is) throws IOException {
684 6706 leinfelder
685
		long size = 0;
686
		byte[] b = new byte[1024];
687
		int numread = is.read(b, 0, 1024);
688
		while (numread != -1) {
689
			size += numread;
690
			numread = is.read(b, 0, 1024);
691
		}
692
		return size;
693
694
	}
695 6962 leinfelder
696
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
697
698
		DocumentImpl doc = new DocumentImpl(docid, false);
699
		String filepath = null;
700
		String filename = null;
701
702
		// deal with data or metadata cases
703
		if (doc.getRootNodeID() == 0) {
704
			// this is a data file
705
			filepath = PropertyService.getProperty("application.datafilepath");
706
		} else {
707
			filepath = PropertyService.getProperty("application.documentfilepath");
708
		}
709
		// ensure it is a directory path
710
		if (!(filepath.endsWith("/"))) {
711
			filepath += "/";
712
		}
713
		filename = filepath + docid;
714
		File documentFile = new File(filename);
715
716
		return documentFile;
717
	}
718 6988 jones
719
	/**
720
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
721
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
722
	 * than or equal to zero, no policy needs to be set, so return null.
723
	 * @return ReplicationPolicy, or null if no replication policy is needed
724
	 */
725
    private static ReplicationPolicy getDefaultReplicationPolicy() {
726
        ReplicationPolicy rp = null;
727
        int numReplicas = -1;
728
        try {
729
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
730
        } catch (NumberFormatException e) {
731
            // The property is not a valid integer, so return a null policy
732
            return null;
733
        } catch (PropertyNotFoundException e) {
734
            // The property is not found, so return a null policy
735
            return null;
736
        }
737
738
        if (numReplicas > 0) {
739
            rp = new ReplicationPolicy();
740
            rp.setReplicationAllowed(true);
741
            rp.setNumberReplicas(numReplicas);
742
            try {
743
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
744
                if (preferredNodeList != null) {
745
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
746
                    if (pNodes != null && !pNodes.isEmpty()) {
747
                        rp.setPreferredMemberNodeList(pNodes);
748
                    }
749
                }
750
            } catch (PropertyNotFoundException e) {
751
                // No preferred list found in properties, so just ignore it; no action needed
752
            }
753
            try {
754
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
755
                if (blockedNodeList != null) {
756
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
757
                    if (bNodes != null && !bNodes.isEmpty()) {
758 7022 leinfelder
                        rp.setBlockedMemberNodeList(bNodes);
759 6988 jones
                    }
760
                }
761
            } catch (PropertyNotFoundException e) {
762
                // No blocked list found in properties, so just ignore it; no action needed
763
            }
764
        }
765
        return rp;
766
    }
767
768
    /**
769 7025 leinfelder
     * Extract a List of NodeReferences from a String listing the node identifiers where
770 6988 jones
     * each identifier is separated by whitespace, comma, or semicolon characters.
771
     * @param nodeString the string containing the list of nodes
772 7025 leinfelder
     * @return the List of NodeReference objects parsed from the input string
773 6988 jones
     */
774
    private static List<NodeReference> extractNodeReferences(String nodeString) {
775
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
776 7022 leinfelder
        String[] result = nodeString.split("[,;\\s]");
777 6988 jones
        for (String r : result) {
778 7025 leinfelder
        	if (r != null && r.length() > 0) {
779
	            NodeReference noderef = new NodeReference();
780
	            noderef.setValue(r);
781
	            nodeList.add(noderef);
782
	        }
783 6988 jones
        }
784
        return nodeList;
785
    }
786 6705 leinfelder
}