Project

General

Profile

1 6705 leinfelder
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A factory class for generating DataONE system metadata for objects stored in Metacat
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author$'
9
 *     '$Date$'
10
 * '$Revision$'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27
28 6962 leinfelder
import java.io.File;
29 6705 leinfelder
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32 6852 leinfelder
import java.net.URL;
33 6873 leinfelder
import java.net.URLConnection;
34 6705 leinfelder
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36 6712 leinfelder
import java.util.ArrayList;
37 6727 leinfelder
import java.util.Collections;
38 6705 leinfelder
import java.util.Date;
39 6712 leinfelder
import java.util.HashMap;
40 6705 leinfelder
import java.util.Hashtable;
41 6712 leinfelder
import java.util.List;
42
import java.util.Map;
43 6709 leinfelder
import java.util.Vector;
44 6705 leinfelder
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47
48 6712 leinfelder
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50 6705 leinfelder
import org.apache.log4j.Logger;
51 7622 leinfelder
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52 6705 leinfelder
import org.dataone.client.ObjectFormatCache;
53 6960 leinfelder
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56 6712 leinfelder
import org.dataone.ore.ResourceMapFactory;
57 6705 leinfelder
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59 6721 leinfelder
import org.dataone.service.types.v1.AccessPolicy;
60 7214 cjones
import org.dataone.service.types.v1.AccessRule;
61 6705 leinfelder
import org.dataone.service.types.v1.Checksum;
62
import org.dataone.service.types.v1.Identifier;
63
import org.dataone.service.types.v1.NodeReference;
64
import org.dataone.service.types.v1.ObjectFormatIdentifier;
65 6988 jones
import org.dataone.service.types.v1.ReplicationPolicy;
66 6721 leinfelder
import org.dataone.service.types.v1.Session;
67 6705 leinfelder
import org.dataone.service.types.v1.Subject;
68
import org.dataone.service.types.v1.SystemMetadata;
69
import org.dataone.service.types.v1.util.ChecksumUtil;
70 6709 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
71 6712 leinfelder
import org.dspace.foresite.ResourceMap;
72 6705 leinfelder
import org.jibx.runtime.JiBXException;
73
import org.xml.sax.SAXException;
74
75 7087 cjones
import java.util.Calendar;
76 7084 leinfelder
77 6705 leinfelder
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79 6709 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
80 6962 leinfelder
import edu.ucsb.nceas.metacat.DocumentImpl;
81 6705 leinfelder
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84 6712 leinfelder
import edu.ucsb.nceas.metacat.MetaCatServlet;
85 6705 leinfelder
import edu.ucsb.nceas.metacat.MetacatHandler;
86 6708 leinfelder
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87 6705 leinfelder
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.properties.PropertyService;
90 6708 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
91 6721 leinfelder
import edu.ucsb.nceas.metacat.shared.AccessException;
92 6708 leinfelder
import edu.ucsb.nceas.metacat.shared.HandlerException;
93 6705 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
94
import edu.ucsb.nceas.utilities.ParseLSIDException;
95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
96
97
public class SystemMetadataFactory {
98 6706 leinfelder
99 7849 leinfelder
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
100 6707 leinfelder
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
101 6961 leinfelder
	/**
102
	 * use this flag if you want to update any existing system metadata values with generated content
103
	 */
104
	private static boolean updateExisting = true;
105 6712 leinfelder
106 6705 leinfelder
	/**
107 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
108
	 *
109
	 * @param localId
110
	 *            The local document identifier
111
	 * @param user
112
	 *            The user submitting the system metadata document
113
	 * @param groups
114
	 *            The groups the user belongs to
115
	 *
116
	 * @return sysMeta The system metadata object created
117 6708 leinfelder
	 * @throws SAXException
118
	 * @throws HandlerException
119
	 * @throws AccessControlException
120 6721 leinfelder
	 * @throws AccessException
121 6706 leinfelder
	 */
122 6852 leinfelder
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
123 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
124
			IOException, AccessionNumberException, ClassNotFoundException,
125
			InsufficientKarmaException, ParseLSIDException,
126
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
127 6721 leinfelder
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
128 6707 leinfelder
129 6964 leinfelder
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
130 6705 leinfelder
131 6961 leinfelder
		// check for system metadata
132
		SystemMetadata sysMeta = null;
133
134 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
135 6808 leinfelder
		int rev = Integer.valueOf(accNum.getRev());
136 6961 leinfelder
137
		// get/make the guid
138
		String guid = null;
139
		try {
140
			// get the guid if it exists
141
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
142
		} catch (McdbDocNotFoundException dnfe) {
143
			// otherwise create the mapping
144 6964 leinfelder
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
145 6961 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
146 6964 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
147 6961 leinfelder
		}
148
149
		// look up existing system metadata if it exists
150
		Identifier identifier = new Identifier();
151
		identifier.setValue(guid);
152
		try {
153 6964 leinfelder
			logMetacat.debug("Using hazelcast to get system metadata");
154 6961 leinfelder
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
155
			// TODO: if this is the case, we could return here -- what else do we gain?
156
			if (!updateExisting ) {
157
				return sysMeta;
158
			}
159
		} catch (Exception e) {
160 6964 leinfelder
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
161
162 6970 leinfelder
		}
163
164
		if (sysMeta == null) {
165 6961 leinfelder
			// create system metadata
166
			sysMeta = new SystemMetadata();
167
			sysMeta.setIdentifier(identifier);
168
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
169
			sysMeta.setArchived(false);
170
		}
171 6962 leinfelder
172 6706 leinfelder
		// get additional docinfo
173 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
174 6706 leinfelder
		// set the default object format
175 6708 leinfelder
		String doctype = docInfo.get("doctype");
176 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
177
178
		// set the object format, fall back to defaults
179 6982 leinfelder
		if (doctype.trim().equals("BIN")) {
180
			// we don't know much about this file (yet)
181
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
182
		} else {
183
			try {
184
				// do we know the given format?
185
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
186
			} catch (NotFound nfe) {
187
				// format is not registered, use default
188 6964 leinfelder
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
189 6706 leinfelder
			}
190
		}
191
192
		sysMeta.setFormatId(fmtid);
193 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
194 6706 leinfelder
195 6962 leinfelder
		// for retrieving the actual object
196
		InputStream inputStream = null;
197
		inputStream = MetacatHandler.read(localId);
198
199 6721 leinfelder
		// create the checksum
200 7222 leinfelder
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
201 6721 leinfelder
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
202 7084 leinfelder
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
203 6721 leinfelder
		sysMeta.setChecksum(checksum);
204
205 6962 leinfelder
		// set the size from file on disk, don't read bytes again
206
		File fileOnDisk = getFileOnDisk(localId);
207
		long fileSize = 0;
208
		if (fileOnDisk.exists()) {
209
			fileSize = fileOnDisk.length();
210
		}
211
		sysMeta.setSize(BigInteger.valueOf(fileSize));
212 6721 leinfelder
213
		// submitter
214
		Subject submitter = new Subject();
215
		submitter.setValue(docInfo.get("user_updated"));
216
		sysMeta.setSubmitter(submitter);
217
218
		// rights holder
219
		Subject owner = new Subject();
220
		owner.setValue(docInfo.get("user_owner"));
221
		sysMeta.setRightsHolder(owner);
222
223
		// dates
224
		String createdDateString = docInfo.get("date_created");
225
		String updatedDateString = docInfo.get("date_updated");
226
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
227
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
228
		sysMeta.setDateUploaded(createdDate);
229 7084 leinfelder
		//sysMeta.setDateSysMetadataModified(updatedDate);
230
		// use current datetime
231
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
232 6721 leinfelder
233
		// set the revision history
234
		String docidWithoutRev = accNum.getDocid();
235
		Identifier obsoletedBy = null;
236
		Identifier obsoletes = null;
237
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
238 6727 leinfelder
		// ensure this ordering since processing depends on it
239
		Collections.sort(revisions);
240 6721 leinfelder
		for (int existingRev: revisions) {
241
			// use the docid+rev as the guid
242
			String existingPid = docidWithoutRev + "." + existingRev;
243 7001 leinfelder
			try {
244
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
245
			} catch (McdbDocNotFoundException mdfe) {
246
				// we'll be defaulting to the local id
247
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
248
			}
249 6721 leinfelder
			if (existingRev < rev) {
250
				// it's the old docid, until it's not
251
				obsoletes = new Identifier();
252
				obsoletes.setValue(existingPid);
253
			}
254
			if (existingRev > rev) {
255
				// it's the newer docid
256
				obsoletedBy = new Identifier();
257
				obsoletedBy.setValue(existingPid);
258
				// only want the version just after it
259
				break;
260
			}
261
		}
262 6725 leinfelder
		// set them on our object
263 6721 leinfelder
		sysMeta.setObsoletedBy(obsoletedBy);
264
		sysMeta.setObsoletes(obsoletes);
265
266 6725 leinfelder
		// update the system metadata for the object[s] we are revising
267
		if (obsoletedBy != null) {
268 6971 leinfelder
			SystemMetadata obsoletedBySysMeta = null;
269
			try {
270
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
271
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
272
			} catch (McdbDocNotFoundException e) {
273
				// ignore
274
			}
275 6725 leinfelder
			if (obsoletedBySysMeta != null) {
276
				obsoletedBySysMeta.setObsoletes(identifier);
277 7297 leinfelder
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
278 6725 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
279
			}
280
		}
281
		if (obsoletes != null) {
282 6971 leinfelder
			SystemMetadata obsoletesSysMeta = null;
283
			try {
284
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
285
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
286
			} catch (McdbDocNotFoundException e) {
287
				// ignore
288
			}
289 6725 leinfelder
			if (obsoletesSysMeta != null) {
290
				obsoletesSysMeta.setObsoletedBy(identifier);
291 6911 leinfelder
				obsoletesSysMeta.setArchived(true);
292 7297 leinfelder
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
293 6911 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
294 6725 leinfelder
			}
295
		}
296
297 6744 leinfelder
		// look up the access control policy we have in metacat
298
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
299 7214 cjones
		try {
300
        List<AccessRule> allowList = accessPolicy.getAllowList();
301 7215 cjones
        int listSize = allowList.size();
302 7214 cjones
        sysMeta.setAccessPolicy(accessPolicy);
303
304
    } catch (NullPointerException npe) {
305
        logMetacat.info("The allow list is empty, can't include an empty " +
306
            "access policy in the system metadata for " + guid);
307
308
    }
309 6721 leinfelder
310
		// authoritative node
311
		NodeReference nr = new NodeReference();
312 7030 cjones
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
313 6721 leinfelder
		sysMeta.setOriginMemberNode(nr);
314
		sysMeta.setAuthoritativeMemberNode(nr);
315
316 6988 jones
		// Set a default replication policy
317
        ReplicationPolicy rp = getDefaultReplicationPolicy();
318
        if (rp != null) {
319
            sysMeta.setReplicationPolicy(rp);
320
        }
321
322 6706 leinfelder
		// further parse EML documents to get data object format,
323
		// describes and describedBy information
324
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
325
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
326
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
327
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
328
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
329
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
330
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
331
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
332
333
			try {
334 6962 leinfelder
335
				// get it again to parse the document
336
				logMetacat.debug("Re-reading document inputStream");
337 6721 leinfelder
				inputStream = MetacatHandler.read(localId);
338 6960 leinfelder
339
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
340
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
341
342 6721 leinfelder
				// iterate through the data objects in the EML doc and add sysmeta
343 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
344 6706 leinfelder
								+ "entities is: "
345 6960 leinfelder
								+ emlDocument.distributionMetadata);
346 6706 leinfelder
347 6712 leinfelder
				// for generating the ORE map
348
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
349
	            List<Identifier> dataIds = new ArrayList<Identifier>();
350
351 6706 leinfelder
				// iterate through data objects described by the EML
352 6960 leinfelder
	            if (emlDocument.distributionMetadata != null) {
353
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
354 6744 leinfelder
355 6960 leinfelder
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
356
				        String dataDocUrl = distMetadata.url;
357
				        String dataDocMimeType = distMetadata.mimeType;
358 6744 leinfelder
						// default to binary
359
						if (dataDocMimeType == null) {
360 6982 leinfelder
							dataDocMimeType = "application/octet-stream";
361 6721 leinfelder
						}
362 6852 leinfelder
363
						// process the data
364 6855 leinfelder
						boolean remoteData = false;
365 6852 leinfelder
						String dataDocLocalId = null;
366
						Identifier dataGuid = new Identifier();
367
368
						// handle ecogrid, or downloadable data
369
						String ecogridPrefix = "ecogrid://knb/";
370
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
371
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
372
						} else {
373
							// should we try downloading the remote data?
374
							if (downloadData) {
375
								InputStream dataObject = null;
376
								try {
377
									// download the data from the URL
378
									URL dataURL = new URL(dataDocUrl);
379 6873 leinfelder
									URLConnection dataConnection = dataURL.openConnection();
380
381
									// default is to download the data
382
									dataObject = dataConnection.getInputStream();
383
384
									String detectedContentType = dataConnection.getContentType();
385
									logMetacat.info("Detected content type: " + detectedContentType);
386
387
									if (detectedContentType != null) {
388
										// seems to be HTML from the remote location
389
										if (detectedContentType.contains("html")) {
390
											// if we are not expecting it, we skip it
391
											if (!dataDocMimeType.contains("html")) {
392
												// set to null so we don't download it
393
												dataObject = null;
394
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
395
											}
396
										}
397
398
									} else {
399
										// if we don't know what it is, should we skip it?
400
										dataObject = null;
401
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
402
									}
403
404 6852 leinfelder
								} catch (Exception e) {
405
									// error with the download
406
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
407
								}
408
409
								if (dataObject != null) {
410
									// create the local version of it
411
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
412
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
413
									dataGuid.setValue(dataDocLocalId);
414
415
									// save it locally
416
									Session session = new Session();
417
									session.setSubject(submitter);
418
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
419
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
420 6855 leinfelder
421
									remoteData = true;
422 6852 leinfelder
								}
423
							}
424
425
						}
426
427 6744 leinfelder
						logMetacat.debug("Data local ID: " + dataDocLocalId);
428
						logMetacat.debug("Data URL     : " + dataDocUrl);
429
						logMetacat.debug("Data mime    : " + dataDocMimeType);
430 6852 leinfelder
431 7112 leinfelder
						// check for valid docid.rev
432
						String dataDocid = null;
433
						int dataRev = 0;
434
						if (dataDocLocalId != null) {
435
							// look up the guid for the data
436
							try {
437
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
438
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
439
							} catch (Exception e) {
440
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
441
								dataDocLocalId = null;
442
							}
443
						}
444
445 6852 leinfelder
						// now we have a local id for the data
446
						if (dataDocLocalId != null) {
447 6744 leinfelder
448
							// check if data system metadata exists already
449
							SystemMetadata dataSysMeta = null;
450
							String dataGuidString = null;
451 6706 leinfelder
							try {
452 6744 leinfelder
								// look for the identifier
453 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
454 6744 leinfelder
								// set it
455 6706 leinfelder
								dataGuid.setValue(dataGuidString);
456 6744 leinfelder
								// look up the system metadata
457 6706 leinfelder
								try {
458 6744 leinfelder
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
459
								} catch (Exception e) {
460
									// probably not in the system
461
									dataSysMeta = null;
462 6706 leinfelder
								}
463 6744 leinfelder
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
464
							} catch (McdbDocNotFoundException nf) {
465
								// we didn't find it
466
								dataSysMeta = null;
467
							}
468 6712 leinfelder
469 6744 leinfelder
							// we'll have to generate it
470
							if (dataSysMeta == null) {
471
								// System metadata for data doesn't exist yet, so create it
472 6964 leinfelder
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
473 6961 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
474
475
								// now look it up again
476
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
477
478
								// set the guid
479
								dataGuid.setValue(dataGuidString);
480
481
								// inherit access rules from metadata, if we don't have our own
482
								if (remoteData) {
483
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
484
									// TODO: use access rules defined in EML, per data file
485
								}
486 6744 leinfelder
487
							}
488 6721 leinfelder
489 6961 leinfelder
							// set object format for the data file
490 6964 leinfelder
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
491 6982 leinfelder
							ObjectFormatIdentifier fmt = null;
492 6961 leinfelder
							try {
493 6982 leinfelder
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
494 6961 leinfelder
							} catch (NotFound nfe) {
495
								logMetacat.debug("Couldn't find format identifier for: "
496
												+ dataDocMimeType
497
												+ ". Setting it to application/octet-stream.");
498 6982 leinfelder
								fmt = new ObjectFormatIdentifier();
499
								fmt.setValue("application/octet-stream");
500 6961 leinfelder
							}
501 6982 leinfelder
							dataSysMeta.setFormatId(fmt);
502
503 6961 leinfelder
							// update the values
504
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
505
506
							// include as part of the ORE package
507 6744 leinfelder
							dataIds.add(dataGuid);
508
509
						} // end if (EML package)
510
511
					} // end for (data entities)
512
513
	            } // data entities not null
514
515 6712 leinfelder
				// ORE map
516 6713 leinfelder
				if (includeORE) {
517 6800 leinfelder
					// can we generate them?
518 6713 leinfelder
			        if (!dataIds.isEmpty()) {
519 6800 leinfelder
			        	// it doesn't exist in the system?
520
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
521
522
				            // generate the ORE map for this datapackage
523
				            Identifier resourceMapId = new Identifier();
524 7001 leinfelder
				            // use the local id, not the guid in case we have DOIs for them already
525 7849 leinfelder
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
526 6800 leinfelder
				            idMap.put(sysMeta.getIdentifier(), dataIds);
527
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
528
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
529
				            // copy most of the same system metadata as the packaging metadata
530
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
531
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
532
				            resourceMapSysMeta.setIdentifier(resourceMapId);
533 7222 leinfelder
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
534 6800 leinfelder
							resourceMapSysMeta.setChecksum(oreChecksum);
535
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
536
							resourceMapSysMeta.setFormatId(formatId);
537
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
538
539
							// set the revision graph
540
							resourceMapSysMeta.setObsoletes(null);
541
							resourceMapSysMeta.setObsoletedBy(null);
542
							// look up the resource map that this one obsoletes
543
							if (sysMeta.getObsoletes() != null) {
544 7273 leinfelder
								// use the localId in case we have a DOI
545
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
546 6800 leinfelder
								Identifier resourceMapObsoletes = new Identifier();
547 7849 leinfelder
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
548 6800 leinfelder
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
549
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
550
								if (resourceMapObsoletesSystemMetadata != null) {
551
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
552 7278 leinfelder
									resourceMapObsoletesSystemMetadata.setArchived(true);
553 6800 leinfelder
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
554
								}
555
							}
556
							// look up the resource map that this one is obsoletedBy
557
							if (sysMeta.getObsoletedBy() != null) {
558 7273 leinfelder
								// use the localId in case we have a DOI
559
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
560 6800 leinfelder
								Identifier resourceMapObsoletedBy = new Identifier();
561 7849 leinfelder
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
562 6800 leinfelder
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
563 7278 leinfelder
								resourceMapSysMeta.setArchived(true);
564 6800 leinfelder
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
565
								if (resourceMapObsoletedBySystemMetadata != null) {
566
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
567
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
568
								}
569
							}
570
571 6907 leinfelder
							// save it locally, if it doesn't already exist
572
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
573
								Session session = new Session();
574
								session.setSubject(submitter);
575
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
576
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
577
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
578
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
579
							}
580 6800 leinfelder
			        	}
581 6713 leinfelder
			        }
582
				}
583 6706 leinfelder
584
			} catch (ParserConfigurationException pce) {
585 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
586 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
587
588
			} catch (SAXException saxe) {
589 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
590 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
591
592
			} catch (XPathExpressionException xpee) {
593 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
594 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
595
			} catch (Exception e) {
596 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
597 6706 leinfelder
								+ "The error message was: " + e.getMessage());
598 6721 leinfelder
				e.printStackTrace();
599 6706 leinfelder
			} // end try()
600
601
		} // end if()
602
603
		return sysMeta;
604
	}
605 6988 jones
606
    /**
607 6911 leinfelder
     * Generate SystemMetadata for any object in the object store that does
608
     * not already have it.  SystemMetadata documents themselves, are, of course,
609
     * exempt.  This is a utility method for migration of existing object
610
     * stores to DataONE where SystemMetadata is required for all objects.
611
     * @param idList
612
     * @param includeOre
613
     * @param downloadData
614
     * @throws PropertyNotFoundException
615
     * @throws NoSuchAlgorithmException
616
     * @throws AccessionNumberException
617
     * @throws SQLException
618 6964 leinfelder
	 * @throws SAXException
619
	 * @throws HandlerException
620
	 * @throws JiBXException
621
	 * @throws BaseException
622
	 * @throws ParseLSIDException
623
	 * @throws InsufficientKarmaException
624
	 * @throws ClassNotFoundException
625
	 * @throws IOException
626
	 * @throws McdbException
627
	 * @throws AccessException
628
	 * @throws AccessControlException
629 6911 leinfelder
     */
630
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData)
631 6964 leinfelder
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException
632 6911 leinfelder
    {
633
634
        for (String localId : idList) {
635 6998 leinfelder
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
636 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
637
638 6998 leinfelder
            SystemMetadata sm = null;
639
640
            //generate required system metadata fields from the document
641 7123 leinfelder
            try {
642
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
643
            } catch (Exception e) {
644
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
645
				continue;
646
			}
647
648 6998 leinfelder
            //insert the systemmetadata object or just update it as needed
649 7188 leinfelder
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
650
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
651
652 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
653
654 6911 leinfelder
        }
655 6964 leinfelder
        logMetacat.info("done generating system metadata for given list");
656 6911 leinfelder
    }
657
658
	/**
659 6800 leinfelder
	 * Determines if we already have registered an ORE map for this package
660
	 * @param guid of the EML/packaging object
661
	 * @return true if there is an ORE map for the given package
662
	 */
663
	private static boolean oreExistsFor(Identifier guid) {
664
		// TODO: implement call to CN.search()
665
		return false;
666
	}
667 6706 leinfelder
668 6707 leinfelder
	/**
669 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
670
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
671
	 * evaluated.
672
	 *
673
	 * @param is The InputStream of bytes
674
	 *
675
	 * @return size The size in bytes of the input stream as a long
676
	 *
677
	 * @throws IOException
678
	 */
679 7322 leinfelder
	public static long sizeOfStream(InputStream is) throws IOException {
680 6706 leinfelder
681
		long size = 0;
682
		byte[] b = new byte[1024];
683
		int numread = is.read(b, 0, 1024);
684
		while (numread != -1) {
685
			size += numread;
686
			numread = is.read(b, 0, 1024);
687
		}
688
		return size;
689
690
	}
691 6962 leinfelder
692
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
693
694
		DocumentImpl doc = new DocumentImpl(docid, false);
695
		String filepath = null;
696
		String filename = null;
697
698
		// deal with data or metadata cases
699
		if (doc.getRootNodeID() == 0) {
700
			// this is a data file
701
			filepath = PropertyService.getProperty("application.datafilepath");
702
		} else {
703
			filepath = PropertyService.getProperty("application.documentfilepath");
704
		}
705
		// ensure it is a directory path
706
		if (!(filepath.endsWith("/"))) {
707
			filepath += "/";
708
		}
709
		filename = filepath + docid;
710
		File documentFile = new File(filename);
711
712
		return documentFile;
713
	}
714 6988 jones
715
	/**
716
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
717
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
718
	 * than or equal to zero, no policy needs to be set, so return null.
719
	 * @return ReplicationPolicy, or null if no replication policy is needed
720
	 */
721
    private static ReplicationPolicy getDefaultReplicationPolicy() {
722
        ReplicationPolicy rp = null;
723
        int numReplicas = -1;
724
        try {
725
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
726
        } catch (NumberFormatException e) {
727
            // The property is not a valid integer, so return a null policy
728
            return null;
729
        } catch (PropertyNotFoundException e) {
730
            // The property is not found, so return a null policy
731
            return null;
732
        }
733
734
        if (numReplicas > 0) {
735
            rp = new ReplicationPolicy();
736
            rp.setReplicationAllowed(true);
737
            rp.setNumberReplicas(numReplicas);
738
            try {
739
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
740
                if (preferredNodeList != null) {
741
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
742
                    if (pNodes != null && !pNodes.isEmpty()) {
743
                        rp.setPreferredMemberNodeList(pNodes);
744
                    }
745
                }
746
            } catch (PropertyNotFoundException e) {
747
                // No preferred list found in properties, so just ignore it; no action needed
748
            }
749
            try {
750
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
751
                if (blockedNodeList != null) {
752
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
753
                    if (bNodes != null && !bNodes.isEmpty()) {
754 7022 leinfelder
                        rp.setBlockedMemberNodeList(bNodes);
755 6988 jones
                    }
756
                }
757
            } catch (PropertyNotFoundException e) {
758
                // No blocked list found in properties, so just ignore it; no action needed
759
            }
760
        }
761
        return rp;
762
    }
763
764
    /**
765 7025 leinfelder
     * Extract a List of NodeReferences from a String listing the node identifiers where
766 6988 jones
     * each identifier is separated by whitespace, comma, or semicolon characters.
767
     * @param nodeString the string containing the list of nodes
768 7025 leinfelder
     * @return the List of NodeReference objects parsed from the input string
769 6988 jones
     */
770
    private static List<NodeReference> extractNodeReferences(String nodeString) {
771
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
772 7022 leinfelder
        String[] result = nodeString.split("[,;\\s]");
773 6988 jones
        for (String r : result) {
774 7025 leinfelder
        	if (r != null && r.length() > 0) {
775
	            NodeReference noderef = new NodeReference();
776
	            noderef.setValue(r);
777
	            nodeList.add(noderef);
778
	        }
779 6988 jones
        }
780
        return nodeList;
781
    }
782 6705 leinfelder
}