Project

General

Profile

1 6705 leinfelder
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author$'
9
 *     '$Date$'
10
 * '$Revision$'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27
28 6962 leinfelder
import java.io.File;
29 6705 leinfelder
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32 6852 leinfelder
import java.net.URL;
33 6873 leinfelder
import java.net.URLConnection;
34 6705 leinfelder
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36 6712 leinfelder
import java.util.ArrayList;
37 6727 leinfelder
import java.util.Collections;
38 6705 leinfelder
import java.util.Date;
39 6712 leinfelder
import java.util.HashMap;
40 6705 leinfelder
import java.util.Hashtable;
41 6712 leinfelder
import java.util.List;
42
import java.util.Map;
43 6709 leinfelder
import java.util.Vector;
44 6705 leinfelder
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47
48 6712 leinfelder
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50 6705 leinfelder
import org.apache.log4j.Logger;
51 6721 leinfelder
import org.apache.wicket.protocol.http.MockHttpServletRequest;
52 6705 leinfelder
import org.dataone.client.ObjectFormatCache;
53 6960 leinfelder
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56 6712 leinfelder
import org.dataone.ore.ResourceMapFactory;
57 6705 leinfelder
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59 6721 leinfelder
import org.dataone.service.types.v1.AccessPolicy;
60 6705 leinfelder
import org.dataone.service.types.v1.Checksum;
61
import org.dataone.service.types.v1.Identifier;
62
import org.dataone.service.types.v1.NodeReference;
63
import org.dataone.service.types.v1.ObjectFormatIdentifier;
64 6988 jones
import org.dataone.service.types.v1.ReplicationPolicy;
65 6721 leinfelder
import org.dataone.service.types.v1.Session;
66 6705 leinfelder
import org.dataone.service.types.v1.Subject;
67
import org.dataone.service.types.v1.SystemMetadata;
68
import org.dataone.service.types.v1.util.ChecksumUtil;
69 6709 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
70 6712 leinfelder
import org.dspace.foresite.ResourceMap;
71 6705 leinfelder
import org.jibx.runtime.JiBXException;
72
import org.xml.sax.SAXException;
73
74 7087 cjones
import java.util.Calendar;
75 7084 leinfelder
76 6705 leinfelder
import edu.ucsb.nceas.metacat.AccessionNumber;
77
import edu.ucsb.nceas.metacat.AccessionNumberException;
78 6709 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
79 6962 leinfelder
import edu.ucsb.nceas.metacat.DocumentImpl;
80 6705 leinfelder
import edu.ucsb.nceas.metacat.IdentifierManager;
81
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
82
import edu.ucsb.nceas.metacat.McdbException;
83 6712 leinfelder
import edu.ucsb.nceas.metacat.MetaCatServlet;
84 6705 leinfelder
import edu.ucsb.nceas.metacat.MetacatHandler;
85 6708 leinfelder
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
86 6705 leinfelder
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
87
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
88
import edu.ucsb.nceas.metacat.properties.PropertyService;
89 6708 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
90 6721 leinfelder
import edu.ucsb.nceas.metacat.shared.AccessException;
91 6708 leinfelder
import edu.ucsb.nceas.metacat.shared.HandlerException;
92 6705 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
93
import edu.ucsb.nceas.utilities.ParseLSIDException;
94
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
95
96
public class SystemMetadataFactory {
97 6706 leinfelder
98 6800 leinfelder
	private static final String resourceMapPrefix = "resourceMap_";
99 6707 leinfelder
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
100 6961 leinfelder
	/**
101
	 * use this flag if you want to update any existing system metadata values with generated content
102
	 */
103
	private static boolean updateExisting = true;
104 6712 leinfelder
105 6705 leinfelder
	/**
106 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
107
	 *
108
	 * @param localId
109
	 *            The local document identifier
110
	 * @param user
111
	 *            The user submitting the system metadata document
112
	 * @param groups
113
	 *            The groups the user belongs to
114
	 *
115
	 * @return sysMeta The system metadata object created
116 6708 leinfelder
	 * @throws SAXException
117
	 * @throws HandlerException
118
	 * @throws AccessControlException
119 6721 leinfelder
	 * @throws AccessException
120 6706 leinfelder
	 */
121 6852 leinfelder
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
122 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
123
			IOException, AccessionNumberException, ClassNotFoundException,
124
			InsufficientKarmaException, ParseLSIDException,
125
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
126 6721 leinfelder
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
127 6707 leinfelder
128 6964 leinfelder
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
129 6705 leinfelder
130 6961 leinfelder
		// check for system metadata
131
		SystemMetadata sysMeta = null;
132
133 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
134 6808 leinfelder
		int rev = Integer.valueOf(accNum.getRev());
135 6961 leinfelder
136
		// get/make the guid
137
		String guid = null;
138
		try {
139
			// get the guid if it exists
140
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
141
		} catch (McdbDocNotFoundException dnfe) {
142
			// otherwise create the mapping
143 6964 leinfelder
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
144 6961 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
145 6964 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
146 6961 leinfelder
		}
147
148
		// look up existing system metadata if it exists
149
		Identifier identifier = new Identifier();
150
		identifier.setValue(guid);
151
		try {
152 6964 leinfelder
			logMetacat.debug("Using hazelcast to get system metadata");
153 6961 leinfelder
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
154
			// TODO: if this is the case, we could return here -- what else do we gain?
155
			if (!updateExisting ) {
156
				return sysMeta;
157
			}
158
		} catch (Exception e) {
159 6964 leinfelder
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
160
161 6970 leinfelder
		}
162
163
		if (sysMeta == null) {
164 6961 leinfelder
			// create system metadata
165
			sysMeta = new SystemMetadata();
166
			sysMeta.setIdentifier(identifier);
167
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
168
			sysMeta.setArchived(false);
169
		}
170 6962 leinfelder
171 6706 leinfelder
		// get additional docinfo
172 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
173 6706 leinfelder
		// set the default object format
174 6708 leinfelder
		String doctype = docInfo.get("doctype");
175 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
176
177
		// set the object format, fall back to defaults
178 6982 leinfelder
		if (doctype.trim().equals("BIN")) {
179
			// we don't know much about this file (yet)
180
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
181
		} else {
182
			try {
183
				// do we know the given format?
184
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
185
			} catch (NotFound nfe) {
186
				// format is not registered, use default
187 6964 leinfelder
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
188 6706 leinfelder
			}
189
		}
190
191
		sysMeta.setFormatId(fmtid);
192 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
193 6706 leinfelder
194 6962 leinfelder
		// for retrieving the actual object
195
		InputStream inputStream = null;
196
		inputStream = MetacatHandler.read(localId);
197
198 6721 leinfelder
		// create the checksum
199
		String algorithm = "MD5";
200
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
201 7084 leinfelder
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
202 6721 leinfelder
		sysMeta.setChecksum(checksum);
203
204 6962 leinfelder
		// set the size from file on disk, don't read bytes again
205
		File fileOnDisk = getFileOnDisk(localId);
206
		long fileSize = 0;
207
		if (fileOnDisk.exists()) {
208
			fileSize = fileOnDisk.length();
209
		}
210
		sysMeta.setSize(BigInteger.valueOf(fileSize));
211 6721 leinfelder
212
		// submitter
213
		Subject submitter = new Subject();
214
		submitter.setValue(docInfo.get("user_updated"));
215
		sysMeta.setSubmitter(submitter);
216
217
		// rights holder
218
		Subject owner = new Subject();
219
		owner.setValue(docInfo.get("user_owner"));
220
		sysMeta.setRightsHolder(owner);
221
222
		// dates
223
		String createdDateString = docInfo.get("date_created");
224
		String updatedDateString = docInfo.get("date_updated");
225
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
226
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
227
		sysMeta.setDateUploaded(createdDate);
228 7084 leinfelder
		//sysMeta.setDateSysMetadataModified(updatedDate);
229
		// use current datetime
230
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
231 6721 leinfelder
232
		// set the revision history
233
		String docidWithoutRev = accNum.getDocid();
234
		Identifier obsoletedBy = null;
235
		Identifier obsoletes = null;
236
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
237 6727 leinfelder
		// ensure this ordering since processing depends on it
238
		Collections.sort(revisions);
239 6721 leinfelder
		for (int existingRev: revisions) {
240
			// use the docid+rev as the guid
241
			String existingPid = docidWithoutRev + "." + existingRev;
242 7001 leinfelder
			try {
243
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
244
			} catch (McdbDocNotFoundException mdfe) {
245
				// we'll be defaulting to the local id
246
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
247
			}
248 6721 leinfelder
			if (existingRev < rev) {
249
				// it's the old docid, until it's not
250
				obsoletes = new Identifier();
251
				obsoletes.setValue(existingPid);
252
			}
253
			if (existingRev > rev) {
254
				// it's the newer docid
255
				obsoletedBy = new Identifier();
256
				obsoletedBy.setValue(existingPid);
257
				// only want the version just after it
258
				break;
259
			}
260
		}
261 6725 leinfelder
		// set them on our object
262 6721 leinfelder
		sysMeta.setObsoletedBy(obsoletedBy);
263
		sysMeta.setObsoletes(obsoletes);
264
265 6725 leinfelder
		// update the system metadata for the object[s] we are revising
266
		if (obsoletedBy != null) {
267 6971 leinfelder
			SystemMetadata obsoletedBySysMeta = null;
268
			try {
269
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
270
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
271
			} catch (McdbDocNotFoundException e) {
272
				// ignore
273
			}
274 6725 leinfelder
			if (obsoletedBySysMeta != null) {
275
				obsoletedBySysMeta.setObsoletes(identifier);
276
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
277
			}
278
		}
279
		if (obsoletes != null) {
280 6971 leinfelder
			SystemMetadata obsoletesSysMeta = null;
281
			try {
282
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
283
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
284
			} catch (McdbDocNotFoundException e) {
285
				// ignore
286
			}
287 6725 leinfelder
			if (obsoletesSysMeta != null) {
288
				obsoletesSysMeta.setObsoletedBy(identifier);
289 6911 leinfelder
				obsoletesSysMeta.setArchived(true);
290
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
291 6725 leinfelder
			}
292
		}
293
294 6744 leinfelder
		// look up the access control policy we have in metacat
295
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
296 6721 leinfelder
		sysMeta.setAccessPolicy(accessPolicy);
297
298
		// authoritative node
299
		NodeReference nr = new NodeReference();
300 7030 cjones
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
301 6721 leinfelder
		sysMeta.setOriginMemberNode(nr);
302
		sysMeta.setAuthoritativeMemberNode(nr);
303
304 6988 jones
		// Set a default replication policy
305
        ReplicationPolicy rp = getDefaultReplicationPolicy();
306
        if (rp != null) {
307
            sysMeta.setReplicationPolicy(rp);
308
        }
309
310 6706 leinfelder
		// further parse EML documents to get data object format,
311
		// describes and describedBy information
312
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
313
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
314
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
315
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
316
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
317
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
318
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
319
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
320
321
			try {
322 6962 leinfelder
323
				// get it again to parse the document
324
				logMetacat.debug("Re-reading document inputStream");
325 6721 leinfelder
				inputStream = MetacatHandler.read(localId);
326 6960 leinfelder
327
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
328
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
329
330 6721 leinfelder
				// iterate through the data objects in the EML doc and add sysmeta
331 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
332 6706 leinfelder
								+ "entities is: "
333 6960 leinfelder
								+ emlDocument.distributionMetadata);
334 6706 leinfelder
335 6712 leinfelder
				// for generating the ORE map
336
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
337
	            List<Identifier> dataIds = new ArrayList<Identifier>();
338
339 6706 leinfelder
				// iterate through data objects described by the EML
340 6960 leinfelder
	            if (emlDocument.distributionMetadata != null) {
341
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
342 6744 leinfelder
343 6960 leinfelder
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
344
				        String dataDocUrl = distMetadata.url;
345
				        String dataDocMimeType = distMetadata.mimeType;
346 6744 leinfelder
						// default to binary
347
						if (dataDocMimeType == null) {
348 6982 leinfelder
							dataDocMimeType = "application/octet-stream";
349 6721 leinfelder
						}
350 6852 leinfelder
351
						// process the data
352 6855 leinfelder
						boolean remoteData = false;
353 6852 leinfelder
						String dataDocLocalId = null;
354
						Identifier dataGuid = new Identifier();
355
356
						// handle ecogrid, or downloadable data
357
						String ecogridPrefix = "ecogrid://knb/";
358
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
359
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
360
						} else {
361
							// should we try downloading the remote data?
362
							if (downloadData) {
363
								InputStream dataObject = null;
364
								try {
365
									// download the data from the URL
366
									URL dataURL = new URL(dataDocUrl);
367 6873 leinfelder
									URLConnection dataConnection = dataURL.openConnection();
368
369
									// default is to download the data
370
									dataObject = dataConnection.getInputStream();
371
372
									String detectedContentType = dataConnection.getContentType();
373
									logMetacat.info("Detected content type: " + detectedContentType);
374
375
									if (detectedContentType != null) {
376
										// seems to be HTML from the remote location
377
										if (detectedContentType.contains("html")) {
378
											// if we are not expecting it, we skip it
379
											if (!dataDocMimeType.contains("html")) {
380
												// set to null so we don't download it
381
												dataObject = null;
382
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
383
											}
384
										}
385
386
									} else {
387
										// if we don't know what it is, should we skip it?
388
										dataObject = null;
389
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
390
									}
391
392 6852 leinfelder
								} catch (Exception e) {
393
									// error with the download
394
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
395
								}
396
397
								if (dataObject != null) {
398
									// create the local version of it
399
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
400
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
401
									dataGuid.setValue(dataDocLocalId);
402
403
									// save it locally
404
									Session session = new Session();
405
									session.setSubject(submitter);
406
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
407
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
408 6855 leinfelder
409
									remoteData = true;
410 6852 leinfelder
								}
411
							}
412
413
						}
414
415 6744 leinfelder
						logMetacat.debug("Data local ID: " + dataDocLocalId);
416
						logMetacat.debug("Data URL     : " + dataDocUrl);
417
						logMetacat.debug("Data mime    : " + dataDocMimeType);
418 6852 leinfelder
419 7112 leinfelder
						// check for valid docid.rev
420
						String dataDocid = null;
421
						int dataRev = 0;
422
						if (dataDocLocalId != null) {
423
							// look up the guid for the data
424
							try {
425
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
426
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
427
							} catch (Exception e) {
428
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
429
								dataDocLocalId = null;
430
							}
431
						}
432
433 6852 leinfelder
						// now we have a local id for the data
434
						if (dataDocLocalId != null) {
435 6744 leinfelder
436
							// check if data system metadata exists already
437
							SystemMetadata dataSysMeta = null;
438
							String dataGuidString = null;
439 6706 leinfelder
							try {
440 6744 leinfelder
								// look for the identifier
441 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
442 6744 leinfelder
								// set it
443 6706 leinfelder
								dataGuid.setValue(dataGuidString);
444 6744 leinfelder
								// look up the system metadata
445 6706 leinfelder
								try {
446 6744 leinfelder
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
447
								} catch (Exception e) {
448
									// probably not in the system
449
									dataSysMeta = null;
450 6706 leinfelder
								}
451 6744 leinfelder
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
452
							} catch (McdbDocNotFoundException nf) {
453
								// we didn't find it
454
								dataSysMeta = null;
455
							}
456 6712 leinfelder
457 6744 leinfelder
							// we'll have to generate it
458
							if (dataSysMeta == null) {
459
								// System metadata for data doesn't exist yet, so create it
460 6964 leinfelder
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
461 6961 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
462
463
								// now look it up again
464
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
465
466
								// set the guid
467
								dataGuid.setValue(dataGuidString);
468
469
								// inherit access rules from metadata, if we don't have our own
470
								if (remoteData) {
471
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
472
									// TODO: use access rules defined in EML, per data file
473
								}
474 6744 leinfelder
475
							}
476 6721 leinfelder
477 6961 leinfelder
							// set object format for the data file
478 6964 leinfelder
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
479 6982 leinfelder
							ObjectFormatIdentifier fmt = null;
480 6961 leinfelder
							try {
481 6982 leinfelder
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
482 6961 leinfelder
							} catch (NotFound nfe) {
483
								logMetacat.debug("Couldn't find format identifier for: "
484
												+ dataDocMimeType
485
												+ ". Setting it to application/octet-stream.");
486 6982 leinfelder
								fmt = new ObjectFormatIdentifier();
487
								fmt.setValue("application/octet-stream");
488 6961 leinfelder
							}
489 6982 leinfelder
							dataSysMeta.setFormatId(fmt);
490
491 6961 leinfelder
							// update the values
492
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
493
494
							// include as part of the ORE package
495 6744 leinfelder
							dataIds.add(dataGuid);
496
497
						} // end if (EML package)
498
499
					} // end for (data entities)
500
501
	            } // data entities not null
502
503 6712 leinfelder
				// ORE map
504 6713 leinfelder
				if (includeORE) {
505 6800 leinfelder
					// can we generate them?
506 6713 leinfelder
			        if (!dataIds.isEmpty()) {
507 6800 leinfelder
			        	// it doesn't exist in the system?
508
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
509
510
				            // generate the ORE map for this datapackage
511
				            Identifier resourceMapId = new Identifier();
512 7001 leinfelder
				            // use the local id, not the guid in case we have DOIs for them already
513
				            resourceMapId.setValue(resourceMapPrefix + localId);
514 6800 leinfelder
				            idMap.put(sysMeta.getIdentifier(), dataIds);
515
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
516
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
517
				            // copy most of the same system metadata as the packaging metadata
518
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
519
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
520
				            resourceMapSysMeta.setIdentifier(resourceMapId);
521
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
522
							resourceMapSysMeta.setChecksum(oreChecksum);
523
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
524
							resourceMapSysMeta.setFormatId(formatId);
525
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
526
527
							// set the revision graph
528
							resourceMapSysMeta.setObsoletes(null);
529
							resourceMapSysMeta.setObsoletedBy(null);
530
							// look up the resource map that this one obsoletes
531
							if (sysMeta.getObsoletes() != null) {
532
								Identifier resourceMapObsoletes = new Identifier();
533
								resourceMapObsoletes.setValue(resourceMapPrefix + sysMeta.getObsoletes().getValue());
534
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
535
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
536
								if (resourceMapObsoletesSystemMetadata != null) {
537
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
538
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
539
								}
540
							}
541
							// look up the resource map that this one is obsoletedBy
542
							if (sysMeta.getObsoletedBy() != null) {
543
								Identifier resourceMapObsoletedBy = new Identifier();
544
								resourceMapObsoletedBy.setValue(resourceMapPrefix + sysMeta.getObsoletedBy().getValue());
545
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
546
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
547
								if (resourceMapObsoletedBySystemMetadata != null) {
548
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
549
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
550
								}
551
							}
552
553 6907 leinfelder
							// save it locally, if it doesn't already exist
554
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
555
								Session session = new Session();
556
								session.setSubject(submitter);
557
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
558
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
559
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
560
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
561
							}
562 6800 leinfelder
			        	}
563 6713 leinfelder
			        }
564
				}
565 6706 leinfelder
566
			} catch (ParserConfigurationException pce) {
567 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
568 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
569
570
			} catch (SAXException saxe) {
571 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
572 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
573
574
			} catch (XPathExpressionException xpee) {
575 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
576 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
577
			} catch (Exception e) {
578 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
579 6706 leinfelder
								+ "The error message was: " + e.getMessage());
580 6721 leinfelder
				e.printStackTrace();
581 6706 leinfelder
			} // end try()
582
583
		} // end if()
584
585
		return sysMeta;
586
	}
587 6988 jones
588
    /**
589 6911 leinfelder
     * Generate SystemMetadata for any object in the object store that does
590
     * not already have it.  SystemMetadata documents themselves, are, of course,
591
     * exempt.  This is a utility method for migration of existing object
592
     * stores to DataONE where SystemMetadata is required for all objects.
593
     * @param idList
594
     * @param includeOre
595
     * @param downloadData
596
     * @throws PropertyNotFoundException
597
     * @throws NoSuchAlgorithmException
598
     * @throws AccessionNumberException
599
     * @throws SQLException
600 6964 leinfelder
	 * @throws SAXException
601
	 * @throws HandlerException
602
	 * @throws JiBXException
603
	 * @throws BaseException
604
	 * @throws ParseLSIDException
605
	 * @throws InsufficientKarmaException
606
	 * @throws ClassNotFoundException
607
	 * @throws IOException
608
	 * @throws McdbException
609
	 * @throws AccessException
610
	 * @throws AccessControlException
611 6911 leinfelder
     */
612
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData)
613 6964 leinfelder
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException
614 6911 leinfelder
    {
615
616
        for (String localId : idList) {
617 6998 leinfelder
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
618
            SystemMetadata sm = null;
619
620
            //generate required system metadata fields from the document
621 7123 leinfelder
            try {
622
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
623
            } catch (Exception e) {
624
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
625
				continue;
626
			}
627
628 6998 leinfelder
            //insert the systemmetadata object or just update it as needed
629
            boolean exists = IdentifierManager.getInstance().systemMetadataExists(sm.getIdentifier().getValue());
630
            if (!exists) {
631
            	IdentifierManager.getInstance().insertSystemMetadata(sm);
632
            	logMetacat.info("Generated SystemMetadata for " + localId);
633
            } else {
634
            	IdentifierManager.getInstance().updateSystemMetadata(sm);
635
            	logMetacat.info("Updated SystemMetadata for " + localId);
636
            }
637 6911 leinfelder
        }
638 6964 leinfelder
        logMetacat.info("done generating system metadata for given list");
639 6911 leinfelder
    }
640
641
	/**
642 6800 leinfelder
	 * Determines if we already have registered an ORE map for this package
643
	 * @param guid of the EML/packaging object
644
	 * @return true if there is an ORE map for the given package
645
	 */
646
	private static boolean oreExistsFor(Identifier guid) {
647
		// TODO: implement call to CN.search()
648
		return false;
649
	}
650 6706 leinfelder
651 6707 leinfelder
	/**
652 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
653
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
654
	 * evaluated.
655
	 *
656
	 * @param is The InputStream of bytes
657
	 *
658
	 * @return size The size in bytes of the input stream as a long
659
	 *
660
	 * @throws IOException
661
	 */
662
	private static long sizeOfStream(InputStream is) throws IOException {
663
664
		long size = 0;
665
		byte[] b = new byte[1024];
666
		int numread = is.read(b, 0, 1024);
667
		while (numread != -1) {
668
			size += numread;
669
			numread = is.read(b, 0, 1024);
670
		}
671
		return size;
672
673
	}
674 6962 leinfelder
675
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
676
677
		DocumentImpl doc = new DocumentImpl(docid, false);
678
		String filepath = null;
679
		String filename = null;
680
681
		// deal with data or metadata cases
682
		if (doc.getRootNodeID() == 0) {
683
			// this is a data file
684
			filepath = PropertyService.getProperty("application.datafilepath");
685
		} else {
686
			filepath = PropertyService.getProperty("application.documentfilepath");
687
		}
688
		// ensure it is a directory path
689
		if (!(filepath.endsWith("/"))) {
690
			filepath += "/";
691
		}
692
		filename = filepath + docid;
693
		File documentFile = new File(filename);
694
695
		return documentFile;
696
	}
697 6988 jones
698
	/**
699
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
700
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
701
	 * than or equal to zero, no policy needs to be set, so return null.
702
	 * @return ReplicationPolicy, or null if no replication policy is needed
703
	 */
704
    private static ReplicationPolicy getDefaultReplicationPolicy() {
705
        ReplicationPolicy rp = null;
706
        int numReplicas = -1;
707
        try {
708
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
709
        } catch (NumberFormatException e) {
710
            // The property is not a valid integer, so return a null policy
711
            return null;
712
        } catch (PropertyNotFoundException e) {
713
            // The property is not found, so return a null policy
714
            return null;
715
        }
716
717
        if (numReplicas > 0) {
718
            rp = new ReplicationPolicy();
719
            rp.setReplicationAllowed(true);
720
            rp.setNumberReplicas(numReplicas);
721
            try {
722
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
723
                if (preferredNodeList != null) {
724
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
725
                    if (pNodes != null && !pNodes.isEmpty()) {
726
                        rp.setPreferredMemberNodeList(pNodes);
727
                    }
728
                }
729
            } catch (PropertyNotFoundException e) {
730
                // No preferred list found in properties, so just ignore it; no action needed
731
            }
732
            try {
733
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
734
                if (blockedNodeList != null) {
735
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
736
                    if (bNodes != null && !bNodes.isEmpty()) {
737 7022 leinfelder
                        rp.setBlockedMemberNodeList(bNodes);
738 6988 jones
                    }
739
                }
740
            } catch (PropertyNotFoundException e) {
741
                // No blocked list found in properties, so just ignore it; no action needed
742
            }
743
        }
744
        return rp;
745
    }
746
747
    /**
748 7025 leinfelder
     * Extract a List of NodeReferences from a String listing the node identifiers where
749 6988 jones
     * each identifier is separated by whitespace, comma, or semicolon characters.
750
     * @param nodeString the string containing the list of nodes
751 7025 leinfelder
     * @return the List of NodeReference objects parsed from the input string
752 6988 jones
     */
753
    private static List<NodeReference> extractNodeReferences(String nodeString) {
754
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
755 7022 leinfelder
        String[] result = nodeString.split("[,;\\s]");
756 6988 jones
        for (String r : result) {
757 7025 leinfelder
        	if (r != null && r.length() > 0) {
758
	            NodeReference noderef = new NodeReference();
759
	            noderef.setValue(r);
760
	            nodeList.add(noderef);
761
	        }
762 6988 jones
        }
763
        return nodeList;
764
    }
765 6705 leinfelder
}