Project

General

Profile

1 6705 leinfelder
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author$'
9
 *     '$Date$'
10
 * '$Revision$'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27
28 6962 leinfelder
import java.io.File;
29 6705 leinfelder
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32 6852 leinfelder
import java.net.URL;
33 6873 leinfelder
import java.net.URLConnection;
34 6705 leinfelder
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36 6712 leinfelder
import java.util.ArrayList;
37 6727 leinfelder
import java.util.Collections;
38 6705 leinfelder
import java.util.Date;
39 6712 leinfelder
import java.util.HashMap;
40 6705 leinfelder
import java.util.Hashtable;
41 6712 leinfelder
import java.util.List;
42
import java.util.Map;
43 6709 leinfelder
import java.util.Vector;
44 6705 leinfelder
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47
48 6712 leinfelder
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50 6705 leinfelder
import org.apache.log4j.Logger;
51 7622 leinfelder
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52 8810 leinfelder
import org.dataone.client.v2.formats.ObjectFormatCache;
53 6960 leinfelder
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56 9833 tao
import org.dataone.exceptions.MarshallingException;
57 6712 leinfelder
import org.dataone.ore.ResourceMapFactory;
58 6705 leinfelder
import org.dataone.service.exceptions.BaseException;
59
import org.dataone.service.exceptions.NotFound;
60 6721 leinfelder
import org.dataone.service.types.v1.AccessPolicy;
61 7214 cjones
import org.dataone.service.types.v1.AccessRule;
62 6705 leinfelder
import org.dataone.service.types.v1.Checksum;
63
import org.dataone.service.types.v1.Identifier;
64
import org.dataone.service.types.v1.NodeReference;
65
import org.dataone.service.types.v1.ObjectFormatIdentifier;
66 6988 jones
import org.dataone.service.types.v1.ReplicationPolicy;
67 6721 leinfelder
import org.dataone.service.types.v1.Session;
68 6705 leinfelder
import org.dataone.service.types.v1.Subject;
69 8810 leinfelder
import org.dataone.service.types.v2.SystemMetadata;
70 6705 leinfelder
import org.dataone.service.types.v1.util.ChecksumUtil;
71 6709 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
72 6712 leinfelder
import org.dspace.foresite.ResourceMap;
73 6705 leinfelder
import org.xml.sax.SAXException;
74
75 7087 cjones
import java.util.Calendar;
76 7084 leinfelder
77 6705 leinfelder
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79 6709 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
80 6962 leinfelder
import edu.ucsb.nceas.metacat.DocumentImpl;
81 6705 leinfelder
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84 6712 leinfelder
import edu.ucsb.nceas.metacat.MetaCatServlet;
85 6705 leinfelder
import edu.ucsb.nceas.metacat.MetacatHandler;
86 6708 leinfelder
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87 6705 leinfelder
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89 8986 tao
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
90 6705 leinfelder
import edu.ucsb.nceas.metacat.properties.PropertyService;
91 6708 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
92 6721 leinfelder
import edu.ucsb.nceas.metacat.shared.AccessException;
93 6708 leinfelder
import edu.ucsb.nceas.metacat.shared.HandlerException;
94 6705 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
95
import edu.ucsb.nceas.utilities.ParseLSIDException;
96
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
97
98
public class SystemMetadataFactory {
99 6706 leinfelder
100 7849 leinfelder
	/**
	 * Prefix prepended to a document's local id to form the identifier of the
	 * ORE resource map generated for that document.
	 */
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
101 6707 leinfelder
	/** Logger shared by all static methods of this factory. */
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
102 6961 leinfelder
	/**
	 * Controls whether system metadata that already exists is regenerated.
	 * When {@code true}, createSystemMetadata() overwrites the values of any
	 * existing system metadata with freshly generated content; when
	 * {@code false}, it returns the result of the existing lookup unchanged.
	 */
105
	private static boolean updateExisting = true;
106 6712 leinfelder
107 8986 tao
108
109 6705 leinfelder
	/**
110 8986 tao
	 * Create a system metadata object for insertion into metacat
111
	 * @param localId
112
	 * @param includeORE
113
	 * @param downloadData
114
	 * @return
115
	 * @throws McdbException
116
	 * @throws McdbDocNotFoundException
117
	 * @throws SQLException
118
	 * @throws IOException
119
	 * @throws AccessionNumberException
120
	 * @throws ClassNotFoundException
121
	 * @throws InsufficientKarmaException
122
	 * @throws ParseLSIDException
123
	 * @throws PropertyNotFoundException
124
	 * @throws BaseException
125
	 * @throws NoSuchAlgorithmException
126 9833 tao
	 * @throws MarshallingException
127 8986 tao
	 * @throws AccessControlException
128
	 * @throws HandlerException
129
	 * @throws SAXException
130
	 * @throws AccessException
131
	 */
132
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
133
            throws McdbException, McdbDocNotFoundException, SQLException,
134
            IOException, AccessionNumberException, ClassNotFoundException,
135
            InsufficientKarmaException, ParseLSIDException,
136
            PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
137 9833 tao
            MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
138 8986 tao
	        boolean indexDataFile = false;
139
	        return createSystemMetadata(indexDataFile, localId, includeORE, downloadData);
140
	}
141
	/**
142 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
143 8986 tao
	 * @param indexDataFile
144
	 *            Indicate if we need to index data file.
145 6706 leinfelder
	 *
146
	 * @param localId
147
	 *            The local document identifier
148
	 * @param user
149
	 *            The user submitting the system metadata document
150
	 * @param groups
151
	 *            The groups the user belongs to
152
	 *
153
	 * @return sysMeta The system metadata object created
154 6708 leinfelder
	 * @throws SAXException
155
	 * @throws HandlerException
156
	 * @throws AccessControlException
157 6721 leinfelder
	 * @throws AccessException
158 6706 leinfelder
	 */
159 8986 tao
	public static SystemMetadata createSystemMetadata(boolean indexDataFile, String localId, boolean includeORE, boolean downloadData)
160 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
161
			IOException, AccessionNumberException, ClassNotFoundException,
162
			InsufficientKarmaException, ParseLSIDException,
163
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
164 9833 tao
			MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
165 6707 leinfelder
166 6964 leinfelder
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
167 6705 leinfelder
168 6961 leinfelder
		// check for system metadata
169
		SystemMetadata sysMeta = null;
170
171 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
172 6808 leinfelder
		int rev = Integer.valueOf(accNum.getRev());
173 6961 leinfelder
174
		// get/make the guid
175
		String guid = null;
176
		try {
177
			// get the guid if it exists
178
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
179
		} catch (McdbDocNotFoundException dnfe) {
180
			// otherwise create the mapping
181 6964 leinfelder
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
182 6961 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
183 6964 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
184 6961 leinfelder
		}
185
186
		// look up existing system metadata if it exists
187
		Identifier identifier = new Identifier();
188
		identifier.setValue(guid);
189
		try {
190 6964 leinfelder
			logMetacat.debug("Using hazelcast to get system metadata");
191 6961 leinfelder
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
192
			// TODO: if this is the case, we could return here -- what else do we gain?
193
			if (!updateExisting ) {
194
				return sysMeta;
195
			}
196
		} catch (Exception e) {
197 6964 leinfelder
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
198
199 6970 leinfelder
		}
200
201
		if (sysMeta == null) {
202 6961 leinfelder
			// create system metadata
203
			sysMeta = new SystemMetadata();
204
			sysMeta.setIdentifier(identifier);
205
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
206
			sysMeta.setArchived(false);
207
		}
208 6962 leinfelder
209 6706 leinfelder
		// get additional docinfo
210 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
211 6706 leinfelder
		// set the default object format
212 6708 leinfelder
		String doctype = docInfo.get("doctype");
213 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
214
215
		// set the object format, fall back to defaults
216 6982 leinfelder
		if (doctype.trim().equals("BIN")) {
217
			// we don't know much about this file (yet)
218
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
219 8028 leinfelder
		} else if (doctype.trim().equals("metadata")) {
220
			// special ESRI FGDC format
221
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
222 6982 leinfelder
		} else {
223
			try {
224
				// do we know the given format?
225
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
226
			} catch (NotFound nfe) {
227
				// format is not registered, use default
228 6964 leinfelder
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
229 6706 leinfelder
			}
230
		}
231
232
		sysMeta.setFormatId(fmtid);
233 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
234 6706 leinfelder
235 6962 leinfelder
		// for retrieving the actual object
236
		InputStream inputStream = null;
237
		inputStream = MetacatHandler.read(localId);
238
239 6721 leinfelder
		// create the checksum
240 7222 leinfelder
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
241 6721 leinfelder
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
242 7084 leinfelder
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
243 6721 leinfelder
		sysMeta.setChecksum(checksum);
244
245 6962 leinfelder
		// set the size from file on disk, don't read bytes again
246
		File fileOnDisk = getFileOnDisk(localId);
247
		long fileSize = 0;
248
		if (fileOnDisk.exists()) {
249
			fileSize = fileOnDisk.length();
250
		}
251
		sysMeta.setSize(BigInteger.valueOf(fileSize));
252 6721 leinfelder
253
		// submitter
254
		Subject submitter = new Subject();
255
		submitter.setValue(docInfo.get("user_updated"));
256
		sysMeta.setSubmitter(submitter);
257
258
		// rights holder
259
		Subject owner = new Subject();
260
		owner.setValue(docInfo.get("user_owner"));
261
		sysMeta.setRightsHolder(owner);
262
263
		// dates
264
		String createdDateString = docInfo.get("date_created");
265
		String updatedDateString = docInfo.get("date_updated");
266
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
267
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
268
		sysMeta.setDateUploaded(createdDate);
269 7084 leinfelder
		//sysMeta.setDateSysMetadataModified(updatedDate);
270
		// use current datetime
271
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
272 6721 leinfelder
273
		// set the revision history
274
		String docidWithoutRev = accNum.getDocid();
275
		Identifier obsoletedBy = null;
276
		Identifier obsoletes = null;
277
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
278 6727 leinfelder
		// ensure this ordering since processing depends on it
279
		Collections.sort(revisions);
280 6721 leinfelder
		for (int existingRev: revisions) {
281
			// use the docid+rev as the guid
282
			String existingPid = docidWithoutRev + "." + existingRev;
283 7001 leinfelder
			try {
284
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
285
			} catch (McdbDocNotFoundException mdfe) {
286
				// we'll be defaulting to the local id
287
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
288
			}
289 6721 leinfelder
			if (existingRev < rev) {
290
				// it's the old docid, until it's not
291
				obsoletes = new Identifier();
292
				obsoletes.setValue(existingPid);
293
			}
294
			if (existingRev > rev) {
295
				// it's the newer docid
296
				obsoletedBy = new Identifier();
297
				obsoletedBy.setValue(existingPid);
298
				// only want the version just after it
299
				break;
300
			}
301
		}
302 6725 leinfelder
		// set them on our object
303 6721 leinfelder
		sysMeta.setObsoletedBy(obsoletedBy);
304
		sysMeta.setObsoletes(obsoletes);
305
306 6725 leinfelder
		// update the system metadata for the object[s] we are revising
307
		if (obsoletedBy != null) {
308 6971 leinfelder
			SystemMetadata obsoletedBySysMeta = null;
309
			try {
310
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
311
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
312
			} catch (McdbDocNotFoundException e) {
313
				// ignore
314
			}
315 6725 leinfelder
			if (obsoletedBySysMeta != null) {
316
				obsoletedBySysMeta.setObsoletes(identifier);
317 7297 leinfelder
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
318 6725 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
319
			}
320
		}
321
		if (obsoletes != null) {
322 6971 leinfelder
			SystemMetadata obsoletesSysMeta = null;
323
			try {
324
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
325
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
326
			} catch (McdbDocNotFoundException e) {
327
				// ignore
328
			}
329 6725 leinfelder
			if (obsoletesSysMeta != null) {
330
				obsoletesSysMeta.setObsoletedBy(identifier);
331 8566 leinfelder
				// DO NOT set archived to true -- it will have unintended consequences if the CN sees this.
332
				//obsoletesSysMeta.setArchived(true);
333 7297 leinfelder
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
334 6911 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
335 6725 leinfelder
			}
336
		}
337
338 6744 leinfelder
		// look up the access control policy we have in metacat
339
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
340 7214 cjones
		try {
341
        List<AccessRule> allowList = accessPolicy.getAllowList();
342 7215 cjones
        int listSize = allowList.size();
343 7214 cjones
        sysMeta.setAccessPolicy(accessPolicy);
344
345
    } catch (NullPointerException npe) {
346
        logMetacat.info("The allow list is empty, can't include an empty " +
347
            "access policy in the system metadata for " + guid);
348
349
    }
350 6721 leinfelder
351
		// authoritative node
352
		NodeReference nr = new NodeReference();
353 7030 cjones
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
354 6721 leinfelder
		sysMeta.setOriginMemberNode(nr);
355
		sysMeta.setAuthoritativeMemberNode(nr);
356
357 6988 jones
		// Set a default replication policy
358
        ReplicationPolicy rp = getDefaultReplicationPolicy();
359
        if (rp != null) {
360
            sysMeta.setReplicationPolicy(rp);
361
        }
362
363 6706 leinfelder
		// further parse EML documents to get data object format,
364
		// describes and describedBy information
365
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
366
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
367
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
368
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
369
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
370
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
371
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
372
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
373
374
			try {
375 6962 leinfelder
376
				// get it again to parse the document
377
				logMetacat.debug("Re-reading document inputStream");
378 6721 leinfelder
				inputStream = MetacatHandler.read(localId);
379 6960 leinfelder
380
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
381
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
382
383 6721 leinfelder
				// iterate through the data objects in the EML doc and add sysmeta
384 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
385 6706 leinfelder
								+ "entities is: "
386 6960 leinfelder
								+ emlDocument.distributionMetadata);
387 6706 leinfelder
388 6712 leinfelder
				// for generating the ORE map
389
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
390
	            List<Identifier> dataIds = new ArrayList<Identifier>();
391
392 6706 leinfelder
				// iterate through data objects described by the EML
393 6960 leinfelder
	            if (emlDocument.distributionMetadata != null) {
394
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
395 6744 leinfelder
396 6960 leinfelder
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
397
				        String dataDocUrl = distMetadata.url;
398
				        String dataDocMimeType = distMetadata.mimeType;
399 6744 leinfelder
						// default to binary
400
						if (dataDocMimeType == null) {
401 6982 leinfelder
							dataDocMimeType = "application/octet-stream";
402 6721 leinfelder
						}
403 6852 leinfelder
404
						// process the data
405 6855 leinfelder
						boolean remoteData = false;
406 6852 leinfelder
						String dataDocLocalId = null;
407
						Identifier dataGuid = new Identifier();
408
409
						// handle ecogrid, or downloadable data
410
						String ecogridPrefix = "ecogrid://knb/";
411
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
412
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
413
						} else {
414
							// should we try downloading the remote data?
415
							if (downloadData) {
416
								InputStream dataObject = null;
417
								try {
418
									// download the data from the URL
419
									URL dataURL = new URL(dataDocUrl);
420 6873 leinfelder
									URLConnection dataConnection = dataURL.openConnection();
421
422
									// default is to download the data
423
									dataObject = dataConnection.getInputStream();
424
425
									String detectedContentType = dataConnection.getContentType();
426
									logMetacat.info("Detected content type: " + detectedContentType);
427
428
									if (detectedContentType != null) {
429
										// seems to be HTML from the remote location
430
										if (detectedContentType.contains("html")) {
431
											// if we are not expecting it, we skip it
432
											if (!dataDocMimeType.contains("html")) {
433
												// set to null so we don't download it
434
												dataObject = null;
435
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
436
											}
437
										}
438
439
									} else {
440
										// if we don't know what it is, should we skip it?
441
										dataObject = null;
442
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
443
									}
444
445 6852 leinfelder
								} catch (Exception e) {
446
									// error with the download
447
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
448
								}
449
450
								if (dataObject != null) {
451
									// create the local version of it
452
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
453
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
454
									dataGuid.setValue(dataDocLocalId);
455
456
									// save it locally
457
									Session session = new Session();
458
									session.setSubject(submitter);
459
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
460
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
461 6855 leinfelder
462
									remoteData = true;
463 6852 leinfelder
								}
464
							}
465
466
						}
467
468 6744 leinfelder
						logMetacat.debug("Data local ID: " + dataDocLocalId);
469
						logMetacat.debug("Data URL     : " + dataDocUrl);
470
						logMetacat.debug("Data mime    : " + dataDocMimeType);
471 6852 leinfelder
472 7112 leinfelder
						// check for valid docid.rev
473
						String dataDocid = null;
474
						int dataRev = 0;
475
						if (dataDocLocalId != null) {
476
							// look up the guid for the data
477
							try {
478
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
479
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
480
							} catch (Exception e) {
481
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
482
								dataDocLocalId = null;
483
							}
484
						}
485
486 6852 leinfelder
						// now we have a local id for the data
487
						if (dataDocLocalId != null) {
488 6744 leinfelder
489
							// check if data system metadata exists already
490
							SystemMetadata dataSysMeta = null;
491
							String dataGuidString = null;
492 6706 leinfelder
							try {
493 6744 leinfelder
								// look for the identifier
494 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
495 6744 leinfelder
								// set it
496 6706 leinfelder
								dataGuid.setValue(dataGuidString);
497 6744 leinfelder
								// look up the system metadata
498 6706 leinfelder
								try {
499 6744 leinfelder
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
500
								} catch (Exception e) {
501
									// probably not in the system
502
									dataSysMeta = null;
503 6706 leinfelder
								}
504 6744 leinfelder
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
505
							} catch (McdbDocNotFoundException nf) {
506
								// we didn't find it
507
								dataSysMeta = null;
508
							}
509 6712 leinfelder
510 6744 leinfelder
							// we'll have to generate it
511
							if (dataSysMeta == null) {
512
								// System metadata for data doesn't exist yet, so create it
513 6964 leinfelder
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
514 6961 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
515
516
								// now look it up again
517
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
518
519
								// set the guid
520
								dataGuid.setValue(dataGuidString);
521
522
								// inherit access rules from metadata, if we don't have our own
523
								if (remoteData) {
524
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
525
									// TODO: use access rules defined in EML, per data file
526
								}
527 6744 leinfelder
528
							}
529 6721 leinfelder
530 6961 leinfelder
							// set object format for the data file
531 6964 leinfelder
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
532 6982 leinfelder
							ObjectFormatIdentifier fmt = null;
533 6961 leinfelder
							try {
534 6982 leinfelder
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
535 6961 leinfelder
							} catch (NotFound nfe) {
536
								logMetacat.debug("Couldn't find format identifier for: "
537
												+ dataDocMimeType
538
												+ ". Setting it to application/octet-stream.");
539 6982 leinfelder
								fmt = new ObjectFormatIdentifier();
540
								fmt.setValue("application/octet-stream");
541 6961 leinfelder
							}
542 6982 leinfelder
							dataSysMeta.setFormatId(fmt);
543
544 6961 leinfelder
							// update the values
545
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
546
547 8986 tao
							// reindex data file if need it.
548
							logMetacat.debug("do we need to reindex guid "+dataGuid.getValue()+"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~?"+indexDataFile);
549
							if(indexDataFile) {
550
							    reindexDataFile(dataSysMeta.getIdentifier(), dataSysMeta);
551
							}
552
553 6961 leinfelder
							// include as part of the ORE package
554 6744 leinfelder
							dataIds.add(dataGuid);
555
556
						} // end if (EML package)
557
558
					} // end for (data entities)
559
560
	            } // data entities not null
561
562 6712 leinfelder
				// ORE map
563 6713 leinfelder
				if (includeORE) {
564 6800 leinfelder
					// can we generate them?
565 6713 leinfelder
			        if (!dataIds.isEmpty()) {
566 6800 leinfelder
			        	// it doesn't exist in the system?
567
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
568
569
				            // generate the ORE map for this datapackage
570
				            Identifier resourceMapId = new Identifier();
571 7001 leinfelder
				            // use the local id, not the guid in case we have DOIs for them already
572 7849 leinfelder
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
573 6800 leinfelder
				            idMap.put(sysMeta.getIdentifier(), dataIds);
574
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
575
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
576
				            // copy most of the same system metadata as the packaging metadata
577
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
578
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
579
				            resourceMapSysMeta.setIdentifier(resourceMapId);
580 7222 leinfelder
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
581 6800 leinfelder
							resourceMapSysMeta.setChecksum(oreChecksum);
582
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
583
							resourceMapSysMeta.setFormatId(formatId);
584
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
585
586
							// set the revision graph
587
							resourceMapSysMeta.setObsoletes(null);
588
							resourceMapSysMeta.setObsoletedBy(null);
589
							// look up the resource map that this one obsoletes
590
							if (sysMeta.getObsoletes() != null) {
591 7273 leinfelder
								// use the localId in case we have a DOI
592
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
593 6800 leinfelder
								Identifier resourceMapObsoletes = new Identifier();
594 7849 leinfelder
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
595 6800 leinfelder
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
596
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
597
								if (resourceMapObsoletesSystemMetadata != null) {
598
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
599 7278 leinfelder
									resourceMapObsoletesSystemMetadata.setArchived(true);
600 6800 leinfelder
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
601
								}
602
							}
603
							// look up the resource map that this one is obsoletedBy
604
							if (sysMeta.getObsoletedBy() != null) {
605 7273 leinfelder
								// use the localId in case we have a DOI
606
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
607 6800 leinfelder
								Identifier resourceMapObsoletedBy = new Identifier();
608 7849 leinfelder
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
609 6800 leinfelder
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
610 7278 leinfelder
								resourceMapSysMeta.setArchived(true);
611 6800 leinfelder
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
612
								if (resourceMapObsoletedBySystemMetadata != null) {
613
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
614
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
615
								}
616
							}
617
618 6907 leinfelder
							// save it locally, if it doesn't already exist
619
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
620
								Session session = new Session();
621
								session.setSubject(submitter);
622
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
623
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
624
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
625
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
626
							}
627 6800 leinfelder
			        	}
628 6713 leinfelder
			        }
629
				}
630 6706 leinfelder
631
			} catch (ParserConfigurationException pce) {
632 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
633 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
634
635
			} catch (SAXException saxe) {
636 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
637 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
638
639
			} catch (XPathExpressionException xpee) {
640 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
641 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
642
			} catch (Exception e) {
643 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
644 6706 leinfelder
								+ "The error message was: " + e.getMessage());
645 6721 leinfelder
				e.printStackTrace();
646 6706 leinfelder
			} // end try()
647
648
		} // end if()
649
650
		return sysMeta;
651
	}
652 8986 tao
653
	/*
654
	 * Re-index the data file since the access rule was changed during the inserting of the eml document.
655
	 * (During first time to index the data file in Metacat API, the eml hasn't been inserted)
656
	 */
657
	private static void reindexDataFile(Identifier id, SystemMetadata sysmeta) {
658
	    try {
659
	        logMetacat.debug("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ reindex"+id.getValue());
660 8991 tao
	        if(sysmeta != null) {
661
	            if(!sysmeta.getArchived()) {
662
	                //set the archive to true to remove index.
663
	                sysmeta.setArchived(true);
664
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
665
	                //re-insert the index
666
	                sysmeta.setArchived(false);
667
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
668
	            } else {
669
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
670
	            }
671
	        }
672
673 8986 tao
        } catch (Exception e) {
674
            // TODO Auto-generated catch block
675
            logMetacat.warn("Can't reindex the data object "+id.getValue()+" since "+e.getMessage());
676
            //e.printStackTrace();
677
        }
678
	}
679 6988 jones
680 8190 leinfelder
	/**
681
	 * Checks for potential ORE object existence
682
	 * @param identifier
683
	 * @return
684
	 */
685
    public static boolean oreExistsFor(Identifier identifier) {
686
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
687 8200 leinfelder
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
688 8190 leinfelder
		return (ids != null && ids.size() > 0);
689
	}
690
691
	/**
692 6911 leinfelder
     * Generate SystemMetadata for any object in the object store that does
693
     * not already have it.  SystemMetadata documents themselves, are, of course,
694
     * exempt.  This is a utility method for migration of existing object
695
     * stores to DataONE where SystemMetadata is required for all objects.
696
     * @param idList
697
     * @param includeOre
698
     * @param downloadData
699
     * @throws PropertyNotFoundException
700
     * @throws NoSuchAlgorithmException
701
     * @throws AccessionNumberException
702
     * @throws SQLException
703 6964 leinfelder
	 * @throws SAXException
704
	 * @throws HandlerException
705 9833 tao
	 * @throws MarshallingException
706 6964 leinfelder
	 * @throws BaseException
707
	 * @throws ParseLSIDException
708
	 * @throws InsufficientKarmaException
709
	 * @throws ClassNotFoundException
710
	 * @throws IOException
711
	 * @throws McdbException
712
	 * @throws AccessException
713
	 * @throws AccessControlException
714 6911 leinfelder
     */
715
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData)
716 9833 tao
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, MarshallingException, HandlerException, SAXException
717 6911 leinfelder
    {
718
719
        for (String localId : idList) {
720 6998 leinfelder
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
721 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
722
723 6998 leinfelder
            SystemMetadata sm = null;
724
725
            //generate required system metadata fields from the document
726 7123 leinfelder
            try {
727
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
728
            } catch (Exception e) {
729
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
730
				continue;
731
			}
732
733 6998 leinfelder
            //insert the systemmetadata object or just update it as needed
734 7188 leinfelder
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
735
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
736
737 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
738
739 6911 leinfelder
        }
740 6964 leinfelder
        logMetacat.info("done generating system metadata for given list");
741 6911 leinfelder
    }
742 6706 leinfelder
743 6707 leinfelder
	/**
744 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
745
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
746
	 * evaluated.
747
	 *
748
	 * @param is The InputStream of bytes
749
	 *
750
	 * @return size The size in bytes of the input stream as a long
751
	 *
752
	 * @throws IOException
753
	 */
754 7322 leinfelder
	public static long sizeOfStream(InputStream is) throws IOException {
755 6706 leinfelder
756
		long size = 0;
757
		byte[] b = new byte[1024];
758
		int numread = is.read(b, 0, 1024);
759
		while (numread != -1) {
760
			size += numread;
761
			numread = is.read(b, 0, 1024);
762
		}
763
		return size;
764
765
	}
766 6962 leinfelder
767
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
768
769
		DocumentImpl doc = new DocumentImpl(docid, false);
770
		String filepath = null;
771
		String filename = null;
772
773
		// deal with data or metadata cases
774
		if (doc.getRootNodeID() == 0) {
775
			// this is a data file
776
			filepath = PropertyService.getProperty("application.datafilepath");
777
		} else {
778
			filepath = PropertyService.getProperty("application.documentfilepath");
779
		}
780
		// ensure it is a directory path
781
		if (!(filepath.endsWith("/"))) {
782
			filepath += "/";
783
		}
784
		filename = filepath + docid;
785
		File documentFile = new File(filename);
786
787
		return documentFile;
788
	}
789 6988 jones
790
	/**
791
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
792
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
793
	 * than or equal to zero, no policy needs to be set, so return null.
794
	 * @return ReplicationPolicy, or null if no replication policy is needed
795
	 */
796 10047 jones
    protected static ReplicationPolicy getDefaultReplicationPolicy() {
797 6988 jones
        ReplicationPolicy rp = null;
798
        int numReplicas = -1;
799
        try {
800
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
801
        } catch (NumberFormatException e) {
802 10047 jones
            // The property is not a valid integer, so set it to 0
803
            numReplicas = 0;
804 6988 jones
        } catch (PropertyNotFoundException e) {
805 10047 jones
            // The property is not found, so set it to 0
806
            numReplicas = 0;
807 6988 jones
        }
808
809 10047 jones
        rp = new ReplicationPolicy();
810 6988 jones
        if (numReplicas > 0) {
811
            rp.setReplicationAllowed(true);
812
            rp.setNumberReplicas(numReplicas);
813
            try {
814
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
815
                if (preferredNodeList != null) {
816
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
817
                    if (pNodes != null && !pNodes.isEmpty()) {
818
                        rp.setPreferredMemberNodeList(pNodes);
819
                    }
820
                }
821
            } catch (PropertyNotFoundException e) {
822
                // No preferred list found in properties, so just ignore it; no action needed
823
            }
824
            try {
825
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
826
                if (blockedNodeList != null) {
827
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
828
                    if (bNodes != null && !bNodes.isEmpty()) {
829 7022 leinfelder
                        rp.setBlockedMemberNodeList(bNodes);
830 6988 jones
                    }
831
                }
832
            } catch (PropertyNotFoundException e) {
833
                // No blocked list found in properties, so just ignore it; no action needed
834
            }
835 10047 jones
        } else {
836
            rp.setReplicationAllowed(false);
837
            rp.setNumberReplicas(0);
838 6988 jones
        }
839
        return rp;
840
    }
841
842
    /**
843 7025 leinfelder
     * Extract a List of NodeReferences from a String listing the node identifiers where
844 6988 jones
     * each identifier is separated by whitespace, comma, or semicolon characters.
845
     * @param nodeString the string containing the list of nodes
846 7025 leinfelder
     * @return the List of NodeReference objects parsed from the input string
847 6988 jones
     */
848
    private static List<NodeReference> extractNodeReferences(String nodeString) {
849
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
850 7022 leinfelder
        String[] result = nodeString.split("[,;\\s]");
851 6988 jones
        for (String r : result) {
852 7025 leinfelder
        	if (r != null && r.length() > 0) {
853
	            NodeReference noderef = new NodeReference();
854
	            noderef.setValue(r);
855
	            nodeList.add(noderef);
856
	        }
857 6988 jones
        }
858
        return nodeList;
859
    }
860 6705 leinfelder
}