Project

General

Profile

1 6705 leinfelder
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author$'
9
 *     '$Date$'
10
 * '$Revision$'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27
28 6962 leinfelder
import java.io.File;
29 6705 leinfelder
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32 6852 leinfelder
import java.net.URL;
33 6873 leinfelder
import java.net.URLConnection;
34 6705 leinfelder
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36 6712 leinfelder
import java.util.ArrayList;
37 6727 leinfelder
import java.util.Collections;
38 6705 leinfelder
import java.util.Date;
39 6712 leinfelder
import java.util.HashMap;
40 6705 leinfelder
import java.util.Hashtable;
41 6712 leinfelder
import java.util.List;
42
import java.util.Map;
43 6709 leinfelder
import java.util.Vector;
44 6705 leinfelder
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47
48 6712 leinfelder
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50 6705 leinfelder
import org.apache.log4j.Logger;
51 7622 leinfelder
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52 8810 leinfelder
import org.dataone.client.v2.formats.ObjectFormatCache;
53 6960 leinfelder
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56 9833 tao
import org.dataone.exceptions.MarshallingException;
57 6712 leinfelder
import org.dataone.ore.ResourceMapFactory;
58 6705 leinfelder
import org.dataone.service.exceptions.BaseException;
59
import org.dataone.service.exceptions.NotFound;
60 6721 leinfelder
import org.dataone.service.types.v1.AccessPolicy;
61 7214 cjones
import org.dataone.service.types.v1.AccessRule;
62 6705 leinfelder
import org.dataone.service.types.v1.Checksum;
63
import org.dataone.service.types.v1.Identifier;
64
import org.dataone.service.types.v1.NodeReference;
65
import org.dataone.service.types.v1.ObjectFormatIdentifier;
66 6988 jones
import org.dataone.service.types.v1.ReplicationPolicy;
67 6721 leinfelder
import org.dataone.service.types.v1.Session;
68 6705 leinfelder
import org.dataone.service.types.v1.Subject;
69 8810 leinfelder
import org.dataone.service.types.v2.SystemMetadata;
70 6705 leinfelder
import org.dataone.service.types.v1.util.ChecksumUtil;
71 6709 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
72 6712 leinfelder
import org.dspace.foresite.ResourceMap;
73 6705 leinfelder
import org.xml.sax.SAXException;
74
75 7087 cjones
import java.util.Calendar;
76 7084 leinfelder
77 6705 leinfelder
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79 6709 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
80 6962 leinfelder
import edu.ucsb.nceas.metacat.DocumentImpl;
81 6705 leinfelder
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84 6712 leinfelder
import edu.ucsb.nceas.metacat.MetaCatServlet;
85 6705 leinfelder
import edu.ucsb.nceas.metacat.MetacatHandler;
86 6708 leinfelder
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87 6705 leinfelder
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89 8986 tao
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
90 6705 leinfelder
import edu.ucsb.nceas.metacat.properties.PropertyService;
91 6708 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
92 6721 leinfelder
import edu.ucsb.nceas.metacat.shared.AccessException;
93 6708 leinfelder
import edu.ucsb.nceas.metacat.shared.HandlerException;
94 6705 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
95
import edu.ucsb.nceas.utilities.ParseLSIDException;
96
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
97
98
public class SystemMetadataFactory {
99 6706 leinfelder
100 7849 leinfelder
	/** Prefix prepended to a local id to build the identifier of a generated ORE resource map. */
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
101 6707 leinfelder
	/** log4j logger used by the static methods of this class. */
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
102 6961 leinfelder
	/**
	 * Controls whether existing system metadata is refreshed with generated content.
	 * When false, createSystemMetadata() returns whatever the Hazelcast system
	 * metadata map holds for the identifier without regenerating any values.
	 */
105
	private static boolean updateExisting = true;
106 6712 leinfelder
107 8986 tao
108
109 6705 leinfelder
	/**
110 8986 tao
	 * Create a system metadata object for insertion into metacat
111
	 * @param localId
112
	 * @param includeORE
113
	 * @param downloadData
114
	 * @return
115
	 * @throws McdbException
116
	 * @throws McdbDocNotFoundException
117
	 * @throws SQLException
118
	 * @throws IOException
119
	 * @throws AccessionNumberException
120
	 * @throws ClassNotFoundException
121
	 * @throws InsufficientKarmaException
122
	 * @throws ParseLSIDException
123
	 * @throws PropertyNotFoundException
124
	 * @throws BaseException
125
	 * @throws NoSuchAlgorithmException
126 9833 tao
	 * @throws MarshallingException
127 8986 tao
	 * @throws AccessControlException
128
	 * @throws HandlerException
129
	 * @throws SAXException
130
	 * @throws AccessException
131
	 */
132
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
133
            throws McdbException, McdbDocNotFoundException, SQLException,
134
            IOException, AccessionNumberException, ClassNotFoundException,
135
            InsufficientKarmaException, ParseLSIDException,
136
            PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
137 9833 tao
            MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
138 8986 tao
	        boolean indexDataFile = false;
139
	        return createSystemMetadata(indexDataFile, localId, includeORE, downloadData);
140
	}
141
	/**
142 6706 leinfelder
	 * Creates a system metadata object for insertion into metacat
143 8986 tao
	 * @param indexDataFile
144
	 *            Indicate if we need to index data file.
145 6706 leinfelder
	 *
146
	 * @param localId
147
	 *            The local document identifier
148
	 * @param user
149
	 *            The user submitting the system metadata document
150
	 * @param groups
151
	 *            The groups the user belongs to
152
	 *
153
	 * @return sysMeta The system metadata object created
154 6708 leinfelder
	 * @throws SAXException
155
	 * @throws HandlerException
156
	 * @throws AccessControlException
157 6721 leinfelder
	 * @throws AccessException
158 6706 leinfelder
	 */
159 8986 tao
	public static SystemMetadata createSystemMetadata(boolean indexDataFile, String localId, boolean includeORE, boolean downloadData)
160 6706 leinfelder
			throws McdbException, McdbDocNotFoundException, SQLException,
161
			IOException, AccessionNumberException, ClassNotFoundException,
162
			InsufficientKarmaException, ParseLSIDException,
163
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
164 9833 tao
			MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
165 6707 leinfelder
166 6964 leinfelder
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
167 6705 leinfelder
168 6961 leinfelder
		// check for system metadata
169
		SystemMetadata sysMeta = null;
170
171 6706 leinfelder
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
172 6808 leinfelder
		int rev = Integer.valueOf(accNum.getRev());
173 6961 leinfelder
174
		// get/make the guid
175
		String guid = null;
176
		try {
177
			// get the guid if it exists
178
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
179
		} catch (McdbDocNotFoundException dnfe) {
180
			// otherwise create the mapping
181 6964 leinfelder
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
182 6961 leinfelder
			IdentifierManager.getInstance().createMapping(localId, localId);
183 6964 leinfelder
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
184 6961 leinfelder
		}
185
186
		// look up existing system metadata if it exists
187
		Identifier identifier = new Identifier();
188
		identifier.setValue(guid);
189
		try {
190 6964 leinfelder
			logMetacat.debug("Using hazelcast to get system metadata");
191 6961 leinfelder
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
192
			// TODO: if this is the case, we could return here -- what else do we gain?
193
			if (!updateExisting ) {
194
				return sysMeta;
195
			}
196
		} catch (Exception e) {
197 6964 leinfelder
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
198
199 6970 leinfelder
		}
200
201
		if (sysMeta == null) {
202 6961 leinfelder
			// create system metadata
203
			sysMeta = new SystemMetadata();
204
			sysMeta.setIdentifier(identifier);
205
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
206
			sysMeta.setArchived(false);
207
		}
208 6962 leinfelder
209 6706 leinfelder
		// get additional docinfo
210 6708 leinfelder
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
211 6706 leinfelder
		// set the default object format
212 6708 leinfelder
		String doctype = docInfo.get("doctype");
213 6706 leinfelder
		ObjectFormatIdentifier fmtid = null;
214
215
		// set the object format, fall back to defaults
216 6982 leinfelder
		if (doctype.trim().equals("BIN")) {
217
			// we don't know much about this file (yet)
218
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
219 8028 leinfelder
		} else if (doctype.trim().equals("metadata")) {
220
			// special ESRI FGDC format
221
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
222 6982 leinfelder
		} else {
223
			try {
224
				// do we know the given format?
225
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
226
			} catch (NotFound nfe) {
227
				// format is not registered, use default
228 6964 leinfelder
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
229 6706 leinfelder
			}
230
		}
231
232
		sysMeta.setFormatId(fmtid);
233 6707 leinfelder
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
234 6706 leinfelder
235 6962 leinfelder
		// for retrieving the actual object
236
		InputStream inputStream = null;
237
		inputStream = MetacatHandler.read(localId);
238
239 6721 leinfelder
		// create the checksum
240 7222 leinfelder
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
241 6721 leinfelder
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
242 7084 leinfelder
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
243 6721 leinfelder
		sysMeta.setChecksum(checksum);
244
245 6962 leinfelder
		// set the size from file on disk, don't read bytes again
246
		File fileOnDisk = getFileOnDisk(localId);
247
		long fileSize = 0;
248
		if (fileOnDisk.exists()) {
249
			fileSize = fileOnDisk.length();
250
		}
251
		sysMeta.setSize(BigInteger.valueOf(fileSize));
252 6721 leinfelder
253
		// submitter
254
		Subject submitter = new Subject();
255
		submitter.setValue(docInfo.get("user_updated"));
256
		sysMeta.setSubmitter(submitter);
257
258
		// rights holder
259
		Subject owner = new Subject();
260
		owner.setValue(docInfo.get("user_owner"));
261
		sysMeta.setRightsHolder(owner);
262
263
		// dates
264
		String createdDateString = docInfo.get("date_created");
265
		String updatedDateString = docInfo.get("date_updated");
266
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
267
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
268
		sysMeta.setDateUploaded(createdDate);
269 7084 leinfelder
		//sysMeta.setDateSysMetadataModified(updatedDate);
270
		// use current datetime
271
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
272 6721 leinfelder
273
		// set the revision history
274
		String docidWithoutRev = accNum.getDocid();
275
		Identifier obsoletedBy = null;
276
		Identifier obsoletes = null;
277
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
278 6727 leinfelder
		// ensure this ordering since processing depends on it
279
		Collections.sort(revisions);
280 6721 leinfelder
		for (int existingRev: revisions) {
281
			// use the docid+rev as the guid
282
			String existingPid = docidWithoutRev + "." + existingRev;
283 7001 leinfelder
			try {
284
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
285
			} catch (McdbDocNotFoundException mdfe) {
286
				// we'll be defaulting to the local id
287
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
288
			}
289 6721 leinfelder
			if (existingRev < rev) {
290
				// it's the old docid, until it's not
291
				obsoletes = new Identifier();
292
				obsoletes.setValue(existingPid);
293
			}
294
			if (existingRev > rev) {
295
				// it's the newer docid
296
				obsoletedBy = new Identifier();
297
				obsoletedBy.setValue(existingPid);
298
				// only want the version just after it
299
				break;
300
			}
301
		}
302 6725 leinfelder
		// set them on our object
303 6721 leinfelder
		sysMeta.setObsoletedBy(obsoletedBy);
304
		sysMeta.setObsoletes(obsoletes);
305
306 6725 leinfelder
		// update the system metadata for the object[s] we are revising
307
		if (obsoletedBy != null) {
308 6971 leinfelder
			SystemMetadata obsoletedBySysMeta = null;
309
			try {
310
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
311
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
312
			} catch (McdbDocNotFoundException e) {
313
				// ignore
314
			}
315 6725 leinfelder
			if (obsoletedBySysMeta != null) {
316
				obsoletedBySysMeta.setObsoletes(identifier);
317 7297 leinfelder
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
318 6725 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
319
			}
320
		}
321
		if (obsoletes != null) {
322 6971 leinfelder
			SystemMetadata obsoletesSysMeta = null;
323
			try {
324
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
325
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
326
			} catch (McdbDocNotFoundException e) {
327
				// ignore
328
			}
329 6725 leinfelder
			if (obsoletesSysMeta != null) {
330
				obsoletesSysMeta.setObsoletedBy(identifier);
331 8566 leinfelder
				// DO NOT set archived to true -- it will have unintended consequences if the CN sees this.
332
				//obsoletesSysMeta.setArchived(true);
333 7297 leinfelder
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
334 6911 leinfelder
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
335 6725 leinfelder
			}
336
		}
337
338 6744 leinfelder
		// look up the access control policy we have in metacat
339
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
340 7214 cjones
		try {
341
        List<AccessRule> allowList = accessPolicy.getAllowList();
342 7215 cjones
        int listSize = allowList.size();
343 7214 cjones
        sysMeta.setAccessPolicy(accessPolicy);
344
345
    } catch (NullPointerException npe) {
346
        logMetacat.info("The allow list is empty, can't include an empty " +
347
            "access policy in the system metadata for " + guid);
348
349
    }
350 6721 leinfelder
351
		// authoritative node
352
		NodeReference nr = new NodeReference();
353 7030 cjones
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
354 6721 leinfelder
		sysMeta.setOriginMemberNode(nr);
355
		sysMeta.setAuthoritativeMemberNode(nr);
356
357 6988 jones
		// Set a default replication policy
358
        ReplicationPolicy rp = getDefaultReplicationPolicy();
359
        if (rp != null) {
360
            sysMeta.setReplicationPolicy(rp);
361
        }
362
363 6706 leinfelder
		// further parse EML documents to get data object format,
364
		// describes and describedBy information
365
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
366
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
367
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
368
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
369
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
370
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
371
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
372
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
373
374
			try {
375 6962 leinfelder
376
				// get it again to parse the document
377
				logMetacat.debug("Re-reading document inputStream");
378 6721 leinfelder
				inputStream = MetacatHandler.read(localId);
379 6960 leinfelder
380
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
381
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
382
383 6721 leinfelder
				// iterate through the data objects in the EML doc and add sysmeta
384 6707 leinfelder
				logMetacat.debug("In createSystemMetadata() the number of data "
385 6706 leinfelder
								+ "entities is: "
386 6960 leinfelder
								+ emlDocument.distributionMetadata);
387 6706 leinfelder
388 6712 leinfelder
				// for generating the ORE map
389
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
390
	            List<Identifier> dataIds = new ArrayList<Identifier>();
391
392 6706 leinfelder
				// iterate through data objects described by the EML
393 6960 leinfelder
	            if (emlDocument.distributionMetadata != null) {
394
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
395 6744 leinfelder
396 6960 leinfelder
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
397
				        String dataDocUrl = distMetadata.url;
398
				        String dataDocMimeType = distMetadata.mimeType;
399 6744 leinfelder
						// default to binary
400
						if (dataDocMimeType == null) {
401 6982 leinfelder
							dataDocMimeType = "application/octet-stream";
402 6721 leinfelder
						}
403 6852 leinfelder
404
						// process the data
405 6855 leinfelder
						boolean remoteData = false;
406 6852 leinfelder
						String dataDocLocalId = null;
407
						Identifier dataGuid = new Identifier();
408
409
						// handle ecogrid, or downloadable data
410
						String ecogridPrefix = "ecogrid://knb/";
411
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
412
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
413
						} else {
414
							// should we try downloading the remote data?
415
							if (downloadData) {
416
								InputStream dataObject = null;
417
								try {
418
									// download the data from the URL
419
									URL dataURL = new URL(dataDocUrl);
420 6873 leinfelder
									URLConnection dataConnection = dataURL.openConnection();
421
422
									// default is to download the data
423
									dataObject = dataConnection.getInputStream();
424
425
									String detectedContentType = dataConnection.getContentType();
426
									logMetacat.info("Detected content type: " + detectedContentType);
427
428
									if (detectedContentType != null) {
429
										// seems to be HTML from the remote location
430
										if (detectedContentType.contains("html")) {
431
											// if we are not expecting it, we skip it
432
											if (!dataDocMimeType.contains("html")) {
433
												// set to null so we don't download it
434
												dataObject = null;
435
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
436
											}
437
										}
438
439
									} else {
440
										// if we don't know what it is, should we skip it?
441
										dataObject = null;
442
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
443
									}
444
445 6852 leinfelder
								} catch (Exception e) {
446
									// error with the download
447
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
448
								}
449
450
								if (dataObject != null) {
451
									// create the local version of it
452
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
453
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
454
									dataGuid.setValue(dataDocLocalId);
455
456
									// save it locally
457
									Session session = new Session();
458
									session.setSubject(submitter);
459
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
460 10276 tao
									Checksum sum = null;
461
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session, sum);
462 6855 leinfelder
463
									remoteData = true;
464 6852 leinfelder
								}
465
							}
466
467
						}
468
469 6744 leinfelder
						logMetacat.debug("Data local ID: " + dataDocLocalId);
470
						logMetacat.debug("Data URL     : " + dataDocUrl);
471
						logMetacat.debug("Data mime    : " + dataDocMimeType);
472 6852 leinfelder
473 7112 leinfelder
						// check for valid docid.rev
474
						String dataDocid = null;
475
						int dataRev = 0;
476
						if (dataDocLocalId != null) {
477
							// look up the guid for the data
478
							try {
479
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
480
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
481
							} catch (Exception e) {
482
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
483
								dataDocLocalId = null;
484
							}
485
						}
486
487 6852 leinfelder
						// now we have a local id for the data
488
						if (dataDocLocalId != null) {
489 6744 leinfelder
490
							// check if data system metadata exists already
491
							SystemMetadata dataSysMeta = null;
492
							String dataGuidString = null;
493 6706 leinfelder
							try {
494 6744 leinfelder
								// look for the identifier
495 6707 leinfelder
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
496 6744 leinfelder
								// set it
497 6706 leinfelder
								dataGuid.setValue(dataGuidString);
498 6744 leinfelder
								// look up the system metadata
499 6706 leinfelder
								try {
500 6744 leinfelder
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
501
								} catch (Exception e) {
502
									// probably not in the system
503
									dataSysMeta = null;
504 6706 leinfelder
								}
505 6744 leinfelder
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
506
							} catch (McdbDocNotFoundException nf) {
507
								// we didn't find it
508
								dataSysMeta = null;
509
							}
510 6712 leinfelder
511 6744 leinfelder
							// we'll have to generate it
512
							if (dataSysMeta == null) {
513
								// System metadata for data doesn't exist yet, so create it
514 6964 leinfelder
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
515 6961 leinfelder
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
516
517
								// now look it up again
518
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
519
520
								// set the guid
521
								dataGuid.setValue(dataGuidString);
522
523
								// inherit access rules from metadata, if we don't have our own
524
								if (remoteData) {
525
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
526
									// TODO: use access rules defined in EML, per data file
527
								}
528 6744 leinfelder
529
							}
530 6721 leinfelder
531 6961 leinfelder
							// set object format for the data file
532 6964 leinfelder
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
533 6982 leinfelder
							ObjectFormatIdentifier fmt = null;
534 6961 leinfelder
							try {
535 6982 leinfelder
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
536 6961 leinfelder
							} catch (NotFound nfe) {
537
								logMetacat.debug("Couldn't find format identifier for: "
538
												+ dataDocMimeType
539
												+ ". Setting it to application/octet-stream.");
540 6982 leinfelder
								fmt = new ObjectFormatIdentifier();
541
								fmt.setValue("application/octet-stream");
542 6961 leinfelder
							}
543 6982 leinfelder
							dataSysMeta.setFormatId(fmt);
544
545 6961 leinfelder
							// update the values
546
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
547
548 8986 tao
							// reindex data file if need it.
549
							logMetacat.debug("do we need to reindex guid "+dataGuid.getValue()+"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~?"+indexDataFile);
550
							if(indexDataFile) {
551
							    reindexDataFile(dataSysMeta.getIdentifier(), dataSysMeta);
552
							}
553
554 6961 leinfelder
							// include as part of the ORE package
555 6744 leinfelder
							dataIds.add(dataGuid);
556
557
						} // end if (EML package)
558
559
					} // end for (data entities)
560
561
	            } // data entities not null
562
563 6712 leinfelder
				// ORE map
564 6713 leinfelder
				if (includeORE) {
565 6800 leinfelder
					// can we generate them?
566 6713 leinfelder
			        if (!dataIds.isEmpty()) {
567 6800 leinfelder
			        	// it doesn't exist in the system?
568
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
569
570
				            // generate the ORE map for this datapackage
571
				            Identifier resourceMapId = new Identifier();
572 7001 leinfelder
				            // use the local id, not the guid in case we have DOIs for them already
573 7849 leinfelder
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
574 6800 leinfelder
				            idMap.put(sysMeta.getIdentifier(), dataIds);
575
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
576
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
577
				            // copy most of the same system metadata as the packaging metadata
578
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
579
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
580
				            resourceMapSysMeta.setIdentifier(resourceMapId);
581 7222 leinfelder
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
582 6800 leinfelder
							resourceMapSysMeta.setChecksum(oreChecksum);
583
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
584
							resourceMapSysMeta.setFormatId(formatId);
585
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
586
587
							// set the revision graph
588
							resourceMapSysMeta.setObsoletes(null);
589
							resourceMapSysMeta.setObsoletedBy(null);
590
							// look up the resource map that this one obsoletes
591
							if (sysMeta.getObsoletes() != null) {
592 7273 leinfelder
								// use the localId in case we have a DOI
593
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
594 6800 leinfelder
								Identifier resourceMapObsoletes = new Identifier();
595 7849 leinfelder
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
596 6800 leinfelder
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
597
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
598
								if (resourceMapObsoletesSystemMetadata != null) {
599
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
600 7278 leinfelder
									resourceMapObsoletesSystemMetadata.setArchived(true);
601 6800 leinfelder
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
602
								}
603
							}
604
							// look up the resource map that this one is obsoletedBy
605
							if (sysMeta.getObsoletedBy() != null) {
606 7273 leinfelder
								// use the localId in case we have a DOI
607
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
608 6800 leinfelder
								Identifier resourceMapObsoletedBy = new Identifier();
609 7849 leinfelder
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
610 6800 leinfelder
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
611 7278 leinfelder
								resourceMapSysMeta.setArchived(true);
612 6800 leinfelder
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
613
								if (resourceMapObsoletedBySystemMetadata != null) {
614
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
615
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
616
								}
617
							}
618
619 6907 leinfelder
							// save it locally, if it doesn't already exist
620
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
621
								Session session = new Session();
622
								session.setSubject(submitter);
623
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
624 10276 tao
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session, resourceMapSysMeta.getChecksum());
625 6907 leinfelder
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
626
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
627
							}
628 6800 leinfelder
			        	}
629 6713 leinfelder
			        }
630
				}
631 6706 leinfelder
632
			} catch (ParserConfigurationException pce) {
633 6707 leinfelder
				logMetacat.debug("There was a problem parsing the EML document. "
634 6706 leinfelder
								+ "The error message was: " + pce.getMessage());
635
636
			} catch (SAXException saxe) {
637 6707 leinfelder
				logMetacat.debug("There was a problem traversing the EML document. "
638 6706 leinfelder
								+ "The error message was: " + saxe.getMessage());
639
640
			} catch (XPathExpressionException xpee) {
641 6707 leinfelder
				logMetacat.debug("There was a problem searching the EML document. "
642 6706 leinfelder
								+ "The error message was: " + xpee.getMessage());
643
			} catch (Exception e) {
644 6707 leinfelder
				logMetacat.debug("There was a problem creating System Metadata. "
645 6706 leinfelder
								+ "The error message was: " + e.getMessage());
646 6721 leinfelder
				e.printStackTrace();
647 6706 leinfelder
			} // end try()
648
649
		} // end if()
650
651
		return sysMeta;
652
	}
653 8986 tao
654
	/*
655
	 * Re-index the data file since the access rule was changed during the inserting of the eml document.
656
	 * (During first time to index the data file in Metacat API, the eml hasn't been inserted)
657
	 */
658
	private static void reindexDataFile(Identifier id, SystemMetadata sysmeta) {
659
	    try {
660
	        logMetacat.debug("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ reindex"+id.getValue());
661 8991 tao
	        if(sysmeta != null) {
662
	            if(!sysmeta.getArchived()) {
663
	                //set the archive to true to remove index.
664
	                sysmeta.setArchived(true);
665
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
666
	                //re-insert the index
667
	                sysmeta.setArchived(false);
668
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
669
	            } else {
670
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
671
	            }
672
	        }
673
674 8986 tao
        } catch (Exception e) {
675
            // TODO Auto-generated catch block
676
            logMetacat.warn("Can't reindex the data object "+id.getValue()+" since "+e.getMessage());
677
            //e.printStackTrace();
678
        }
679
	}
680 6988 jones
681 8190 leinfelder
	/**
682
	 * Checks for potential ORE object existence
683
	 * @param identifier
684
	 * @return
685
	 */
686
    public static boolean oreExistsFor(Identifier identifier) {
687
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
688 8200 leinfelder
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
689 8190 leinfelder
		return (ids != null && ids.size() > 0);
690
	}
691
692
	/**
693 6911 leinfelder
     * Generate SystemMetadata for any object in the object store that does
694
     * not already have it.  SystemMetadata documents themselves, are, of course,
695
     * exempt.  This is a utility method for migration of existing object
696
     * stores to DataONE where SystemMetadata is required for all objects.
697
     * @param idList
698
     * @param includeOre
699
     * @param downloadData
700
     * @throws PropertyNotFoundException
701
     * @throws NoSuchAlgorithmException
702
     * @throws AccessionNumberException
703
     * @throws SQLException
704 6964 leinfelder
	 * @throws SAXException
705
	 * @throws HandlerException
706 9833 tao
	 * @throws MarshallingException
707 6964 leinfelder
	 * @throws BaseException
708
	 * @throws ParseLSIDException
709
	 * @throws InsufficientKarmaException
710
	 * @throws ClassNotFoundException
711
	 * @throws IOException
712
	 * @throws McdbException
713
	 * @throws AccessException
714
	 * @throws AccessControlException
715 6911 leinfelder
     */
716
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData)
717 9833 tao
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, MarshallingException, HandlerException, SAXException
718 6911 leinfelder
    {
719
720
        for (String localId : idList) {
721 6998 leinfelder
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
722 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
723
724 6998 leinfelder
            SystemMetadata sm = null;
725
726
            //generate required system metadata fields from the document
727 7123 leinfelder
            try {
728
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
729
            } catch (Exception e) {
730
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
731
				continue;
732
			}
733
734 6998 leinfelder
            //insert the systemmetadata object or just update it as needed
735 7188 leinfelder
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
736
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
737
738 7178 leinfelder
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
739
740 6911 leinfelder
        }
741 6964 leinfelder
        logMetacat.info("done generating system metadata for given list");
742 6911 leinfelder
    }
743 6706 leinfelder
744 6707 leinfelder
	/**
745 6706 leinfelder
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
746
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
747
	 * evaluated.
748
	 *
749
	 * @param is The InputStream of bytes
750
	 *
751
	 * @return size The size in bytes of the input stream as a long
752
	 *
753
	 * @throws IOException
754
	 */
755 7322 leinfelder
	public static long sizeOfStream(InputStream is) throws IOException {
756 6706 leinfelder
757
		long size = 0;
758
		byte[] b = new byte[1024];
759
		int numread = is.read(b, 0, 1024);
760
		while (numread != -1) {
761
			size += numread;
762
			numread = is.read(b, 0, 1024);
763
		}
764
		return size;
765
766
	}
767 6962 leinfelder
768
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
769
770
		DocumentImpl doc = new DocumentImpl(docid, false);
771
		String filepath = null;
772
		String filename = null;
773
774
		// deal with data or metadata cases
775
		if (doc.getRootNodeID() == 0) {
776
			// this is a data file
777
			filepath = PropertyService.getProperty("application.datafilepath");
778
		} else {
779
			filepath = PropertyService.getProperty("application.documentfilepath");
780
		}
781
		// ensure it is a directory path
782
		if (!(filepath.endsWith("/"))) {
783
			filepath += "/";
784
		}
785
		filename = filepath + docid;
786
		File documentFile = new File(filename);
787
788
		return documentFile;
789
	}
790 6988 jones
791
	/**
792
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
793
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
794
	 * than or equal to zero, no policy needs to be set, so return null.
795
	 * @return ReplicationPolicy, or null if no replication policy is needed
796
	 */
797 10047 jones
    protected static ReplicationPolicy getDefaultReplicationPolicy() {
798 6988 jones
        ReplicationPolicy rp = null;
799
        int numReplicas = -1;
800
        try {
801
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
802
        } catch (NumberFormatException e) {
803 10047 jones
            // The property is not a valid integer, so set it to 0
804
            numReplicas = 0;
805 6988 jones
        } catch (PropertyNotFoundException e) {
806 10047 jones
            // The property is not found, so set it to 0
807
            numReplicas = 0;
808 6988 jones
        }
809
810 10047 jones
        rp = new ReplicationPolicy();
811 6988 jones
        if (numReplicas > 0) {
812
            rp.setReplicationAllowed(true);
813
            rp.setNumberReplicas(numReplicas);
814
            try {
815
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
816
                if (preferredNodeList != null) {
817
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
818
                    if (pNodes != null && !pNodes.isEmpty()) {
819
                        rp.setPreferredMemberNodeList(pNodes);
820
                    }
821
                }
822
            } catch (PropertyNotFoundException e) {
823
                // No preferred list found in properties, so just ignore it; no action needed
824
            }
825
            try {
826
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
827
                if (blockedNodeList != null) {
828
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
829
                    if (bNodes != null && !bNodes.isEmpty()) {
830 7022 leinfelder
                        rp.setBlockedMemberNodeList(bNodes);
831 6988 jones
                    }
832
                }
833
            } catch (PropertyNotFoundException e) {
834
                // No blocked list found in properties, so just ignore it; no action needed
835
            }
836 10047 jones
        } else {
837
            rp.setReplicationAllowed(false);
838
            rp.setNumberReplicas(0);
839 6988 jones
        }
840
        return rp;
841
    }
842
843
    /**
844 7025 leinfelder
     * Extract a List of NodeReferences from a String listing the node identifiers where
845 6988 jones
     * each identifier is separated by whitespace, comma, or semicolon characters.
846
     * @param nodeString the string containing the list of nodes
847 7025 leinfelder
     * @return the List of NodeReference objects parsed from the input string
848 6988 jones
     */
849
    private static List<NodeReference> extractNodeReferences(String nodeString) {
850
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
851 7022 leinfelder
        String[] result = nodeString.split("[,;\\s]");
852 6988 jones
        for (String r : result) {
853 7025 leinfelder
        	if (r != null && r.length() > 0) {
854
	            NodeReference noderef = new NodeReference();
855
	            noderef.setValue(r);
856
	            nodeList.add(noderef);
857
	        }
858 6988 jones
        }
859
        return nodeList;
860
    }
861 6705 leinfelder
}