Project

General

Profile

1
/**
 *  '$RCSfile$'
 *    Purpose: A Class for generating DataONE SystemMetadata for objects
 *             stored in Metacat
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2013-09-13 15:53:44 -0700 (Fri, 13 Sep 2013) $'
 * '$Revision: 8200 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52
import org.dataone.client.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.ore.ResourceMapFactory;
57
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59
import org.dataone.service.types.v1.AccessPolicy;
60
import org.dataone.service.types.v1.AccessRule;
61
import org.dataone.service.types.v1.Checksum;
62
import org.dataone.service.types.v1.Identifier;
63
import org.dataone.service.types.v1.NodeReference;
64
import org.dataone.service.types.v1.ObjectFormatIdentifier;
65
import org.dataone.service.types.v1.ReplicationPolicy;
66
import org.dataone.service.types.v1.Session;
67
import org.dataone.service.types.v1.Subject;
68
import org.dataone.service.types.v1.SystemMetadata;
69
import org.dataone.service.types.v1.util.ChecksumUtil;
70
import org.dataone.service.util.DateTimeMarshaller;
71
import org.dspace.foresite.ResourceMap;
72
import org.jibx.runtime.JiBXException;
73
import org.xml.sax.SAXException;
74

    
75
import java.util.Calendar;
76

    
77
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79
import edu.ucsb.nceas.metacat.DBUtil;
80
import edu.ucsb.nceas.metacat.DocumentImpl;
81
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84
import edu.ucsb.nceas.metacat.MetaCatServlet;
85
import edu.ucsb.nceas.metacat.MetacatHandler;
86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.properties.PropertyService;
90
import edu.ucsb.nceas.metacat.replication.ReplicationService;
91
import edu.ucsb.nceas.metacat.shared.AccessException;
92
import edu.ucsb.nceas.metacat.shared.HandlerException;
93
import edu.ucsb.nceas.metacat.util.DocumentUtil;
94
import edu.ucsb.nceas.utilities.ParseLSIDException;
95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
96

    
97
/**
 * Generates DataONE SystemMetadata for objects stored in Metacat, including
 * (optionally) ORE resource maps for EML packages and download of remote data
 * objects described by EML documents.
 */
public class SystemMetadataFactory {

	// Prefix prepended to a local docid to form a resource map identifier.
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
105
	
106
	/**
107
	 * Creates a system metadata object for insertion into metacat
108
	 * 
109
	 * @param localId
110
	 *            The local document identifier
111
	 * @param user
112
	 *            The user submitting the system metadata document
113
	 * @param groups
114
	 *            The groups the user belongs to
115
	 * 
116
	 * @return sysMeta The system metadata object created
117
	 * @throws SAXException 
118
	 * @throws HandlerException 
119
	 * @throws AccessControlException 
120
	 * @throws AccessException 
121
	 */
122
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
123
			throws McdbException, McdbDocNotFoundException, SQLException,
124
			IOException, AccessionNumberException, ClassNotFoundException,
125
			InsufficientKarmaException, ParseLSIDException,
126
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
127
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
128
		
129
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
130

    
131
		// check for system metadata
132
		SystemMetadata sysMeta = null;
133
		
134
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
135
		int rev = Integer.valueOf(accNum.getRev());
136
		
137
		// get/make the guid
138
		String guid = null;
139
		try {
140
			// get the guid if it exists
141
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
142
		} catch (McdbDocNotFoundException dnfe) {
143
			// otherwise create the mapping
144
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
145
			IdentifierManager.getInstance().createMapping(localId, localId);
146
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);			
147
		}
148
		
149
		// look up existing system metadata if it exists
150
		Identifier identifier = new Identifier();
151
		identifier.setValue(guid);
152
		try {
153
			logMetacat.debug("Using hazelcast to get system metadata");
154
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
155
			// TODO: if this is the case, we could return here -- what else do we gain?
156
			if (!updateExisting ) {
157
				return sysMeta;
158
			}
159
		} catch (Exception e) {
160
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
161

    
162
		}
163

    
164
		if (sysMeta == null) {
165
			// create system metadata
166
			sysMeta = new SystemMetadata();
167
			sysMeta.setIdentifier(identifier);
168
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
169
			sysMeta.setArchived(false);
170
		}
171
		
172
		// get additional docinfo
173
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
174
		// set the default object format
175
		String doctype = docInfo.get("doctype");
176
		ObjectFormatIdentifier fmtid = null;
177

    
178
		// set the object format, fall back to defaults
179
		if (doctype.trim().equals("BIN")) {
180
			// we don't know much about this file (yet)
181
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
182
		} else if (doctype.trim().equals("metadata")) {
183
			// special ESRI FGDC format
184
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
185
		} else {
186
			try {
187
				// do we know the given format?
188
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
189
			} catch (NotFound nfe) {
190
				// format is not registered, use default
191
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
192
			}
193
		}
194

    
195
		sysMeta.setFormatId(fmtid);
196
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
197

    
198
		// for retrieving the actual object
199
		InputStream inputStream = null;
200
		inputStream = MetacatHandler.read(localId);
201

    
202
		// create the checksum
203
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
204
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
205
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
206
		sysMeta.setChecksum(checksum);
207
		
208
		// set the size from file on disk, don't read bytes again
209
		File fileOnDisk = getFileOnDisk(localId);
210
		long fileSize = 0;
211
		if (fileOnDisk.exists()) {
212
			fileSize = fileOnDisk.length();
213
		}
214
		sysMeta.setSize(BigInteger.valueOf(fileSize));
215
		
216
		// submitter
217
		Subject submitter = new Subject();
218
		submitter.setValue(docInfo.get("user_updated"));
219
		sysMeta.setSubmitter(submitter);
220
		
221
		// rights holder
222
		Subject owner = new Subject();
223
		owner.setValue(docInfo.get("user_owner"));
224
		sysMeta.setRightsHolder(owner);
225

    
226
		// dates
227
		String createdDateString = docInfo.get("date_created");
228
		String updatedDateString = docInfo.get("date_updated");
229
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
230
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
231
		sysMeta.setDateUploaded(createdDate);
232
		//sysMeta.setDateSysMetadataModified(updatedDate);
233
		// use current datetime 
234
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
235
		
236
		// set the revision history
237
		String docidWithoutRev = accNum.getDocid();
238
		Identifier obsoletedBy = null;
239
		Identifier obsoletes = null;
240
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
241
		// ensure this ordering since processing depends on it
242
		Collections.sort(revisions);
243
		for (int existingRev: revisions) {
244
			// use the docid+rev as the guid
245
			String existingPid = docidWithoutRev + "." + existingRev;
246
			try {
247
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
248
			} catch (McdbDocNotFoundException mdfe) {
249
				// we'll be defaulting to the local id
250
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
251
			}
252
			if (existingRev < rev) {
253
				// it's the old docid, until it's not
254
				obsoletes = new Identifier();
255
				obsoletes.setValue(existingPid);
256
			}
257
			if (existingRev > rev) {
258
				// it's the newer docid
259
				obsoletedBy = new Identifier();
260
				obsoletedBy.setValue(existingPid);
261
				// only want the version just after it
262
				break;
263
			}
264
		}
265
		// set them on our object
266
		sysMeta.setObsoletedBy(obsoletedBy);
267
		sysMeta.setObsoletes(obsoletes);
268
		
269
		// update the system metadata for the object[s] we are revising
270
		if (obsoletedBy != null) {
271
			SystemMetadata obsoletedBySysMeta = null;
272
			try {
273
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
274
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
275
			} catch (McdbDocNotFoundException e) {
276
				// ignore
277
			}
278
			if (obsoletedBySysMeta != null) {
279
				obsoletedBySysMeta.setObsoletes(identifier);
280
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
281
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
282
			}
283
		}
284
		if (obsoletes != null) {
285
			SystemMetadata obsoletesSysMeta = null;
286
			try {
287
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
288
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
289
			} catch (McdbDocNotFoundException e) {
290
				// ignore
291
			}
292
			if (obsoletesSysMeta != null) {
293
				obsoletesSysMeta.setObsoletedBy(identifier);
294
				obsoletesSysMeta.setArchived(true);
295
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
296
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
297
			}
298
		}
299
		
300
		// look up the access control policy we have in metacat
301
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
302
		try {
303
        List<AccessRule> allowList = accessPolicy.getAllowList();
304
        int listSize = allowList.size();
305
        sysMeta.setAccessPolicy(accessPolicy);
306
        
307
    } catch (NullPointerException npe) {
308
        logMetacat.info("The allow list is empty, can't include an empty " +
309
            "access policy in the system metadata for " + guid);
310
        
311
    }
312
		
313
		// authoritative node
314
		NodeReference nr = new NodeReference();
315
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
316
		sysMeta.setOriginMemberNode(nr);
317
		sysMeta.setAuthoritativeMemberNode(nr);
318
		
319
		// Set a default replication policy
320
        ReplicationPolicy rp = getDefaultReplicationPolicy();
321
        if (rp != null) {
322
            sysMeta.setReplicationPolicy(rp);
323
        }
324
		
325
		// further parse EML documents to get data object format,
326
		// describes and describedBy information
327
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
328
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
329
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
330
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
331
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
332
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
333
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
334
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
335

    
336
			try {
337
				
338
				// get it again to parse the document
339
				logMetacat.debug("Re-reading document inputStream");
340
				inputStream = MetacatHandler.read(localId);
341
				
342
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
343
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
344
				
345
				// iterate through the data objects in the EML doc and add sysmeta
346
				logMetacat.debug("In createSystemMetadata() the number of data "
347
								+ "entities is: "
348
								+ emlDocument.distributionMetadata);
349

    
350
				// for generating the ORE map
351
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
352
	            List<Identifier> dataIds = new ArrayList<Identifier>();
353
				
354
				// iterate through data objects described by the EML
355
	            if (emlDocument.distributionMetadata != null) {
356
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
357
	
358
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
359
				        String dataDocUrl = distMetadata.url;
360
				        String dataDocMimeType = distMetadata.mimeType;
361
						// default to binary
362
						if (dataDocMimeType == null) {
363
							dataDocMimeType = "application/octet-stream";
364
						}
365

    
366
						// process the data
367
						boolean remoteData = false;
368
						String dataDocLocalId = null;
369
						Identifier dataGuid = new Identifier();
370

    
371
						// handle ecogrid, or downloadable data
372
						String ecogridPrefix = "ecogrid://knb/";
373
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
374
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
375
						} else {
376
							// should we try downloading the remote data?
377
							if (downloadData) {
378
								InputStream dataObject = null;
379
								try {
380
									// download the data from the URL
381
									URL dataURL = new URL(dataDocUrl);
382
									URLConnection dataConnection = dataURL.openConnection();
383
									
384
									// default is to download the data
385
									dataObject = dataConnection.getInputStream();
386

    
387
									String detectedContentType = dataConnection.getContentType();
388
									logMetacat.info("Detected content type: " + detectedContentType);
389

    
390
									if (detectedContentType != null) {
391
										// seems to be HTML from the remote location
392
										if (detectedContentType.contains("html")) {
393
											// if we are not expecting it, we skip it
394
											if (!dataDocMimeType.contains("html")) {
395
												// set to null so we don't download it
396
												dataObject = null;
397
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
398
											}
399
										}
400
										
401
									} else {
402
										// if we don't know what it is, should we skip it?
403
										dataObject = null;
404
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
405
									}
406
									
407
								} catch (Exception e) {
408
									// error with the download
409
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
410
								}
411
								
412
								if (dataObject != null) {
413
									// create the local version of it
414
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
415
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
416
									dataGuid.setValue(dataDocLocalId);
417
									
418
									// save it locally
419
									Session session = new Session();
420
									session.setSubject(submitter);
421
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
422
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
423
									
424
									remoteData = true;
425
								}
426
							}
427
							
428
						}
429
						
430
						logMetacat.debug("Data local ID: " + dataDocLocalId);
431
						logMetacat.debug("Data URL     : " + dataDocUrl);
432
						logMetacat.debug("Data mime    : " + dataDocMimeType);
433
						
434
						// check for valid docid.rev
435
						String dataDocid = null;
436
						int dataRev = 0;
437
						if (dataDocLocalId != null) {
438
							// look up the guid for the data
439
							try {
440
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
441
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
442
							} catch (Exception e) {
443
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
444
								dataDocLocalId = null;
445
							}
446
						}
447
						
448
						// now we have a local id for the data
449
						if (dataDocLocalId != null) {
450
	
451
							// check if data system metadata exists already
452
							SystemMetadata dataSysMeta = null;
453
							String dataGuidString = null;
454
							try {
455
								// look for the identifier
456
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
457
								// set it
458
								dataGuid.setValue(dataGuidString);
459
								// look up the system metadata
460
								try {
461
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
462
								} catch (Exception e) {
463
									// probably not in the system
464
									dataSysMeta = null;
465
								}
466
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
467
							} catch (McdbDocNotFoundException nf) {
468
								// we didn't find it
469
								dataSysMeta = null;
470
							}
471
								
472
							// we'll have to generate it	
473
							if (dataSysMeta == null) {
474
								// System metadata for data doesn't exist yet, so create it
475
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
476
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
477

    
478
								// now look it up again
479
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
480

    
481
								// set the guid
482
								dataGuid.setValue(dataGuidString);
483
								
484
								// inherit access rules from metadata, if we don't have our own
485
								if (remoteData) {
486
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
487
									// TODO: use access rules defined in EML, per data file
488
								}
489
	
490
							}
491
							
492
							// set object format for the data file
493
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
494
							ObjectFormatIdentifier fmt = null;
495
							try {
496
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
497
							} catch (NotFound nfe) {
498
								logMetacat.debug("Couldn't find format identifier for: "
499
												+ dataDocMimeType
500
												+ ". Setting it to application/octet-stream.");
501
								fmt = new ObjectFormatIdentifier();
502
								fmt.setValue("application/octet-stream");
503
							}
504
							dataSysMeta.setFormatId(fmt);
505

    
506
							// update the values
507
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
508
							
509
							// include as part of the ORE package
510
							dataIds.add(dataGuid);
511
	
512
						} // end if (EML package)
513
	
514
					} // end for (data entities)
515
					
516
	            } // data entities not null
517
	            
518
				// ORE map
519
				if (includeORE) {
520
					// can we generate them?
521
			        if (!dataIds.isEmpty()) {
522
			        	// it doesn't exist in the system?
523
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
524
			        	
525
				            // generate the ORE map for this datapackage
526
				            Identifier resourceMapId = new Identifier();
527
				            // use the local id, not the guid in case we have DOIs for them already
528
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
529
				            idMap.put(sysMeta.getIdentifier(), dataIds);
530
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
531
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
532
				            // copy most of the same system metadata as the packaging metadata
533
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
534
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
535
				            resourceMapSysMeta.setIdentifier(resourceMapId);
536
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
537
							resourceMapSysMeta.setChecksum(oreChecksum);
538
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
539
							resourceMapSysMeta.setFormatId(formatId);
540
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
541
							
542
							// set the revision graph
543
							resourceMapSysMeta.setObsoletes(null);
544
							resourceMapSysMeta.setObsoletedBy(null);
545
							// look up the resource map that this one obsoletes
546
							if (sysMeta.getObsoletes() != null) {
547
								// use the localId in case we have a DOI
548
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
549
								Identifier resourceMapObsoletes = new Identifier();
550
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
551
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
552
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
553
								if (resourceMapObsoletesSystemMetadata != null) {
554
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
555
									resourceMapObsoletesSystemMetadata.setArchived(true);
556
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
557
								}
558
							}
559
							// look up the resource map that this one is obsoletedBy
560
							if (sysMeta.getObsoletedBy() != null) {
561
								// use the localId in case we have a DOI
562
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
563
								Identifier resourceMapObsoletedBy = new Identifier();
564
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
565
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
566
								resourceMapSysMeta.setArchived(true);
567
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
568
								if (resourceMapObsoletedBySystemMetadata != null) {
569
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
570
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
571
								}
572
							}
573
				            
574
							// save it locally, if it doesn't already exist
575
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
576
								Session session = new Session();
577
								session.setSubject(submitter);
578
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
579
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
580
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
581
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
582
							}
583
			        	}
584
			        }
585
				}
586

    
587
			} catch (ParserConfigurationException pce) {
588
				logMetacat.debug("There was a problem parsing the EML document. "
589
								+ "The error message was: " + pce.getMessage());
590

    
591
			} catch (SAXException saxe) {
592
				logMetacat.debug("There was a problem traversing the EML document. "
593
								+ "The error message was: " + saxe.getMessage());
594

    
595
			} catch (XPathExpressionException xpee) {
596
				logMetacat.debug("There was a problem searching the EML document. "
597
								+ "The error message was: " + xpee.getMessage());
598
			} catch (Exception e) {
599
				logMetacat.debug("There was a problem creating System Metadata. "
600
								+ "The error message was: " + e.getMessage());
601
				e.printStackTrace();
602
			} // end try()
603

    
604
		} // end if()
605

    
606
		return sysMeta;
607
	}
608

    
609
	/**
610
	 * Checks for potential ORE object existence 
611
	 * @param identifier
612
	 * @return
613
	 */
614
    public static boolean oreExistsFor(Identifier identifier) {
615
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
616
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
617
		return (ids != null && ids.size() > 0);
618
	}
619

    
620
	/**
621
     * Generate SystemMetadata for any object in the object store that does
622
     * not already have it.  SystemMetadata documents themselves, are, of course,
623
     * exempt.  This is a utility method for migration of existing object 
624
     * stores to DataONE where SystemMetadata is required for all objects.
625
     * @param idList
626
     * @param includeOre
627
     * @param downloadData
628
     * @throws PropertyNotFoundException
629
     * @throws NoSuchAlgorithmException
630
     * @throws AccessionNumberException
631
     * @throws SQLException
632
	 * @throws SAXException 
633
	 * @throws HandlerException 
634
	 * @throws JiBXException 
635
	 * @throws BaseException 
636
	 * @throws ParseLSIDException 
637
	 * @throws InsufficientKarmaException 
638
	 * @throws ClassNotFoundException 
639
	 * @throws IOException 
640
	 * @throws McdbException 
641
	 * @throws AccessException 
642
	 * @throws AccessControlException 
643
     */
644
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
645
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
646
    {
647
        
648
        for (String localId : idList) { 
649
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
650
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
651

    
652
            SystemMetadata sm = null;
653

    
654
            //generate required system metadata fields from the document
655
            try {
656
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
657
            } catch (Exception e) {
658
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
659
				continue;
660
			}
661
            
662
            //insert the systemmetadata object or just update it as needed
663
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
664
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
665
            
666
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
667

    
668
        }
669
        logMetacat.info("done generating system metadata for given list");
670
    }
671

    
672
	/**
673
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
674
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
675
	 * evaluated.
676
	 * 
677
	 * @param is The InputStream of bytes
678
	 * 
679
	 * @return size The size in bytes of the input stream as a long
680
	 * 
681
	 * @throws IOException
682
	 */
683
	public static long sizeOfStream(InputStream is) throws IOException {
684

    
685
		long size = 0;
686
		byte[] b = new byte[1024];
687
		int numread = is.read(b, 0, 1024);
688
		while (numread != -1) {
689
			size += numread;
690
			numread = is.read(b, 0, 1024);
691
		}
692
		return size;
693

    
694
	}
695
	
696
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
697
		
698
		DocumentImpl doc = new DocumentImpl(docid, false);
699
		String filepath = null;
700
		String filename = null;
701

    
702
		// deal with data or metadata cases
703
		if (doc.getRootNodeID() == 0) {
704
			// this is a data file
705
			filepath = PropertyService.getProperty("application.datafilepath");
706
		} else {
707
			filepath = PropertyService.getProperty("application.documentfilepath");
708
		}
709
		// ensure it is a directory path
710
		if (!(filepath.endsWith("/"))) {
711
			filepath += "/";
712
		}
713
		filename = filepath + docid;
714
		File documentFile = new File(filename);
715
		
716
		return documentFile;
717
	}
718

    
719
	/**
720
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
721
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
722
	 * than or equal to zero, no policy needs to be set, so return null.
723
	 * @return ReplicationPolicy, or null if no replication policy is needed
724
	 */
725
    private static ReplicationPolicy getDefaultReplicationPolicy() {
726
        ReplicationPolicy rp = null;
727
        int numReplicas = -1;
728
        try {
729
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
730
        } catch (NumberFormatException e) {
731
            // The property is not a valid integer, so return a null policy
732
            return null;
733
        } catch (PropertyNotFoundException e) {
734
            // The property is not found, so return a null policy
735
            return null;
736
        }
737
        
738
        if (numReplicas > 0) {
739
            rp = new ReplicationPolicy();
740
            rp.setReplicationAllowed(true);
741
            rp.setNumberReplicas(numReplicas);
742
            try {
743
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
744
                if (preferredNodeList != null) {
745
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
746
                    if (pNodes != null && !pNodes.isEmpty()) {
747
                        rp.setPreferredMemberNodeList(pNodes);
748
                    }
749
                }
750
            } catch (PropertyNotFoundException e) {
751
                // No preferred list found in properties, so just ignore it; no action needed
752
            }
753
            try {
754
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
755
                if (blockedNodeList != null) {
756
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
757
                    if (bNodes != null && !bNodes.isEmpty()) {
758
                        rp.setBlockedMemberNodeList(bNodes);
759
                    }
760
                }
761
            } catch (PropertyNotFoundException e) {
762
                // No blocked list found in properties, so just ignore it; no action needed
763
            }
764
        }
765
        return rp;
766
    }
767

    
768
    /**
769
     * Extract a List of NodeReferences from a String listing the node identifiers where
770
     * each identifier is separated by whitespace, comma, or semicolon characters.
771
     * @param nodeString the string containing the list of nodes
772
     * @return the List of NodeReference objects parsed from the input string
773
     */
774
    private static List<NodeReference> extractNodeReferences(String nodeString) {
775
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
776
        String[] result = nodeString.split("[,;\\s]");
777
        for (String r : result) {
778
        	if (r != null && r.length() > 0) {
779
	            NodeReference noderef = new NodeReference();
780
	            noderef.setValue(r);
781
	            nodeList.add(noderef);
782
	        }
783
        }
784
        return nodeList;
785
    }
786
}
(6-6/6)