Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2012-05-31 21:04:46 -0700 (Thu, 31 May 2012) $'
10
 * '$Revision: 7222 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.MockHttpServletRequest;
52
import org.dataone.client.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.ore.ResourceMapFactory;
57
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59
import org.dataone.service.types.v1.AccessPolicy;
60
import org.dataone.service.types.v1.AccessRule;
61
import org.dataone.service.types.v1.Checksum;
62
import org.dataone.service.types.v1.Identifier;
63
import org.dataone.service.types.v1.NodeReference;
64
import org.dataone.service.types.v1.ObjectFormatIdentifier;
65
import org.dataone.service.types.v1.ReplicationPolicy;
66
import org.dataone.service.types.v1.Session;
67
import org.dataone.service.types.v1.Subject;
68
import org.dataone.service.types.v1.SystemMetadata;
69
import org.dataone.service.types.v1.util.ChecksumUtil;
70
import org.dataone.service.util.DateTimeMarshaller;
71
import org.dspace.foresite.ResourceMap;
72
import org.jibx.runtime.JiBXException;
73
import org.xml.sax.SAXException;
74

    
75
import java.util.Calendar;
76

    
77
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79
import edu.ucsb.nceas.metacat.DBUtil;
80
import edu.ucsb.nceas.metacat.DocumentImpl;
81
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84
import edu.ucsb.nceas.metacat.MetaCatServlet;
85
import edu.ucsb.nceas.metacat.MetacatHandler;
86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.properties.PropertyService;
90
import edu.ucsb.nceas.metacat.replication.ReplicationService;
91
import edu.ucsb.nceas.metacat.shared.AccessException;
92
import edu.ucsb.nceas.metacat.shared.HandlerException;
93
import edu.ucsb.nceas.metacat.util.DocumentUtil;
94
import edu.ucsb.nceas.utilities.ParseLSIDException;
95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
96

    
97
public class SystemMetadataFactory {
98

    
99
	private static final String resourceMapPrefix = "resourceMap_";
100
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
101
	/**
102
	 * use this flag if you want to update any existing system metadata values with generated content
103
	 */
104
	private static boolean updateExisting = true;
105
	
106
	/**
	 * Creates (or refreshes) the DataONE system metadata record for the object
	 * stored under the given Metacat localId, registering a guid mapping first
	 * if one does not exist. For recognized EML metadata documents this also
	 * walks the described data entities, generating their system metadata as
	 * needed, and can produce and insert an ORE resource map for the package.
	 *
	 * @param localId
	 *            The local document identifier (docid.rev)
	 * @param includeORE
	 *            when true, generate and insert an ORE resource map for EML
	 *            packages that reference data entities
	 * @param downloadData
	 *            when true, attempt to download remote (non-ecogrid) data
	 *            referenced by EML and register it locally
	 *
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
		
		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;
		
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());
		
		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		}
		
		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());

		}

		if (sysMeta == null) {
			// create system metadata
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}
		
		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		// NOTE(review): this stream is fully consumed by the checksum call below
		// and is not explicitly closed here -- confirm MetacatHandler/ChecksumUtil
		// ownership before changing.
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);
		
		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));
		
		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);
		
		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
		
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);
		
		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				obsoletesSysMeta.setArchived(true);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}
		
		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		try {
        // NOTE: the size() call deliberately triggers an NPE when the allow
        // list is null, so the catch below skips setting an empty policy
        List<AccessRule> allowList = accessPolicy.getAllowList();
        int listSize = allowList.size();
        sysMeta.setAccessPolicy(accessPolicy);
        
    } catch (NullPointerException npe) {
        logMetacat.info("The allow list is empty, can't include an empty " +
            "access policy in the system metadata for " + guid);
        
    }
		
		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);
		
		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }
		
		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): identity (==) comparison assumes ObjectFormatCache
		// hands back cached singleton instances -- confirm; equals() would be safer
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {
				
				// get it again to parse the document
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);
				
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
				
				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
	            List<Identifier> dataIds = new ArrayList<Identifier>();
				
				// iterate through data objects described by the EML
	            if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
	
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
				        String dataDocUrl = distMetadata.url;
				        String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();
									
									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}
										
									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}
									
								} catch (Exception e) {
									// error with the download
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}
								
								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);
									
									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
									
									remoteData = true;
								}
							}
							
						}
						
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);
						
						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}
						
						// now we have a local id for the data
						if (dataDocLocalId != null) {
	
							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}
								
							// we'll have to generate it	
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);
								
								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}
	
							}
							
							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
							
							// include as part of the ORE package
							dataIds.add(dataGuid);
	
						} // end if (EML package)
	
					} // end for (data entities)
					
	            } // data entities not null
	            
				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
			        	
				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // use the local id, not the guid in case we have DOIs for them already
				            resourceMapId.setValue(resourceMapPrefix + localId);
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
							
							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(resourceMapPrefix + sysMeta.getObsoletes().getValue());
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(resourceMapPrefix + sysMeta.getObsoletedBy().getValue());
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}
				            
							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
597

    
598
    /**
599
     * Generate SystemMetadata for any object in the object store that does
600
     * not already have it.  SystemMetadata documents themselves, are, of course,
601
     * exempt.  This is a utility method for migration of existing object 
602
     * stores to DataONE where SystemMetadata is required for all objects.
603
     * @param idList
604
     * @param includeOre
605
     * @param downloadData
606
     * @throws PropertyNotFoundException
607
     * @throws NoSuchAlgorithmException
608
     * @throws AccessionNumberException
609
     * @throws SQLException
610
	 * @throws SAXException 
611
	 * @throws HandlerException 
612
	 * @throws JiBXException 
613
	 * @throws BaseException 
614
	 * @throws ParseLSIDException 
615
	 * @throws InsufficientKarmaException 
616
	 * @throws ClassNotFoundException 
617
	 * @throws IOException 
618
	 * @throws McdbException 
619
	 * @throws AccessException 
620
	 * @throws AccessControlException 
621
     */
622
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
623
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
624
    {
625
        
626
        for (String localId : idList) { 
627
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
628
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
629

    
630
            SystemMetadata sm = null;
631

    
632
            //generate required system metadata fields from the document
633
            try {
634
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
635
            } catch (Exception e) {
636
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
637
				continue;
638
			}
639
            
640
            //insert the systemmetadata object or just update it as needed
641
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
642
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
643
            
644
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
645

    
646
        }
647
        logMetacat.info("done generating system metadata for given list");
648
    }
649
    
650
	/**
	 * Determines if we already have registered an ORE map for this package.
	 * Currently a stub: it always reports false, so callers will always
	 * regenerate the ORE map (guarded elsewhere by identifierExists()).
	 * @param guid of the EML/packaging object
	 * @return true if there is an ORE map for the given package
	 */
	private static boolean oreExistsFor(Identifier guid) {
		// TODO: implement call to CN.search()
		return false;
	}
659

    
660
	/**
661
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
662
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
663
	 * evaluated.
664
	 * 
665
	 * @param is The InputStream of bytes
666
	 * 
667
	 * @return size The size in bytes of the input stream as a long
668
	 * 
669
	 * @throws IOException
670
	 */
671
	private static long sizeOfStream(InputStream is) throws IOException {
672

    
673
		long size = 0;
674
		byte[] b = new byte[1024];
675
		int numread = is.read(b, 0, 1024);
676
		while (numread != -1) {
677
			size += numread;
678
			numread = is.read(b, 0, 1024);
679
		}
680
		return size;
681

    
682
	}
683
	
684
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
685
		
686
		DocumentImpl doc = new DocumentImpl(docid, false);
687
		String filepath = null;
688
		String filename = null;
689

    
690
		// deal with data or metadata cases
691
		if (doc.getRootNodeID() == 0) {
692
			// this is a data file
693
			filepath = PropertyService.getProperty("application.datafilepath");
694
		} else {
695
			filepath = PropertyService.getProperty("application.documentfilepath");
696
		}
697
		// ensure it is a directory path
698
		if (!(filepath.endsWith("/"))) {
699
			filepath += "/";
700
		}
701
		filename = filepath + docid;
702
		File documentFile = new File(filename);
703
		
704
		return documentFile;
705
	}
706

    
707
	/**
708
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
709
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
710
	 * than or equal to zero, no policy needs to be set, so return null.
711
	 * @return ReplicationPolicy, or null if no replication policy is needed
712
	 */
713
    private static ReplicationPolicy getDefaultReplicationPolicy() {
714
        ReplicationPolicy rp = null;
715
        int numReplicas = -1;
716
        try {
717
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
718
        } catch (NumberFormatException e) {
719
            // The property is not a valid integer, so return a null policy
720
            return null;
721
        } catch (PropertyNotFoundException e) {
722
            // The property is not found, so return a null policy
723
            return null;
724
        }
725
        
726
        if (numReplicas > 0) {
727
            rp = new ReplicationPolicy();
728
            rp.setReplicationAllowed(true);
729
            rp.setNumberReplicas(numReplicas);
730
            try {
731
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
732
                if (preferredNodeList != null) {
733
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
734
                    if (pNodes != null && !pNodes.isEmpty()) {
735
                        rp.setPreferredMemberNodeList(pNodes);
736
                    }
737
                }
738
            } catch (PropertyNotFoundException e) {
739
                // No preferred list found in properties, so just ignore it; no action needed
740
            }
741
            try {
742
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
743
                if (blockedNodeList != null) {
744
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
745
                    if (bNodes != null && !bNodes.isEmpty()) {
746
                        rp.setBlockedMemberNodeList(bNodes);
747
                    }
748
                }
749
            } catch (PropertyNotFoundException e) {
750
                // No blocked list found in properties, so just ignore it; no action needed
751
            }
752
        }
753
        return rp;
754
    }
755

    
756
    /**
757
     * Extract a List of NodeReferences from a String listing the node identifiers where
758
     * each identifier is separated by whitespace, comma, or semicolon characters.
759
     * @param nodeString the string containing the list of nodes
760
     * @return the List of NodeReference objects parsed from the input string
761
     */
762
    private static List<NodeReference> extractNodeReferences(String nodeString) {
763
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
764
        String[] result = nodeString.split("[,;\\s]");
765
        for (String r : result) {
766
        	if (r != null && r.length() > 0) {
767
	            NodeReference noderef = new NodeReference();
768
	            noderef.setValue(r);
769
	            nodeList.add(noderef);
770
	        }
771
        }
772
        return nodeList;
773
    }
774
}
(5-5/5)