/**
 *  '$RCSfile$'
 *    Purpose: A Class for upgrading the database to version 1.5
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2012-06-18 15:43:47 -0700 (Mon, 18 Jun 2012) $'
 * '$Revision: 7278 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.dataone;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.net.URL;
import java.net.URLConnection;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.apache.wicket.protocol.http.MockHttpServletRequest;
import org.dataone.client.ObjectFormatCache;
import org.dataone.eml.DataoneEMLParser;
import org.dataone.eml.EMLDocument;
import org.dataone.eml.EMLDocument.DistributionMetadata;
import org.dataone.ore.ResourceMapFactory;
import org.dataone.service.exceptions.BaseException;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.types.v1.AccessPolicy;
import org.dataone.service.types.v1.AccessRule;
import org.dataone.service.types.v1.Checksum;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.NodeReference;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.ReplicationPolicy;
import org.dataone.service.types.v1.Session;
import org.dataone.service.types.v1.Subject;
import org.dataone.service.types.v1.SystemMetadata;
import org.dataone.service.types.v1.util.ChecksumUtil;
import org.dataone.service.util.DateTimeMarshaller;
import org.dspace.foresite.ResourceMap;
import org.jibx.runtime.JiBXException;
import org.xml.sax.SAXException;

import edu.ucsb.nceas.metacat.AccessionNumber;
import edu.ucsb.nceas.metacat.AccessionNumberException;
import edu.ucsb.nceas.metacat.DBUtil;
import edu.ucsb.nceas.metacat.DocumentImpl;
import edu.ucsb.nceas.metacat.IdentifierManager;
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
import edu.ucsb.nceas.metacat.McdbException;
import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.MetacatHandler;
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.replication.ReplicationService;
import edu.ucsb.nceas.metacat.shared.AccessException;
import edu.ucsb.nceas.metacat.shared.HandlerException;
import edu.ucsb.nceas.metacat.util.DocumentUtil;
import edu.ucsb.nceas.utilities.ParseLSIDException;
import edu.ucsb.nceas.utilities.PropertyNotFoundException;

public class SystemMetadataFactory {

	// Prefix prepended to a localId to form the identifier of the
	// corresponding ORE resource map object.
	private static final String resourceMapPrefix = "resourceMap_";
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
	
	/**
	 * Creates (or updates) a system metadata object for the object stored
	 * under the given local identifier, registering a guid mapping if one
	 * does not exist yet. For recognized EML documents, the described data
	 * entities are also processed (optionally downloading remote data), and
	 * an ORE resource map may be generated and inserted.
	 * 
	 * @param localId
	 *            The local document identifier (docid.rev form)
	 * @param includeORE
	 *            whether to generate an ORE resource map for EML packages
	 * @param downloadData
	 *            whether to download remote (non-ecogrid) data files
	 *            referenced by an EML document and store them locally
	 * 
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
		
		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;
		
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());
		
		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create an identity mapping (guid == localId)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);			
		}
		
		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
		}

		if (sysMeta == null) {
			// create brand-new system metadata with an initial serial version
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}
		
		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		// NOTE(review): throws NPE if docInfo has no "doctype" entry -- confirm
		// ReplicationService always populates it
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		// NOTE(review): this stream is fully consumed by the checksum below and
		// re-read later for EML parsing, but never explicitly closed
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);
		
		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));
		
		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);
		
		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		// NOTE(review): updatedDate is computed but intentionally unused below;
		// the commented-out setter shows the earlier behavior
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
		
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not; the last assignment wins,
				// so obsoletes ends up pointing at the immediately preceding rev
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);
		
		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore -- the newer revision has no system metadata yet
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore -- the older revision has no system metadata yet
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				obsoletesSysMeta.setArchived(true);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}
		
		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		try {
			// the size() call forces an NPE when the allow list is absent,
			// which is how an empty policy is detected and skipped
			List<AccessRule> allowList = accessPolicy.getAllowList();
			int listSize = allowList.size();
			sysMeta.setAccessPolicy(accessPolicy);
		} catch (NullPointerException npe) {
			logMetacat.info("The allow list is empty, can't include an empty " +
				"access policy in the system metadata for " + guid);
		}
		
		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);
		
		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }
		
		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): these == comparisons rely on ObjectFormatCache returning
		// the same cached ObjectFormatIdentifier instance for a given format;
		// fmtid.getValue().equals(...) would be safer -- confirm cache behavior
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {
				
				// get it again to parse the document (checksum consumed the stream)
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);
				
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
				
				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
	            List<Identifier> dataIds = new ArrayList<Identifier>();
				
				// iterate through data objects described by the EML
	            if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
	
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
				        String dataDocUrl = distMetadata.url;
				        String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							// ecogrid URLs embed the localId directly after the prefix
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();
									
									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}
										
									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}
									
								} catch (Exception e) {
									// error with the download -- best effort, continue without it
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}
								
								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);
									
									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
									
									remoteData = true;
								}
							}
							
						}
						
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);
						
						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}
						
						// now we have a local id for the data
						if (dataDocLocalId != null) {
	
							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}
								
							// we'll have to generate it -- recursive call with
							// downloadData=false since the data is now local
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);
								
								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}
	
							}
							
							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
							
							// include as part of the ORE package
							dataIds.add(dataGuid);
	
						} // end if (EML package)
	
					} // end for (data entities)
					
	            } // data entities not null
	            
				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
			        	
				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // use the local id, not the guid in case we have DOIs for them already
				            resourceMapId.setValue(resourceMapPrefix + localId);
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
							
							// set the revision graph (cleared first because the
							// copied values refer to the EML object, not the map)
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								// use the localId in case we have a DOI
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(resourceMapPrefix + obsoletesLocalId );
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									resourceMapObsoletesSystemMetadata.setArchived(true);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								// use the localId in case we have a DOI
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(resourceMapPrefix + obsoletedByLocalId);
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								resourceMapSysMeta.setArchived(true);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}
				            
							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
603

    
604
    /**
605
     * Generate SystemMetadata for any object in the object store that does
606
     * not already have it.  SystemMetadata documents themselves, are, of course,
607
     * exempt.  This is a utility method for migration of existing object 
608
     * stores to DataONE where SystemMetadata is required for all objects.
609
     * @param idList
610
     * @param includeOre
611
     * @param downloadData
612
     * @throws PropertyNotFoundException
613
     * @throws NoSuchAlgorithmException
614
     * @throws AccessionNumberException
615
     * @throws SQLException
616
	 * @throws SAXException 
617
	 * @throws HandlerException 
618
	 * @throws JiBXException 
619
	 * @throws BaseException 
620
	 * @throws ParseLSIDException 
621
	 * @throws InsufficientKarmaException 
622
	 * @throws ClassNotFoundException 
623
	 * @throws IOException 
624
	 * @throws McdbException 
625
	 * @throws AccessException 
626
	 * @throws AccessControlException 
627
     */
628
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
629
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
630
    {
631
        
632
        for (String localId : idList) { 
633
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
634
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
635

    
636
            SystemMetadata sm = null;
637

    
638
            //generate required system metadata fields from the document
639
            try {
640
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
641
            } catch (Exception e) {
642
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
643
				continue;
644
			}
645
            
646
            //insert the systemmetadata object or just update it as needed
647
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
648
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
649
            
650
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
651

    
652
        }
653
        logMetacat.info("done generating system metadata for given list");
654
    }
    
	/**
	 * Determines if we already have registered an ORE map for this package.
	 * Currently a stub: always reports that no ORE map exists.
	 * @param guid of the EML/packaging object
	 * @return true if there is an ORE map for the given package
	 */
	private static boolean oreExistsFor(Identifier guid) {
		// TODO: implement call to CN.search()
		return false;
	}
665

    
666
	/**
667
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
668
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
669
	 * evaluated.
670
	 * 
671
	 * @param is The InputStream of bytes
672
	 * 
673
	 * @return size The size in bytes of the input stream as a long
674
	 * 
675
	 * @throws IOException
676
	 */
677
	private static long sizeOfStream(InputStream is) throws IOException {
678

    
679
		long size = 0;
680
		byte[] b = new byte[1024];
681
		int numread = is.read(b, 0, 1024);
682
		while (numread != -1) {
683
			size += numread;
684
			numread = is.read(b, 0, 1024);
685
		}
686
		return size;
687

    
688
	}
689
	
690
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
691
		
692
		DocumentImpl doc = new DocumentImpl(docid, false);
693
		String filepath = null;
694
		String filename = null;
695

    
696
		// deal with data or metadata cases
697
		if (doc.getRootNodeID() == 0) {
698
			// this is a data file
699
			filepath = PropertyService.getProperty("application.datafilepath");
700
		} else {
701
			filepath = PropertyService.getProperty("application.documentfilepath");
702
		}
703
		// ensure it is a directory path
704
		if (!(filepath.endsWith("/"))) {
705
			filepath += "/";
706
		}
707
		filename = filepath + docid;
708
		File documentFile = new File(filename);
709
		
710
		return documentFile;
711
	}
712

    
713
	/**
714
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
715
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
716
	 * than or equal to zero, no policy needs to be set, so return null.
717
	 * @return ReplicationPolicy, or null if no replication policy is needed
718
	 */
719
    private static ReplicationPolicy getDefaultReplicationPolicy() {
720
        ReplicationPolicy rp = null;
721
        int numReplicas = -1;
722
        try {
723
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
724
        } catch (NumberFormatException e) {
725
            // The property is not a valid integer, so return a null policy
726
            return null;
727
        } catch (PropertyNotFoundException e) {
728
            // The property is not found, so return a null policy
729
            return null;
730
        }
731
        
732
        if (numReplicas > 0) {
733
            rp = new ReplicationPolicy();
734
            rp.setReplicationAllowed(true);
735
            rp.setNumberReplicas(numReplicas);
736
            try {
737
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
738
                if (preferredNodeList != null) {
739
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
740
                    if (pNodes != null && !pNodes.isEmpty()) {
741
                        rp.setPreferredMemberNodeList(pNodes);
742
                    }
743
                }
744
            } catch (PropertyNotFoundException e) {
745
                // No preferred list found in properties, so just ignore it; no action needed
746
            }
747
            try {
748
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
749
                if (blockedNodeList != null) {
750
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
751
                    if (bNodes != null && !bNodes.isEmpty()) {
752
                        rp.setBlockedMemberNodeList(bNodes);
753
                    }
754
                }
755
            } catch (PropertyNotFoundException e) {
756
                // No blocked list found in properties, so just ignore it; no action needed
757
            }
758
        }
759
        return rp;
760
    }
761

    
762
    /**
763
     * Extract a List of NodeReferences from a String listing the node identifiers where
764
     * each identifier is separated by whitespace, comma, or semicolon characters.
765
     * @param nodeString the string containing the list of nodes
766
     * @return the List of NodeReference objects parsed from the input string
767
     */
768
    private static List<NodeReference> extractNodeReferences(String nodeString) {
769
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
770
        String[] result = nodeString.split("[,;\\s]");
771
        for (String r : result) {
772
        	if (r != null && r.length() > 0) {
773
	            NodeReference noderef = new NodeReference();
774
	            noderef.setValue(r);
775
	            nodeList.add(noderef);
776
	        }
777
        }
778
        return nodeList;
779
    }
}