Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2013-04-24 19:34:36 -0700 (Wed, 24 Apr 2013) $'
10
 * '$Revision: 7622 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52
import org.dataone.client.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.ore.ResourceMapFactory;
57
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59
import org.dataone.service.types.v1.AccessPolicy;
60
import org.dataone.service.types.v1.AccessRule;
61
import org.dataone.service.types.v1.Checksum;
62
import org.dataone.service.types.v1.Identifier;
63
import org.dataone.service.types.v1.NodeReference;
64
import org.dataone.service.types.v1.ObjectFormatIdentifier;
65
import org.dataone.service.types.v1.ReplicationPolicy;
66
import org.dataone.service.types.v1.Session;
67
import org.dataone.service.types.v1.Subject;
68
import org.dataone.service.types.v1.SystemMetadata;
69
import org.dataone.service.types.v1.util.ChecksumUtil;
70
import org.dataone.service.util.DateTimeMarshaller;
71
import org.dspace.foresite.ResourceMap;
72
import org.jibx.runtime.JiBXException;
73
import org.xml.sax.SAXException;
74

    
75
import java.util.Calendar;
76

    
77
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79
import edu.ucsb.nceas.metacat.DBUtil;
80
import edu.ucsb.nceas.metacat.DocumentImpl;
81
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84
import edu.ucsb.nceas.metacat.MetaCatServlet;
85
import edu.ucsb.nceas.metacat.MetacatHandler;
86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.properties.PropertyService;
90
import edu.ucsb.nceas.metacat.replication.ReplicationService;
91
import edu.ucsb.nceas.metacat.shared.AccessException;
92
import edu.ucsb.nceas.metacat.shared.HandlerException;
93
import edu.ucsb.nceas.metacat.util.DocumentUtil;
94
import edu.ucsb.nceas.utilities.ParseLSIDException;
95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
96

    
97
public class SystemMetadataFactory {

	// prefix prepended to a metadata localId to derive the identifier of its ORE resource map
	private static final String resourceMapPrefix = "resourceMap_";
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
	
106
	/**
	 * Creates a system metadata object for insertion into metacat.
	 * Reuses any existing system metadata found in the Hazelcast map, fills in
	 * format, checksum, size, submitter/rights holder, dates and the revision
	 * graph, and — for recognized EML 2.0.0–2.1.1 documents — also processes the
	 * described data entities (optionally downloading remote data) and, when
	 * requested, generates and registers an ORE resource map for the package.
	 * 
	 * @param localId
	 *            The local document identifier (docid.rev form)
	 * @param includeORE
	 *            when true, generate an ORE resource map for EML packages that
	 *            reference data entities
	 * @param downloadData
	 *            when true, attempt to download non-ecogrid data URLs referenced
	 *            by the EML and register the downloaded objects locally
	 * 
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
		
		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;
		
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());
		
		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping (guid defaults to the localId itself)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);			
		}
		
		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			// treat any lookup failure as "not found" and fall through to regeneration
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());

		}

		if (sysMeta == null) {
			// create system metadata
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}
		
		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum (this consumes inputStream; it is re-read below for EML parsing)
		// NOTE(review): inputStream is never explicitly closed here — confirm
		// MetacatHandler.read()/ChecksumUtil take care of it, or wrap in try/finally
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);
		
		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));
		
		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);
		
		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
		
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not (last one before rev wins)
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);
		
		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				obsoletesSysMeta.setArchived(true);
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}
		
		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		try {
        // allowList.size() is only called to trigger the NPE when the policy is empty
        List<AccessRule> allowList = accessPolicy.getAllowList();
        int listSize = allowList.size();
        sysMeta.setAccessPolicy(accessPolicy);
        
    } catch (NullPointerException npe) {
        logMetacat.info("The allow list is empty, can't include an empty " +
            "access policy in the system metadata for " + guid);
        
    }
		
		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);
		
		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }
		
		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): these are reference (==) comparisons, not equals();
		// they rely on ObjectFormatCache returning cached singleton instances — confirm
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {
				
				// get it again to parse the document
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);
				
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
				
				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
	            List<Identifier> dataIds = new ArrayList<Identifier>();
				
				// iterate through data objects described by the EML
	            if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
	
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
				        String dataDocUrl = distMetadata.url;
				        String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();
									
									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}
										
									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}
									
								} catch (Exception e) {
									// error with the download
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}
								
								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);
									
									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
									
									remoteData = true;
								}
							}
							
						}
						
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);
						
						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}
						
						// now we have a local id for the data
						if (dataDocLocalId != null) {
	
							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}
								
							// we'll have to generate it	
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								// (recursive call; downloadData=false prevents re-downloading)
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);
								
								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}
	
							}
							
							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
							
							// include as part of the ORE package
							dataIds.add(dataGuid);
	
						} // end if (EML package)
	
					} // end for (data entities)
					
	            } // data entities not null
	            
				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
			        	
				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // use the local id, not the guid in case we have DOIs for them already
				            resourceMapId.setValue(resourceMapPrefix + localId);
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
							
							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								// use the localId in case we have a DOI
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(resourceMapPrefix + obsoletesLocalId );
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									resourceMapObsoletesSystemMetadata.setArchived(true);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								// use the localId in case we have a DOI
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(resourceMapPrefix + obsoletedByLocalId);
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								resourceMapSysMeta.setArchived(true);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}
				            
							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
605

    
606
    /**
607
     * Generate SystemMetadata for any object in the object store that does
608
     * not already have it.  SystemMetadata documents themselves, are, of course,
609
     * exempt.  This is a utility method for migration of existing object 
610
     * stores to DataONE where SystemMetadata is required for all objects.
611
     * @param idList
612
     * @param includeOre
613
     * @param downloadData
614
     * @throws PropertyNotFoundException
615
     * @throws NoSuchAlgorithmException
616
     * @throws AccessionNumberException
617
     * @throws SQLException
618
	 * @throws SAXException 
619
	 * @throws HandlerException 
620
	 * @throws JiBXException 
621
	 * @throws BaseException 
622
	 * @throws ParseLSIDException 
623
	 * @throws InsufficientKarmaException 
624
	 * @throws ClassNotFoundException 
625
	 * @throws IOException 
626
	 * @throws McdbException 
627
	 * @throws AccessException 
628
	 * @throws AccessControlException 
629
     */
630
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
631
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
632
    {
633
        
634
        for (String localId : idList) { 
635
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
636
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
637

    
638
            SystemMetadata sm = null;
639

    
640
            //generate required system metadata fields from the document
641
            try {
642
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
643
            } catch (Exception e) {
644
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
645
				continue;
646
			}
647
            
648
            //insert the systemmetadata object or just update it as needed
649
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
650
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
651
            
652
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
653

    
654
        }
655
        logMetacat.info("done generating system metadata for given list");
656
    }
657
    
658
	/**
	 * Determines if we already have registered an ORE map for this package.
	 * NOTE: not yet implemented — currently always returns false, so callers
	 * will always attempt to generate a new ORE map.
	 * @param guid of the EML/packaging object
	 * @return true if there is an ORE map for the given package
	 */
	private static boolean oreExistsFor(Identifier guid) {
		// TODO: implement call to CN.search()
		return false;
	}
667

    
668
	/**
669
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
670
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
671
	 * evaluated.
672
	 * 
673
	 * @param is The InputStream of bytes
674
	 * 
675
	 * @return size The size in bytes of the input stream as a long
676
	 * 
677
	 * @throws IOException
678
	 */
679
	public static long sizeOfStream(InputStream is) throws IOException {
680

    
681
		long size = 0;
682
		byte[] b = new byte[1024];
683
		int numread = is.read(b, 0, 1024);
684
		while (numread != -1) {
685
			size += numread;
686
			numread = is.read(b, 0, 1024);
687
		}
688
		return size;
689

    
690
	}
691
	
692
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
693
		
694
		DocumentImpl doc = new DocumentImpl(docid, false);
695
		String filepath = null;
696
		String filename = null;
697

    
698
		// deal with data or metadata cases
699
		if (doc.getRootNodeID() == 0) {
700
			// this is a data file
701
			filepath = PropertyService.getProperty("application.datafilepath");
702
		} else {
703
			filepath = PropertyService.getProperty("application.documentfilepath");
704
		}
705
		// ensure it is a directory path
706
		if (!(filepath.endsWith("/"))) {
707
			filepath += "/";
708
		}
709
		filename = filepath + docid;
710
		File documentFile = new File(filename);
711
		
712
		return documentFile;
713
	}
714

    
715
	/**
716
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
717
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
718
	 * than or equal to zero, no policy needs to be set, so return null.
719
	 * @return ReplicationPolicy, or null if no replication policy is needed
720
	 */
721
    private static ReplicationPolicy getDefaultReplicationPolicy() {
722
        ReplicationPolicy rp = null;
723
        int numReplicas = -1;
724
        try {
725
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
726
        } catch (NumberFormatException e) {
727
            // The property is not a valid integer, so return a null policy
728
            return null;
729
        } catch (PropertyNotFoundException e) {
730
            // The property is not found, so return a null policy
731
            return null;
732
        }
733
        
734
        if (numReplicas > 0) {
735
            rp = new ReplicationPolicy();
736
            rp.setReplicationAllowed(true);
737
            rp.setNumberReplicas(numReplicas);
738
            try {
739
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
740
                if (preferredNodeList != null) {
741
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
742
                    if (pNodes != null && !pNodes.isEmpty()) {
743
                        rp.setPreferredMemberNodeList(pNodes);
744
                    }
745
                }
746
            } catch (PropertyNotFoundException e) {
747
                // No preferred list found in properties, so just ignore it; no action needed
748
            }
749
            try {
750
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
751
                if (blockedNodeList != null) {
752
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
753
                    if (bNodes != null && !bNodes.isEmpty()) {
754
                        rp.setBlockedMemberNodeList(bNodes);
755
                    }
756
                }
757
            } catch (PropertyNotFoundException e) {
758
                // No blocked list found in properties, so just ignore it; no action needed
759
            }
760
        }
761
        return rp;
762
    }
763

    
764
    /**
765
     * Extract a List of NodeReferences from a String listing the node identifiers where
766
     * each identifier is separated by whitespace, comma, or semicolon characters.
767
     * @param nodeString the string containing the list of nodes
768
     * @return the List of NodeReference objects parsed from the input string
769
     */
770
    private static List<NodeReference> extractNodeReferences(String nodeString) {
771
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
772
        String[] result = nodeString.split("[,;\\s]");
773
        for (String r : result) {
774
        	if (r != null && r.length() > 0) {
775
	            NodeReference noderef = new NodeReference();
776
	            noderef.setValue(r);
777
	            nodeList.add(noderef);
778
	        }
779
        }
780
        return nodeList;
781
    }
782
}
(6-6/6)