Project

General

Profile

/**
 *  '$RCSfile$'
 *    Purpose: A Class for upgrading the database to version 1.5
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Saurabh Garg
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2014-07-23 16:19:48 -0700 (Wed, 23 Jul 2014) $'
 * '$Revision: 8810 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52
import org.dataone.client.v2.formats.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.ore.ResourceMapFactory;
57
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59
import org.dataone.service.types.v1.AccessPolicy;
60
import org.dataone.service.types.v1.AccessRule;
61
import org.dataone.service.types.v1.Checksum;
62
import org.dataone.service.types.v1.Identifier;
63
import org.dataone.service.types.v1.NodeReference;
64
import org.dataone.service.types.v1.ObjectFormatIdentifier;
65
import org.dataone.service.types.v1.ReplicationPolicy;
66
import org.dataone.service.types.v1.Session;
67
import org.dataone.service.types.v1.Subject;
68
import org.dataone.service.types.v2.SystemMetadata;
69
import org.dataone.service.types.v1.util.ChecksumUtil;
70
import org.dataone.service.util.DateTimeMarshaller;
71
import org.dspace.foresite.ResourceMap;
72
import org.jibx.runtime.JiBXException;
73
import org.xml.sax.SAXException;
74

    
75
import java.util.Calendar;
76

    
77
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79
import edu.ucsb.nceas.metacat.DBUtil;
80
import edu.ucsb.nceas.metacat.DocumentImpl;
81
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84
import edu.ucsb.nceas.metacat.MetaCatServlet;
85
import edu.ucsb.nceas.metacat.MetacatHandler;
86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.properties.PropertyService;
90
import edu.ucsb.nceas.metacat.replication.ReplicationService;
91
import edu.ucsb.nceas.metacat.shared.AccessException;
92
import edu.ucsb.nceas.metacat.shared.HandlerException;
93
import edu.ucsb.nceas.metacat.util.DocumentUtil;
94
import edu.ucsb.nceas.utilities.ParseLSIDException;
95
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
96

    
97
public class SystemMetadataFactory {
98

    
99
	// prefix prepended to a localId when minting identifiers for generated ORE resource maps
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
	// class-wide logger
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
105
	
106
	/**
	 * Creates a system metadata object for insertion into metacat.
	 * Looks up (or creates) the guid mapping for the local document, reuses any
	 * existing system metadata found in the Hazelcast map, fills in format,
	 * checksum, size, submitter/rights holder, dates and revision chain, and
	 * for EML documents additionally processes the described data entities
	 * (optionally downloading remote data and generating an ORE resource map).
	 * 
	 * @param localId
	 *            The local document identifier (docid.rev) to generate system metadata for
	 * @param includeORE
	 *            Whether to generate and insert an ORE resource map for EML
	 *            documents that describe data objects
	 * @param downloadData
	 *            Whether remote (non-ecogrid) data files referenced by an EML
	 *            document should be downloaded and stored locally
	 * 
	 * @return sysMeta The system metadata object created (or the existing one, updated)
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
		
		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;
		
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());
		
		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping (guid defaults to the localId itself)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		}
		
		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			// any hazelcast failure is treated the same as "not found"
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());

		}

		if (sysMeta == null) {
			// create system metadata from scratch, starting at serial version 1
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}
		
		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		// NOTE(review): doctype.trim() will NPE if docinfo has no "doctype" entry -- confirm it is always present
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else if (doctype.trim().equals("metadata")) {
			// special ESRI FGDC format
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		// NOTE(review): inputStream is fully consumed by the checksum below and is
		// never closed in this method -- verify MetacatHandler.read() resources are released elsewhere
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);
		
		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));
		
		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);
		
		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
		
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not (last one before rev wins)
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);
		
		// update the system metadata for the object[s] we are revising
		// so their obsoletes/obsoletedBy links point back at this identifier
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				// DO NOT set archived to true -- it will have unintended consequences if the CN sees this.
				//obsoletesSysMeta.setArchived(true);
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}
		
		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		// NOTE(review): the NPE catch below is deliberate control flow -- a null policy
		// or null allow list means there is no access policy to record
		try {
        List<AccessRule> allowList = accessPolicy.getAllowList();
        int listSize = allowList.size();
        sysMeta.setAccessPolicy(accessPolicy);
        
    } catch (NullPointerException npe) {
        logMetacat.info("The allow list is empty, can't include an empty " +
            "access policy in the system metadata for " + guid);
        
    }
		
		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);
		
		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }
		
		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): identity (==) comparison assumes ObjectFormatCache returns
		// cached/shared ObjectFormatIdentifier instances -- .equals() on the values would be safer; confirm
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {
				
				// get it again to parse the document (checksum consumed the first stream)
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);
				
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
				
				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
	            List<Identifier> dataIds = new ArrayList<Identifier>();
				
				// iterate through data objects described by the EML
	            if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
	
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
				        String dataDocUrl = distMetadata.url;
				        String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							// ecogrid URLs embed the local docid after the prefix
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();
									
									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}
										
									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}
									
								} catch (Exception e) {
									// error with the download -- continue without this data file
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}
								
								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);
									
									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
									
									remoteData = true;
								}
							}
							
						}
						
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);
						
						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}
						
						// now we have a local id for the data
						if (dataDocLocalId != null) {
	
							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}
								
							// we'll have to generate it
							// (recursive call; downloadData=false prevents re-downloading)
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);
								
								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}
	
							}
							
							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
							
							// include as part of the ORE package
							dataIds.add(dataGuid);
	
						} // end if (EML package)
	
					} // end for (data entities)
					
	            } // data entities not null
	            
				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
			        	
				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // use the local id, not the guid in case we have DOIs for them already
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
							
							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								// use the localId in case we have a DOI
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									resourceMapObsoletesSystemMetadata.setArchived(true);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								// use the localId in case we have a DOI
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								resourceMapSysMeta.setArchived(true);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}
				            
							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				// best-effort: EML/data-entity processing failures do not prevent
				// returning the base system metadata built above
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
609

    
610
	/**
611
	 * Checks for potential ORE object existence 
612
	 * @param identifier
613
	 * @return
614
	 */
615
    public static boolean oreExistsFor(Identifier identifier) {
616
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
617
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
618
		return (ids != null && ids.size() > 0);
619
	}
620

    
621
	/**
622
     * Generate SystemMetadata for any object in the object store that does
623
     * not already have it.  SystemMetadata documents themselves, are, of course,
624
     * exempt.  This is a utility method for migration of existing object 
625
     * stores to DataONE where SystemMetadata is required for all objects.
626
     * @param idList
627
     * @param includeOre
628
     * @param downloadData
629
     * @throws PropertyNotFoundException
630
     * @throws NoSuchAlgorithmException
631
     * @throws AccessionNumberException
632
     * @throws SQLException
633
	 * @throws SAXException 
634
	 * @throws HandlerException 
635
	 * @throws JiBXException 
636
	 * @throws BaseException 
637
	 * @throws ParseLSIDException 
638
	 * @throws InsufficientKarmaException 
639
	 * @throws ClassNotFoundException 
640
	 * @throws IOException 
641
	 * @throws McdbException 
642
	 * @throws AccessException 
643
	 * @throws AccessControlException 
644
     */
645
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
646
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
647
    {
648
        
649
        for (String localId : idList) { 
650
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
651
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
652

    
653
            SystemMetadata sm = null;
654

    
655
            //generate required system metadata fields from the document
656
            try {
657
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
658
            } catch (Exception e) {
659
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
660
				continue;
661
			}
662
            
663
            //insert the systemmetadata object or just update it as needed
664
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
665
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
666
            
667
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
668

    
669
        }
670
        logMetacat.info("done generating system metadata for given list");
671
    }
672

    
673
	/**
674
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
675
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
676
	 * evaluated.
677
	 * 
678
	 * @param is The InputStream of bytes
679
	 * 
680
	 * @return size The size in bytes of the input stream as a long
681
	 * 
682
	 * @throws IOException
683
	 */
684
	public static long sizeOfStream(InputStream is) throws IOException {
685

    
686
		long size = 0;
687
		byte[] b = new byte[1024];
688
		int numread = is.read(b, 0, 1024);
689
		while (numread != -1) {
690
			size += numread;
691
			numread = is.read(b, 0, 1024);
692
		}
693
		return size;
694

    
695
	}
696
	
697
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
698
		
699
		DocumentImpl doc = new DocumentImpl(docid, false);
700
		String filepath = null;
701
		String filename = null;
702

    
703
		// deal with data or metadata cases
704
		if (doc.getRootNodeID() == 0) {
705
			// this is a data file
706
			filepath = PropertyService.getProperty("application.datafilepath");
707
		} else {
708
			filepath = PropertyService.getProperty("application.documentfilepath");
709
		}
710
		// ensure it is a directory path
711
		if (!(filepath.endsWith("/"))) {
712
			filepath += "/";
713
		}
714
		filename = filepath + docid;
715
		File documentFile = new File(filename);
716
		
717
		return documentFile;
718
	}
719

    
720
	/**
721
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
722
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
723
	 * than or equal to zero, no policy needs to be set, so return null.
724
	 * @return ReplicationPolicy, or null if no replication policy is needed
725
	 */
726
    private static ReplicationPolicy getDefaultReplicationPolicy() {
727
        ReplicationPolicy rp = null;
728
        int numReplicas = -1;
729
        try {
730
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
731
        } catch (NumberFormatException e) {
732
            // The property is not a valid integer, so return a null policy
733
            return null;
734
        } catch (PropertyNotFoundException e) {
735
            // The property is not found, so return a null policy
736
            return null;
737
        }
738
        
739
        if (numReplicas > 0) {
740
            rp = new ReplicationPolicy();
741
            rp.setReplicationAllowed(true);
742
            rp.setNumberReplicas(numReplicas);
743
            try {
744
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
745
                if (preferredNodeList != null) {
746
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
747
                    if (pNodes != null && !pNodes.isEmpty()) {
748
                        rp.setPreferredMemberNodeList(pNodes);
749
                    }
750
                }
751
            } catch (PropertyNotFoundException e) {
752
                // No preferred list found in properties, so just ignore it; no action needed
753
            }
754
            try {
755
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
756
                if (blockedNodeList != null) {
757
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
758
                    if (bNodes != null && !bNodes.isEmpty()) {
759
                        rp.setBlockedMemberNodeList(bNodes);
760
                    }
761
                }
762
            } catch (PropertyNotFoundException e) {
763
                // No blocked list found in properties, so just ignore it; no action needed
764
            }
765
        }
766
        return rp;
767
    }
768

    
769
    /**
770
     * Extract a List of NodeReferences from a String listing the node identifiers where
771
     * each identifier is separated by whitespace, comma, or semicolon characters.
772
     * @param nodeString the string containing the list of nodes
773
     * @return the List of NodeReference objects parsed from the input string
774
     */
775
    private static List<NodeReference> extractNodeReferences(String nodeString) {
776
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
777
        String[] result = nodeString.split("[,;\\s]");
778
        for (String r : result) {
779
        	if (r != null && r.length() > 0) {
780
	            NodeReference noderef = new NodeReference();
781
	            noderef.setValue(r);
782
	            nodeList.add(noderef);
783
	        }
784
        }
785
        return nodeList;
786
    }
787
}
(7-7/7)