Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2012-05-23 16:41:39 -0700 (Wed, 23 May 2012) $'
10
 * '$Revision: 7188 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.MockHttpServletRequest;
52
import org.dataone.client.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.ore.ResourceMapFactory;
57
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59
import org.dataone.service.types.v1.AccessPolicy;
60
import org.dataone.service.types.v1.Checksum;
61
import org.dataone.service.types.v1.Identifier;
62
import org.dataone.service.types.v1.NodeReference;
63
import org.dataone.service.types.v1.ObjectFormatIdentifier;
64
import org.dataone.service.types.v1.ReplicationPolicy;
65
import org.dataone.service.types.v1.Session;
66
import org.dataone.service.types.v1.Subject;
67
import org.dataone.service.types.v1.SystemMetadata;
68
import org.dataone.service.types.v1.util.ChecksumUtil;
69
import org.dataone.service.util.DateTimeMarshaller;
70
import org.dspace.foresite.ResourceMap;
71
import org.jibx.runtime.JiBXException;
72
import org.xml.sax.SAXException;
73

    
74
import java.util.Calendar;
75

    
76
import edu.ucsb.nceas.metacat.AccessionNumber;
77
import edu.ucsb.nceas.metacat.AccessionNumberException;
78
import edu.ucsb.nceas.metacat.DBUtil;
79
import edu.ucsb.nceas.metacat.DocumentImpl;
80
import edu.ucsb.nceas.metacat.IdentifierManager;
81
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
82
import edu.ucsb.nceas.metacat.McdbException;
83
import edu.ucsb.nceas.metacat.MetaCatServlet;
84
import edu.ucsb.nceas.metacat.MetacatHandler;
85
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
86
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
87
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
88
import edu.ucsb.nceas.metacat.properties.PropertyService;
89
import edu.ucsb.nceas.metacat.replication.ReplicationService;
90
import edu.ucsb.nceas.metacat.shared.AccessException;
91
import edu.ucsb.nceas.metacat.shared.HandlerException;
92
import edu.ucsb.nceas.metacat.util.DocumentUtil;
93
import edu.ucsb.nceas.utilities.ParseLSIDException;
94
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
95

    
96
public class SystemMetadataFactory {
97

    
98
	// Prefix prepended to a metadata localId to derive the resource map (ORE) identifier
	private static final String resourceMapPrefix = "resourceMap_";
	// Class-scoped log4j logger for this factory
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
104
	
105
	/**
106
	 * Creates a system metadata object for insertion into metacat
107
	 * 
108
	 * @param localId
109
	 *            The local document identifier
110
	 * @param user
111
	 *            The user submitting the system metadata document
112
	 * @param groups
113
	 *            The groups the user belongs to
114
	 * 
115
	 * @return sysMeta The system metadata object created
116
	 * @throws SAXException 
117
	 * @throws HandlerException 
118
	 * @throws AccessControlException 
119
	 * @throws AccessException 
120
	 */
121
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
122
			throws McdbException, McdbDocNotFoundException, SQLException,
123
			IOException, AccessionNumberException, ClassNotFoundException,
124
			InsufficientKarmaException, ParseLSIDException,
125
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
126
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
127
		
128
		logMetacat.debug("createSystemMetadata() called for localId " + localId);
129

    
130
		// check for system metadata
131
		SystemMetadata sysMeta = null;
132
		
133
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
134
		int rev = Integer.valueOf(accNum.getRev());
135
		
136
		// get/make the guid
137
		String guid = null;
138
		try {
139
			// get the guid if it exists
140
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
141
		} catch (McdbDocNotFoundException dnfe) {
142
			// otherwise create the mapping
143
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
144
			IdentifierManager.getInstance().createMapping(localId, localId);
145
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);			
146
		}
147
		
148
		// look up existing system metadata if it exists
149
		Identifier identifier = new Identifier();
150
		identifier.setValue(guid);
151
		try {
152
			logMetacat.debug("Using hazelcast to get system metadata");
153
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
154
			// TODO: if this is the case, we could return here -- what else do we gain?
155
			if (!updateExisting ) {
156
				return sysMeta;
157
			}
158
		} catch (Exception e) {
159
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
160

    
161
		}
162

    
163
		if (sysMeta == null) {
164
			// create system metadata
165
			sysMeta = new SystemMetadata();
166
			sysMeta.setIdentifier(identifier);
167
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
168
			sysMeta.setArchived(false);
169
		}
170
		
171
		// get additional docinfo
172
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
173
		// set the default object format
174
		String doctype = docInfo.get("doctype");
175
		ObjectFormatIdentifier fmtid = null;
176

    
177
		// set the object format, fall back to defaults
178
		if (doctype.trim().equals("BIN")) {
179
			// we don't know much about this file (yet)
180
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
181
		} else {
182
			try {
183
				// do we know the given format?
184
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
185
			} catch (NotFound nfe) {
186
				// format is not registered, use default
187
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
188
			}
189
		}
190

    
191
		sysMeta.setFormatId(fmtid);
192
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());
193

    
194
		// for retrieving the actual object
195
		InputStream inputStream = null;
196
		inputStream = MetacatHandler.read(localId);
197

    
198
		// create the checksum
199
		String algorithm = "MD5";
200
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
201
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
202
		sysMeta.setChecksum(checksum);
203
		
204
		// set the size from file on disk, don't read bytes again
205
		File fileOnDisk = getFileOnDisk(localId);
206
		long fileSize = 0;
207
		if (fileOnDisk.exists()) {
208
			fileSize = fileOnDisk.length();
209
		}
210
		sysMeta.setSize(BigInteger.valueOf(fileSize));
211
		
212
		// submitter
213
		Subject submitter = new Subject();
214
		submitter.setValue(docInfo.get("user_updated"));
215
		sysMeta.setSubmitter(submitter);
216
		
217
		// rights holder
218
		Subject owner = new Subject();
219
		owner.setValue(docInfo.get("user_owner"));
220
		sysMeta.setRightsHolder(owner);
221

    
222
		// dates
223
		String createdDateString = docInfo.get("date_created");
224
		String updatedDateString = docInfo.get("date_updated");
225
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
226
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
227
		sysMeta.setDateUploaded(createdDate);
228
		//sysMeta.setDateSysMetadataModified(updatedDate);
229
		// use current datetime 
230
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
231
		
232
		// set the revision history
233
		String docidWithoutRev = accNum.getDocid();
234
		Identifier obsoletedBy = null;
235
		Identifier obsoletes = null;
236
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
237
		// ensure this ordering since processing depends on it
238
		Collections.sort(revisions);
239
		for (int existingRev: revisions) {
240
			// use the docid+rev as the guid
241
			String existingPid = docidWithoutRev + "." + existingRev;
242
			try {
243
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
244
			} catch (McdbDocNotFoundException mdfe) {
245
				// we'll be defaulting to the local id
246
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
247
			}
248
			if (existingRev < rev) {
249
				// it's the old docid, until it's not
250
				obsoletes = new Identifier();
251
				obsoletes.setValue(existingPid);
252
			}
253
			if (existingRev > rev) {
254
				// it's the newer docid
255
				obsoletedBy = new Identifier();
256
				obsoletedBy.setValue(existingPid);
257
				// only want the version just after it
258
				break;
259
			}
260
		}
261
		// set them on our object
262
		sysMeta.setObsoletedBy(obsoletedBy);
263
		sysMeta.setObsoletes(obsoletes);
264
		
265
		// update the system metadata for the object[s] we are revising
266
		if (obsoletedBy != null) {
267
			SystemMetadata obsoletedBySysMeta = null;
268
			try {
269
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
270
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
271
			} catch (McdbDocNotFoundException e) {
272
				// ignore
273
			}
274
			if (obsoletedBySysMeta != null) {
275
				obsoletedBySysMeta.setObsoletes(identifier);
276
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
277
			}
278
		}
279
		if (obsoletes != null) {
280
			SystemMetadata obsoletesSysMeta = null;
281
			try {
282
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
283
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
284
			} catch (McdbDocNotFoundException e) {
285
				// ignore
286
			}
287
			if (obsoletesSysMeta != null) {
288
				obsoletesSysMeta.setObsoletedBy(identifier);
289
				obsoletesSysMeta.setArchived(true);
290
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
291
			}
292
		}
293
		
294
		// look up the access control policy we have in metacat
295
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
296
		sysMeta.setAccessPolicy(accessPolicy);
297
		
298
		// authoritative node
299
		NodeReference nr = new NodeReference();
300
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
301
		sysMeta.setOriginMemberNode(nr);
302
		sysMeta.setAuthoritativeMemberNode(nr);
303
		
304
		// Set a default replication policy
305
        ReplicationPolicy rp = getDefaultReplicationPolicy();
306
        if (rp != null) {
307
            sysMeta.setReplicationPolicy(rp);
308
        }
309
		
310
		// further parse EML documents to get data object format,
311
		// describes and describedBy information
312
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
313
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
314
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
315
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
316
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
317
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
318
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
319
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {
320

    
321
			try {
322
				
323
				// get it again to parse the document
324
				logMetacat.debug("Re-reading document inputStream");
325
				inputStream = MetacatHandler.read(localId);
326
				
327
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
328
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
329
				
330
				// iterate through the data objects in the EML doc and add sysmeta
331
				logMetacat.debug("In createSystemMetadata() the number of data "
332
								+ "entities is: "
333
								+ emlDocument.distributionMetadata);
334

    
335
				// for generating the ORE map
336
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
337
	            List<Identifier> dataIds = new ArrayList<Identifier>();
338
				
339
				// iterate through data objects described by the EML
340
	            if (emlDocument.distributionMetadata != null) {
341
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
342
	
343
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
344
				        String dataDocUrl = distMetadata.url;
345
				        String dataDocMimeType = distMetadata.mimeType;
346
						// default to binary
347
						if (dataDocMimeType == null) {
348
							dataDocMimeType = "application/octet-stream";
349
						}
350

    
351
						// process the data
352
						boolean remoteData = false;
353
						String dataDocLocalId = null;
354
						Identifier dataGuid = new Identifier();
355

    
356
						// handle ecogrid, or downloadable data
357
						String ecogridPrefix = "ecogrid://knb/";
358
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
359
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
360
						} else {
361
							// should we try downloading the remote data?
362
							if (downloadData) {
363
								InputStream dataObject = null;
364
								try {
365
									// download the data from the URL
366
									URL dataURL = new URL(dataDocUrl);
367
									URLConnection dataConnection = dataURL.openConnection();
368
									
369
									// default is to download the data
370
									dataObject = dataConnection.getInputStream();
371

    
372
									String detectedContentType = dataConnection.getContentType();
373
									logMetacat.info("Detected content type: " + detectedContentType);
374

    
375
									if (detectedContentType != null) {
376
										// seems to be HTML from the remote location
377
										if (detectedContentType.contains("html")) {
378
											// if we are not expecting it, we skip it
379
											if (!dataDocMimeType.contains("html")) {
380
												// set to null so we don't download it
381
												dataObject = null;
382
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
383
											}
384
										}
385
										
386
									} else {
387
										// if we don't know what it is, should we skip it?
388
										dataObject = null;
389
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
390
									}
391
									
392
								} catch (Exception e) {
393
									// error with the download
394
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
395
								}
396
								
397
								if (dataObject != null) {
398
									// create the local version of it
399
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
400
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
401
									dataGuid.setValue(dataDocLocalId);
402
									
403
									// save it locally
404
									Session session = new Session();
405
									session.setSubject(submitter);
406
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
407
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
408
									
409
									remoteData = true;
410
								}
411
							}
412
							
413
						}
414
						
415
						logMetacat.debug("Data local ID: " + dataDocLocalId);
416
						logMetacat.debug("Data URL     : " + dataDocUrl);
417
						logMetacat.debug("Data mime    : " + dataDocMimeType);
418
						
419
						// check for valid docid.rev
420
						String dataDocid = null;
421
						int dataRev = 0;
422
						if (dataDocLocalId != null) {
423
							// look up the guid for the data
424
							try {
425
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
426
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
427
							} catch (Exception e) {
428
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
429
								dataDocLocalId = null;
430
							}
431
						}
432
						
433
						// now we have a local id for the data
434
						if (dataDocLocalId != null) {
435
	
436
							// check if data system metadata exists already
437
							SystemMetadata dataSysMeta = null;
438
							String dataGuidString = null;
439
							try {
440
								// look for the identifier
441
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
442
								// set it
443
								dataGuid.setValue(dataGuidString);
444
								// look up the system metadata
445
								try {
446
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
447
								} catch (Exception e) {
448
									// probably not in the system
449
									dataSysMeta = null;
450
								}
451
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
452
							} catch (McdbDocNotFoundException nf) {
453
								// we didn't find it
454
								dataSysMeta = null;
455
							}
456
								
457
							// we'll have to generate it	
458
							if (dataSysMeta == null) {
459
								// System metadata for data doesn't exist yet, so create it
460
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
461
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);
462

    
463
								// now look it up again
464
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
465

    
466
								// set the guid
467
								dataGuid.setValue(dataGuidString);
468
								
469
								// inherit access rules from metadata, if we don't have our own
470
								if (remoteData) {
471
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
472
									// TODO: use access rules defined in EML, per data file
473
								}
474
	
475
							}
476
							
477
							// set object format for the data file
478
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
479
							ObjectFormatIdentifier fmt = null;
480
							try {
481
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
482
							} catch (NotFound nfe) {
483
								logMetacat.debug("Couldn't find format identifier for: "
484
												+ dataDocMimeType
485
												+ ". Setting it to application/octet-stream.");
486
								fmt = new ObjectFormatIdentifier();
487
								fmt.setValue("application/octet-stream");
488
							}
489
							dataSysMeta.setFormatId(fmt);
490

    
491
							// update the values
492
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
493
							
494
							// include as part of the ORE package
495
							dataIds.add(dataGuid);
496
	
497
						} // end if (EML package)
498
	
499
					} // end for (data entities)
500
					
501
	            } // data entities not null
502
	            
503
				// ORE map
504
				if (includeORE) {
505
					// can we generate them?
506
			        if (!dataIds.isEmpty()) {
507
			        	// it doesn't exist in the system?
508
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
509
			        	
510
				            // generate the ORE map for this datapackage
511
				            Identifier resourceMapId = new Identifier();
512
				            // use the local id, not the guid in case we have DOIs for them already
513
				            resourceMapId.setValue(resourceMapPrefix + localId);
514
				            idMap.put(sysMeta.getIdentifier(), dataIds);
515
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
516
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
517
				            // copy most of the same system metadata as the packaging metadata
518
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
519
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
520
				            resourceMapSysMeta.setIdentifier(resourceMapId);
521
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
522
							resourceMapSysMeta.setChecksum(oreChecksum);
523
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
524
							resourceMapSysMeta.setFormatId(formatId);
525
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
526
							
527
							// set the revision graph
528
							resourceMapSysMeta.setObsoletes(null);
529
							resourceMapSysMeta.setObsoletedBy(null);
530
							// look up the resource map that this one obsoletes
531
							if (sysMeta.getObsoletes() != null) {
532
								Identifier resourceMapObsoletes = new Identifier();
533
								resourceMapObsoletes.setValue(resourceMapPrefix + sysMeta.getObsoletes().getValue());
534
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
535
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
536
								if (resourceMapObsoletesSystemMetadata != null) {
537
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
538
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
539
								}
540
							}
541
							// look up the resource map that this one is obsoletedBy
542
							if (sysMeta.getObsoletedBy() != null) {
543
								Identifier resourceMapObsoletedBy = new Identifier();
544
								resourceMapObsoletedBy.setValue(resourceMapPrefix + sysMeta.getObsoletedBy().getValue());
545
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
546
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
547
								if (resourceMapObsoletedBySystemMetadata != null) {
548
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
549
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
550
								}
551
							}
552
				            
553
							// save it locally, if it doesn't already exist
554
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
555
								Session session = new Session();
556
								session.setSubject(submitter);
557
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
558
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
559
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
560
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
561
							}
562
			        	}
563
			        }
564
				}
565

    
566
			} catch (ParserConfigurationException pce) {
567
				logMetacat.debug("There was a problem parsing the EML document. "
568
								+ "The error message was: " + pce.getMessage());
569

    
570
			} catch (SAXException saxe) {
571
				logMetacat.debug("There was a problem traversing the EML document. "
572
								+ "The error message was: " + saxe.getMessage());
573

    
574
			} catch (XPathExpressionException xpee) {
575
				logMetacat.debug("There was a problem searching the EML document. "
576
								+ "The error message was: " + xpee.getMessage());
577
			} catch (Exception e) {
578
				logMetacat.debug("There was a problem creating System Metadata. "
579
								+ "The error message was: " + e.getMessage());
580
				e.printStackTrace();
581
			} // end try()
582

    
583
		} // end if()
584

    
585
		return sysMeta;
586
	}
587

    
588
    /**
589
     * Generate SystemMetadata for any object in the object store that does
590
     * not already have it.  SystemMetadata documents themselves, are, of course,
591
     * exempt.  This is a utility method for migration of existing object 
592
     * stores to DataONE where SystemMetadata is required for all objects.
593
     * @param idList
594
     * @param includeOre
595
     * @param downloadData
596
     * @throws PropertyNotFoundException
597
     * @throws NoSuchAlgorithmException
598
     * @throws AccessionNumberException
599
     * @throws SQLException
600
	 * @throws SAXException 
601
	 * @throws HandlerException 
602
	 * @throws JiBXException 
603
	 * @throws BaseException 
604
	 * @throws ParseLSIDException 
605
	 * @throws InsufficientKarmaException 
606
	 * @throws ClassNotFoundException 
607
	 * @throws IOException 
608
	 * @throws McdbException 
609
	 * @throws AccessException 
610
	 * @throws AccessControlException 
611
     */
612
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
613
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
614
    {
615
        
616
        for (String localId : idList) { 
617
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
618
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
619

    
620
            SystemMetadata sm = null;
621

    
622
            //generate required system metadata fields from the document
623
            try {
624
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
625
            } catch (Exception e) {
626
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
627
				continue;
628
			}
629
            
630
            //insert the systemmetadata object or just update it as needed
631
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
632
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
633
            
634
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
635

    
636
        }
637
        logMetacat.info("done generating system metadata for given list");
638
    }
639
    
640
	/**
	 * Determines if we already have registered an ORE map for this package.
	 * NOTE: currently a stub that always answers false, so callers will always
	 * attempt to generate a new resource map (insertion is still guarded by an
	 * identifierExists() check at the call site).
	 * @param guid of the EML/packaging object
	 * @return true if there is an ORE map for the given package
	 */
	private static boolean oreExistsFor(Identifier guid) {
		// TODO: implement call to CN.search()
		return false;
	}
649

    
650
	/**
651
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
652
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
653
	 * evaluated.
654
	 * 
655
	 * @param is The InputStream of bytes
656
	 * 
657
	 * @return size The size in bytes of the input stream as a long
658
	 * 
659
	 * @throws IOException
660
	 */
661
	private static long sizeOfStream(InputStream is) throws IOException {
662

    
663
		long size = 0;
664
		byte[] b = new byte[1024];
665
		int numread = is.read(b, 0, 1024);
666
		while (numread != -1) {
667
			size += numread;
668
			numread = is.read(b, 0, 1024);
669
		}
670
		return size;
671

    
672
	}
673
	
674
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
675
		
676
		DocumentImpl doc = new DocumentImpl(docid, false);
677
		String filepath = null;
678
		String filename = null;
679

    
680
		// deal with data or metadata cases
681
		if (doc.getRootNodeID() == 0) {
682
			// this is a data file
683
			filepath = PropertyService.getProperty("application.datafilepath");
684
		} else {
685
			filepath = PropertyService.getProperty("application.documentfilepath");
686
		}
687
		// ensure it is a directory path
688
		if (!(filepath.endsWith("/"))) {
689
			filepath += "/";
690
		}
691
		filename = filepath + docid;
692
		File documentFile = new File(filename);
693
		
694
		return documentFile;
695
	}
696

    
697
	/**
698
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
699
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
700
	 * than or equal to zero, no policy needs to be set, so return null.
701
	 * @return ReplicationPolicy, or null if no replication policy is needed
702
	 */
703
    private static ReplicationPolicy getDefaultReplicationPolicy() {
704
        ReplicationPolicy rp = null;
705
        int numReplicas = -1;
706
        try {
707
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
708
        } catch (NumberFormatException e) {
709
            // The property is not a valid integer, so return a null policy
710
            return null;
711
        } catch (PropertyNotFoundException e) {
712
            // The property is not found, so return a null policy
713
            return null;
714
        }
715
        
716
        if (numReplicas > 0) {
717
            rp = new ReplicationPolicy();
718
            rp.setReplicationAllowed(true);
719
            rp.setNumberReplicas(numReplicas);
720
            try {
721
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
722
                if (preferredNodeList != null) {
723
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
724
                    if (pNodes != null && !pNodes.isEmpty()) {
725
                        rp.setPreferredMemberNodeList(pNodes);
726
                    }
727
                }
728
            } catch (PropertyNotFoundException e) {
729
                // No preferred list found in properties, so just ignore it; no action needed
730
            }
731
            try {
732
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
733
                if (blockedNodeList != null) {
734
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
735
                    if (bNodes != null && !bNodes.isEmpty()) {
736
                        rp.setBlockedMemberNodeList(bNodes);
737
                    }
738
                }
739
            } catch (PropertyNotFoundException e) {
740
                // No blocked list found in properties, so just ignore it; no action needed
741
            }
742
        }
743
        return rp;
744
    }
745

    
746
    /**
747
     * Extract a List of NodeReferences from a String listing the node identifiers where
748
     * each identifier is separated by whitespace, comma, or semicolon characters.
749
     * @param nodeString the string containing the list of nodes
750
     * @return the List of NodeReference objects parsed from the input string
751
     */
752
    private static List<NodeReference> extractNodeReferences(String nodeString) {
753
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
754
        String[] result = nodeString.split("[,;\\s]");
755
        for (String r : result) {
756
        	if (r != null && r.length() > 0) {
757
	            NodeReference noderef = new NodeReference();
758
	            noderef.setValue(r);
759
	            nodeList.add(noderef);
760
	        }
761
        }
762
        return nodeList;
763
    }
764
}
(5-5/5)