Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: jones $'
9
 *     '$Date: 2012-02-07 00:02:18 -0800 (Tue, 07 Feb 2012) $'
10
 * '$Revision: 6988 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.MockHttpServletRequest;
52
import org.dataone.client.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.ore.ResourceMapFactory;
57
import org.dataone.service.exceptions.BaseException;
58
import org.dataone.service.exceptions.NotFound;
59
import org.dataone.service.types.v1.AccessPolicy;
60
import org.dataone.service.types.v1.Checksum;
61
import org.dataone.service.types.v1.Identifier;
62
import org.dataone.service.types.v1.NodeReference;
63
import org.dataone.service.types.v1.ObjectFormatIdentifier;
64
import org.dataone.service.types.v1.ReplicationPolicy;
65
import org.dataone.service.types.v1.Session;
66
import org.dataone.service.types.v1.Subject;
67
import org.dataone.service.types.v1.SystemMetadata;
68
import org.dataone.service.types.v1.util.ChecksumUtil;
69
import org.dataone.service.util.DateTimeMarshaller;
70
import org.dspace.foresite.ResourceMap;
71
import org.jibx.runtime.JiBXException;
72
import org.xml.sax.SAXException;
73

    
74
import edu.ucsb.nceas.metacat.AccessionNumber;
75
import edu.ucsb.nceas.metacat.AccessionNumberException;
76
import edu.ucsb.nceas.metacat.DBUtil;
77
import edu.ucsb.nceas.metacat.DocumentImpl;
78
import edu.ucsb.nceas.metacat.IdentifierManager;
79
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
80
import edu.ucsb.nceas.metacat.McdbException;
81
import edu.ucsb.nceas.metacat.MetaCatServlet;
82
import edu.ucsb.nceas.metacat.MetacatHandler;
83
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
84
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
85
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
86
import edu.ucsb.nceas.metacat.properties.PropertyService;
87
import edu.ucsb.nceas.metacat.replication.ReplicationService;
88
import edu.ucsb.nceas.metacat.shared.AccessException;
89
import edu.ucsb.nceas.metacat.shared.HandlerException;
90
import edu.ucsb.nceas.metacat.util.DocumentUtil;
91
import edu.ucsb.nceas.utilities.ParseLSIDException;
92
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
93

    
94
public class SystemMetadataFactory {

	/** Prefix prepended to a package PID to form the PID of its ORE resource map. */
	private static final String resourceMapPrefix = "resourceMap_";
	/** Class-wide logger. */
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
102
	
103
	/**
	 * Creates a system metadata object for insertion into metacat.
	 * Looks up (or creates) the GUID mapping for the local id, reuses any
	 * existing system metadata found in Hazelcast, fills in format, checksum,
	 * size, submitter, rights holder, dates and the obsoletes/obsoletedBy
	 * revision chain, and — for recognized EML 2.0.0–2.1.1 documents — also
	 * generates system metadata for each described data object (recursing into
	 * this method with downloadData=false) and optionally an ORE resource map.
	 *
	 * @param localId
	 *            The local document identifier (docid.rev)
	 * @param includeORE
	 *            whether to generate and register an ORE resource map for an
	 *            EML package that describes data objects
	 * @param downloadData
	 *            whether remote (non-ecogrid) data URLs referenced by an EML
	 *            document should be downloaded and stored locally
	 *
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {
		
		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;
		
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());
		
		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping (guid defaults to the localId itself)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);			
		}
		
		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			// best-effort lookup: fall through and build fresh metadata below
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());

		}

		if (sysMeta == null) {
			// create system metadata
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}
		
		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum
		// NOTE(review): inputStream is fully consumed here and never closed;
		// presumably ChecksumUtil drains it and GC handles the handle -- verify.
		String algorithm = "MD5";
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		sysMeta.setChecksum(checksum);
		
		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));
		
		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);
		
		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
		sysMeta.setDateUploaded(createdDate);
		sysMeta.setDateSysMetadataModified(updatedDate);
		
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			if (existingRev < rev) {
				// it's the old docid, until it's not: keeps the latest older rev
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);
		
		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore: the newer revision may not have system metadata yet
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore: the older revision may not have system metadata yet
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				// the superseded revision is archived
				obsoletesSysMeta.setArchived(true);
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}
		
		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		sysMeta.setAccessPolicy(accessPolicy);
		
		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);
		
		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }
		
		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): these are identity (==) comparisons; they rely on
		// ObjectFormatCache returning the same cached ObjectFormatIdentifier
		// instance each call -- confirm, otherwise comparing
		// fmtid.getValue() with String.equals() would be safer.
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {
				
				// get it again to parse the document (first stream was consumed by checksum)
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);
				
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
				
				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
	            List<Identifier> dataIds = new ArrayList<Identifier>();
				
				// iterate through data objects described by the EML
	            if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
	
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
				        String dataDocUrl = distMetadata.url;
				        String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							// ecogrid URLs embed the local docid after the prefix
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();
									
									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}
										
									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}
									
								} catch (Exception e) {
									// error with the download; treated as best-effort, continue
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}
								
								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);
									
									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);
									
									remoteData = true;
								}
							}
							
						}
						
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);
						
						// now we have a local id for the data
						if (dataDocLocalId != null) {
							
							// look up the guid for the data
							String dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
							int dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
	
							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}
								
							// we'll have to generate it	
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								// (recursive call; data objects never trigger further downloads)
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);
								
								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}
	
							}
							
							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
							
							// include as part of the ORE package
							dataIds.add(dataGuid);
	
						} // end if (EML package)
	
					} // end for (data entities)
					
	            } // data entities not null
	            
				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
			        	
				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // want to be able to run this over and over again for now
				            resourceMapId.setValue(resourceMapPrefix + sysMeta.getIdentifier().getValue());
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), "MD5");
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
							
							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(resourceMapPrefix + sysMeta.getObsoletes().getValue());
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(resourceMapPrefix + sysMeta.getObsoletedBy().getValue());
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}
				            
							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
566

    
567
    /**
568
     * Generate SystemMetadata for any object in the object store that does
569
     * not already have it.  SystemMetadata documents themselves, are, of course,
570
     * exempt.  This is a utility method for migration of existing object 
571
     * stores to DataONE where SystemMetadata is required for all objects.
572
     * @param idList
573
     * @param includeOre
574
     * @param downloadData
575
     * @throws PropertyNotFoundException
576
     * @throws NoSuchAlgorithmException
577
     * @throws AccessionNumberException
578
     * @throws SQLException
579
	 * @throws SAXException 
580
	 * @throws HandlerException 
581
	 * @throws JiBXException 
582
	 * @throws BaseException 
583
	 * @throws ParseLSIDException 
584
	 * @throws InsufficientKarmaException 
585
	 * @throws ClassNotFoundException 
586
	 * @throws IOException 
587
	 * @throws McdbException 
588
	 * @throws AccessException 
589
	 * @throws AccessControlException 
590
     */
591
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
592
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
593
    {
594
        
595
        for (String localId : idList) { 
596
            //for each id, add a system metadata doc
597
        	generateSystemMetadata(localId, includeOre, downloadData);
598
        }
599
        logMetacat.info("done generating system metadata for given list");
600
    }
601
    
602

    
603
    /**
604
     * Generate SystemMetadata for a particular object with identifier localId.
605
     * This is a utility method for migration of existing objects 
606
     * to DataONE where SystemMetadata is required for all objects.
607
     * @param localId
608
     * @param includeOre
609
     * @param downloadData
610
     * @throws PropertyNotFoundException
611
     * @throws NoSuchAlgorithmException
612
     * @throws AccessionNumberException
613
     * @throws SQLException
614
     * @throws SAXException 
615
     * @throws HandlerException 
616
     * @throws JiBXException 
617
     * @throws BaseException 
618
     * @throws ParseLSIDException 
619
     * @throws InsufficientKarmaException 
620
     * @throws ClassNotFoundException 
621
     * @throws IOException 
622
     * @throws McdbException 
623
     * @throws AccessException 
624
     * @throws AccessControlException 
625
     */
626
    protected static void generateSystemMetadata(String localId, boolean includeOre, boolean downloadData) 
627
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
628
    {
629
    	logMetacat.debug("Creating SystemMetadata for localId " + localId);
630
        SystemMetadata sm = null;
631

    
632
        //generate required system metadata fields from the document
633
    	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
634
    	
635
        //insert the systemmetadata object or just update it as needed
636
        boolean exists = IdentifierManager.getInstance().systemMetadataExists(sm.getIdentifier().getValue());
637
        if (!exists) {
638
        	IdentifierManager.getInstance().createSystemMetadata(sm);
639
        	logMetacat.info("Generated SystemMetadata for " + localId);
640
        } else {
641
        	IdentifierManager.getInstance().updateSystemMetadata(sm);
642
        	logMetacat.info("Updated SystemMetadata for " + localId);
643
        }
644
    }
645
	
646
	/**
	 * Determines if we already have registered an ORE map for this package.
	 * Currently a stub: always reports that no ORE map exists, so callers
	 * (createSystemMetadata) will regenerate the resource map every time.
	 * @param guid of the EML/packaging object
	 * @return true if there is an ORE map for the given package
	 */
	private static boolean oreExistsFor(Identifier guid) {
		// TODO: implement call to CN.search()
		return false;
	}
655

    
656
	/**
657
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
658
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
659
	 * evaluated.
660
	 * 
661
	 * @param is The InputStream of bytes
662
	 * 
663
	 * @return size The size in bytes of the input stream as a long
664
	 * 
665
	 * @throws IOException
666
	 */
667
	private static long sizeOfStream(InputStream is) throws IOException {
668

    
669
		long size = 0;
670
		byte[] b = new byte[1024];
671
		int numread = is.read(b, 0, 1024);
672
		while (numread != -1) {
673
			size += numread;
674
			numread = is.read(b, 0, 1024);
675
		}
676
		return size;
677

    
678
	}
679
	
680
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
681
		
682
		DocumentImpl doc = new DocumentImpl(docid, false);
683
		String filepath = null;
684
		String filename = null;
685

    
686
		// deal with data or metadata cases
687
		if (doc.getRootNodeID() == 0) {
688
			// this is a data file
689
			filepath = PropertyService.getProperty("application.datafilepath");
690
		} else {
691
			filepath = PropertyService.getProperty("application.documentfilepath");
692
		}
693
		// ensure it is a directory path
694
		if (!(filepath.endsWith("/"))) {
695
			filepath += "/";
696
		}
697
		filename = filepath + docid;
698
		File documentFile = new File(filename);
699
		
700
		return documentFile;
701
	}
702

    
703
	/**
704
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
705
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
706
	 * than or equal to zero, no policy needs to be set, so return null.
707
	 * @return ReplicationPolicy, or null if no replication policy is needed
708
	 */
709
    private static ReplicationPolicy getDefaultReplicationPolicy() {
710
        ReplicationPolicy rp = null;
711
        int numReplicas = -1;
712
        try {
713
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
714
        } catch (NumberFormatException e) {
715
            // The property is not a valid integer, so return a null policy
716
            return null;
717
        } catch (PropertyNotFoundException e) {
718
            // The property is not found, so return a null policy
719
            return null;
720
        }
721
        
722
        if (numReplicas > 0) {
723
            rp = new ReplicationPolicy();
724
            rp.setReplicationAllowed(true);
725
            rp.setNumberReplicas(numReplicas);
726
            try {
727
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
728
                if (preferredNodeList != null) {
729
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
730
                    if (pNodes != null && !pNodes.isEmpty()) {
731
                        rp.setPreferredMemberNodeList(pNodes);
732
                    }
733
                }
734
            } catch (PropertyNotFoundException e) {
735
                // No preferred list found in properties, so just ignore it; no action needed
736
            }
737
            try {
738
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
739
                if (blockedNodeList != null) {
740
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
741
                    if (bNodes != null && !bNodes.isEmpty()) {
742
                        rp.setPreferredMemberNodeList(bNodes);
743
                    }
744
                }
745
            } catch (PropertyNotFoundException e) {
746
                // No blocked list found in properties, so just ignore it; no action needed
747
            }
748
        }
749
        return rp;
750
    }
751

    
752
    /**
753
     * Extract a List of NodeReferences froma String listing the node identifiers where
754
     * each identifier is separated by whitespace, comma, or semicolon characters.
755
     * @param nodeString the string containing the list of nodes
756
     * @return the List of NodeReference objects parsed fromt he input string
757
     */
758
    private static List<NodeReference> extractNodeReferences(String nodeString) {
759
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
760
        String[] result = nodeString.split("[,;\\s]*");
761
        for (String r : result) {
762
            NodeReference noderef = new NodeReference();
763
            noderef.setValue(r);
764
            nodeList.add(noderef);
765
        }
766
        return nodeList;
767
    }
768
}
(5-5/5)