Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2013-09-12 13:45:05 -0700 (Thu, 12 Sep 2013) $'
10
 * '$Revision: 8189 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.math.BigInteger;
33
import java.net.URL;
34
import java.net.URLConnection;
35
import java.security.NoSuchAlgorithmException;
36
import java.sql.SQLException;
37
import java.util.ArrayList;
38
import java.util.Collections;
39
import java.util.Date;
40
import java.util.HashMap;
41
import java.util.Hashtable;
42
import java.util.List;
43
import java.util.Map;
44
import java.util.Vector;
45

    
46
import javax.xml.parsers.ParserConfigurationException;
47
import javax.xml.xpath.XPathExpressionException;
48

    
49
import org.apache.commons.beanutils.BeanUtils;
50
import org.apache.commons.io.IOUtils;
51
import org.apache.log4j.Logger;
52
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
53
import org.dataone.client.ObjectFormatCache;
54
import org.dataone.eml.DataoneEMLParser;
55
import org.dataone.eml.EMLDocument;
56
import org.dataone.eml.EMLDocument.DistributionMetadata;
57
import org.dataone.ore.ResourceMapFactory;
58
import org.dataone.service.exceptions.BaseException;
59
import org.dataone.service.exceptions.NotFound;
60
import org.dataone.service.types.v1.AccessPolicy;
61
import org.dataone.service.types.v1.AccessRule;
62
import org.dataone.service.types.v1.Checksum;
63
import org.dataone.service.types.v1.Identifier;
64
import org.dataone.service.types.v1.NodeReference;
65
import org.dataone.service.types.v1.ObjectFormatIdentifier;
66
import org.dataone.service.types.v1.ReplicationPolicy;
67
import org.dataone.service.types.v1.Session;
68
import org.dataone.service.types.v1.Subject;
69
import org.dataone.service.types.v1.SystemMetadata;
70
import org.dataone.service.types.v1.util.ChecksumUtil;
71
import org.dataone.service.util.DateTimeMarshaller;
72
import org.dspace.foresite.ResourceMap;
73
import org.jibx.runtime.JiBXException;
74
import org.w3c.dom.Node;
75
import org.w3c.dom.NodeList;
76
import org.xml.sax.SAXException;
77

    
78
import java.util.Calendar;
79

    
80
import edu.ucsb.nceas.metacat.AccessionNumber;
81
import edu.ucsb.nceas.metacat.AccessionNumberException;
82
import edu.ucsb.nceas.metacat.DBUtil;
83
import edu.ucsb.nceas.metacat.DocumentImpl;
84
import edu.ucsb.nceas.metacat.IdentifierManager;
85
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
86
import edu.ucsb.nceas.metacat.McdbException;
87
import edu.ucsb.nceas.metacat.MetaCatServlet;
88
import edu.ucsb.nceas.metacat.MetacatHandler;
89
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
90
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
91
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
92
import edu.ucsb.nceas.metacat.properties.PropertyService;
93
import edu.ucsb.nceas.metacat.replication.ReplicationService;
94
import edu.ucsb.nceas.metacat.shared.AccessException;
95
import edu.ucsb.nceas.metacat.shared.HandlerException;
96
import edu.ucsb.nceas.metacat.util.DocumentUtil;
97
import edu.ucsb.nceas.utilities.ParseLSIDException;
98
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
99
import edu.ucsb.nceas.utilities.XMLUtilities;
100

    
101
public class SystemMetadataFactory {

	/** Prefix prepended to a metadata document's localId to build its ORE resource map identifier. */
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
	/** Class-level logger for this factory. */
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
110
	/**
	 * Creates a system metadata object for insertion into metacat.
	 * Looks up (or creates) the guid mapping for the localId, reuses any existing
	 * system metadata found in the Hazelcast shared map, fills in format, checksum,
	 * size, submitter/rights holder, dates and the obsoletes/obsoletedBy revision
	 * chain, and — for EML 2.0.0–2.1.1 documents — also processes the described
	 * data entities and optionally generates an ORE resource map.
	 * 
	 * @param localId
	 *            The local document identifier (docid.rev)
	 * @param includeORE
	 *            when true, generate and register an ORE resource map for an EML
	 *            package that has data entities and no existing ORE
	 * @param downloadData
	 *            when true, attempt to download remote (non-ecogrid) data URLs
	 *            referenced by an EML document and store them locally
	 * 
	 * @return sysMeta The system metadata object created (may be null only when
	 *         {@code updateExisting} is false and no metadata exists yet)
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			JiBXException, AccessControlException, HandlerException, SAXException, AccessException {

		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;

		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());

		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping (identity mapping: localId used as the guid)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		}

		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			// treat any hazelcast failure as "not found" and regenerate below
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());
		}

		if (sysMeta == null) {
			// create system metadata from scratch
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}

		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else if (doctype.trim().equals("metadata")) {
			// special ESRI FGDC format
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum (consumes inputStream; it is re-read later for EML parsing)
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);

		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));

		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);

		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());

		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not (last one before rev wins)
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);

		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore: the newer revision simply has no system metadata yet
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore: the older revision simply has no system metadata yet
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				// the older revision is superseded, so mark it archived
				obsoletesSysMeta.setArchived(true);
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}

		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		try {
			// NOTE(review): the size() call is used only to trigger an NPE when the
			// allow list is absent; the policy is set only when a list exists
			List<AccessRule> allowList = accessPolicy.getAllowList();
			int listSize = allowList.size();
			sysMeta.setAccessPolicy(accessPolicy);

		} catch (NullPointerException npe) {
			logMetacat.info("The allow list is empty, can't include an empty " +
				"access policy in the system metadata for " + guid);

		}

		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);

		// Set a default replication policy
		ReplicationPolicy rp = getDefaultReplicationPolicy();
		if (rp != null) {
			sysMeta.setReplicationPolicy(rp);
		}

		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): reference (==) comparison of ObjectFormatIdentifier relies on
		// the cache returning the same instance for a given format -- confirm
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {

				// get it again to parse the document (first stream was consumed by checksum)
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);

				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
				EMLDocument emlDocument = emlParser.parseDocument(inputStream);

				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
				Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
				List<Identifier> dataIds = new ArrayList<Identifier>();

				// iterate through data objects described by the EML
				if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {

						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
						String dataDocUrl = distMetadata.url;
						String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							// ecogrid URLs embed the localId after the prefix
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();

									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}

									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}

								} catch (Exception e) {
									// error with the download -- best-effort, continue without it
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}

								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);

									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);

									remoteData = true;
								}
							}

						}

						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);

						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}

						// now we have a local id for the data
						if (dataDocLocalId != null) {

							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}

							// we'll have to generate it	
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								// (recursive call; downloadData=false so data objects never recurse further)
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);

								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}

							}

							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);

							// include as part of the ORE package
							dataIds.add(dataGuid);

						} // end if (EML package)

					} // end for (data entities)

				} // data entities not null

				// ORE map
				if (includeORE) {
					// can we generate them?
					if (!dataIds.isEmpty()) {
						// it doesn't exist in the system?
						if (!oreExistsFor(sysMeta.getIdentifier())) {

							// generate the ORE map for this datapackage
							Identifier resourceMapId = new Identifier();
							// use the local id, not the guid in case we have DOIs for them already
							resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
							idMap.put(sysMeta.getIdentifier(), dataIds);
							ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
							String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
							// copy most of the same system metadata as the packaging metadata
							SystemMetadata resourceMapSysMeta = new SystemMetadata();
							BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
							resourceMapSysMeta.setIdentifier(resourceMapId);
							Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
							ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));

							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								// use the localId in case we have a DOI
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									resourceMapObsoletesSystemMetadata.setArchived(true);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								// use the localId in case we have a DOI
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								resourceMapSysMeta.setArchived(true);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}

							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
						}
					}
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
612

    
613
    /**
614
     * Generate SystemMetadata for any object in the object store that does
615
     * not already have it.  SystemMetadata documents themselves, are, of course,
616
     * exempt.  This is a utility method for migration of existing object 
617
     * stores to DataONE where SystemMetadata is required for all objects.
618
     * @param idList
619
     * @param includeOre
620
     * @param downloadData
621
     * @throws PropertyNotFoundException
622
     * @throws NoSuchAlgorithmException
623
     * @throws AccessionNumberException
624
     * @throws SQLException
625
	 * @throws SAXException 
626
	 * @throws HandlerException 
627
	 * @throws JiBXException 
628
	 * @throws BaseException 
629
	 * @throws ParseLSIDException 
630
	 * @throws InsufficientKarmaException 
631
	 * @throws ClassNotFoundException 
632
	 * @throws IOException 
633
	 * @throws McdbException 
634
	 * @throws AccessException 
635
	 * @throws AccessControlException 
636
     */
637
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
638
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, JiBXException, HandlerException, SAXException 
639
    {
640
        
641
        for (String localId : idList) { 
642
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
643
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
644

    
645
            SystemMetadata sm = null;
646

    
647
            //generate required system metadata fields from the document
648
            try {
649
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
650
            } catch (Exception e) {
651
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
652
				continue;
653
			}
654
            
655
            //insert the systemmetadata object or just update it as needed
656
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
657
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
658
            
659
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
660

    
661
        }
662
        logMetacat.info("done generating system metadata for given list");
663
    }
664
    
665
	/**
666
	 * Determines if we already have registered an ORE map for this package
667
	 * NOTE: uses a solr query to locate OREs for the object
668
	 * @param guid of the EML/packaging object
669
	 * @return true if there is an ORE map for the given package
670
	 */
671
	public static boolean oreExistsFor(Identifier guid) {
672
		// Search for the ORE if we can find it
673
		String pid = guid.getValue();
674
		MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
675
		String query = "fl=id,resourceMap&wt=xml&q=formatType:METADATA+-obsoletedBy:*+resourceMap:*+id:\"" + pid + "\"";
676
		try {
677
			InputStream results = MNodeService.getInstance(request).query("solr", query);
678
			Node rootNode = XMLUtilities.getXMLReaderAsDOMTreeRootNode(new InputStreamReader(results, "UTF-8"));
679
			//String resultString = XMLUtilities.getDOMTreeAsString(rootNode);
680
			NodeList nodeList = XMLUtilities.getNodeListWithXPath(rootNode, "//arr[@name=\"resourceMap\"]/str");
681
			if (nodeList != null && nodeList.getLength() > 0) {
682
				//String found = nodeList.item(0).getFirstChild().getNodeValue();
683
				return true;
684
			}
685
		} catch (Exception e) {
686
			logMetacat.error("Error checking for resourceMap[s] on pid " + pid + ". " + e.getMessage(), e);
687
		}
688
		
689
		return false;
690
	}
691

    
692
	/**
693
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
694
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
695
	 * evaluated.
696
	 * 
697
	 * @param is The InputStream of bytes
698
	 * 
699
	 * @return size The size in bytes of the input stream as a long
700
	 * 
701
	 * @throws IOException
702
	 */
703
	public static long sizeOfStream(InputStream is) throws IOException {
704

    
705
		long size = 0;
706
		byte[] b = new byte[1024];
707
		int numread = is.read(b, 0, 1024);
708
		while (numread != -1) {
709
			size += numread;
710
			numread = is.read(b, 0, 1024);
711
		}
712
		return size;
713

    
714
	}
715
	
716
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
717
		
718
		DocumentImpl doc = new DocumentImpl(docid, false);
719
		String filepath = null;
720
		String filename = null;
721

    
722
		// deal with data or metadata cases
723
		if (doc.getRootNodeID() == 0) {
724
			// this is a data file
725
			filepath = PropertyService.getProperty("application.datafilepath");
726
		} else {
727
			filepath = PropertyService.getProperty("application.documentfilepath");
728
		}
729
		// ensure it is a directory path
730
		if (!(filepath.endsWith("/"))) {
731
			filepath += "/";
732
		}
733
		filename = filepath + docid;
734
		File documentFile = new File(filename);
735
		
736
		return documentFile;
737
	}
738

    
739
	/**
740
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
741
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
742
	 * than or equal to zero, no policy needs to be set, so return null.
743
	 * @return ReplicationPolicy, or null if no replication policy is needed
744
	 */
745
    private static ReplicationPolicy getDefaultReplicationPolicy() {
746
        ReplicationPolicy rp = null;
747
        int numReplicas = -1;
748
        try {
749
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
750
        } catch (NumberFormatException e) {
751
            // The property is not a valid integer, so return a null policy
752
            return null;
753
        } catch (PropertyNotFoundException e) {
754
            // The property is not found, so return a null policy
755
            return null;
756
        }
757
        
758
        if (numReplicas > 0) {
759
            rp = new ReplicationPolicy();
760
            rp.setReplicationAllowed(true);
761
            rp.setNumberReplicas(numReplicas);
762
            try {
763
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
764
                if (preferredNodeList != null) {
765
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
766
                    if (pNodes != null && !pNodes.isEmpty()) {
767
                        rp.setPreferredMemberNodeList(pNodes);
768
                    }
769
                }
770
            } catch (PropertyNotFoundException e) {
771
                // No preferred list found in properties, so just ignore it; no action needed
772
            }
773
            try {
774
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
775
                if (blockedNodeList != null) {
776
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
777
                    if (bNodes != null && !bNodes.isEmpty()) {
778
                        rp.setBlockedMemberNodeList(bNodes);
779
                    }
780
                }
781
            } catch (PropertyNotFoundException e) {
782
                // No blocked list found in properties, so just ignore it; no action needed
783
            }
784
        }
785
        return rp;
786
    }
787

    
788
    /**
789
     * Extract a List of NodeReferences from a String listing the node identifiers where
790
     * each identifier is separated by whitespace, comma, or semicolon characters.
791
     * @param nodeString the string containing the list of nodes
792
     * @return the List of NodeReference objects parsed from the input string
793
     */
794
    private static List<NodeReference> extractNodeReferences(String nodeString) {
795
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
796
        String[] result = nodeString.split("[,;\\s]");
797
        for (String r : result) {
798
        	if (r != null && r.length() > 0) {
799
	            NodeReference noderef = new NodeReference();
800
	            noderef.setValue(r);
801
	            nodeList.add(noderef);
802
	        }
803
        }
804
        return nodeList;
805
    }
806
}
(6-6/6)