Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: jones $'
9
 *     '$Date: 2016-11-22 14:55:47 -0800 (Tue, 22 Nov 2016) $'
10
 * '$Revision: 10047 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52
import org.dataone.client.v2.formats.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.exceptions.MarshallingException;
57
import org.dataone.ore.ResourceMapFactory;
58
import org.dataone.service.exceptions.BaseException;
59
import org.dataone.service.exceptions.NotFound;
60
import org.dataone.service.types.v1.AccessPolicy;
61
import org.dataone.service.types.v1.AccessRule;
62
import org.dataone.service.types.v1.Checksum;
63
import org.dataone.service.types.v1.Identifier;
64
import org.dataone.service.types.v1.NodeReference;
65
import org.dataone.service.types.v1.ObjectFormatIdentifier;
66
import org.dataone.service.types.v1.ReplicationPolicy;
67
import org.dataone.service.types.v1.Session;
68
import org.dataone.service.types.v1.Subject;
69
import org.dataone.service.types.v2.SystemMetadata;
70
import org.dataone.service.types.v1.util.ChecksumUtil;
71
import org.dataone.service.util.DateTimeMarshaller;
72
import org.dspace.foresite.ResourceMap;
73
import org.xml.sax.SAXException;
74

    
75
import java.util.Calendar;
76

    
77
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79
import edu.ucsb.nceas.metacat.DBUtil;
80
import edu.ucsb.nceas.metacat.DocumentImpl;
81
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84
import edu.ucsb.nceas.metacat.MetaCatServlet;
85
import edu.ucsb.nceas.metacat.MetacatHandler;
86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
90
import edu.ucsb.nceas.metacat.properties.PropertyService;
91
import edu.ucsb.nceas.metacat.replication.ReplicationService;
92
import edu.ucsb.nceas.metacat.shared.AccessException;
93
import edu.ucsb.nceas.metacat.shared.HandlerException;
94
import edu.ucsb.nceas.metacat.util.DocumentUtil;
95
import edu.ucsb.nceas.utilities.ParseLSIDException;
96
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
97

    
98
/**
 * Factory for generating DataONE SystemMetadata documents for objects already
 * stored in Metacat (e.g. when migrating a legacy object store to DataONE,
 * where SystemMetadata is required for every object).
 */
public class SystemMetadataFactory {

	// Prefix prepended to a metadata document's localId to form the
	// identifier of its ORE resource map (see createSystemMetadata()).
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
106
	
107
	
108
	
109
	/**
110
	 * Create a system metadata object for insertion into metacat
111
	 * @param localId
112
	 * @param includeORE
113
	 * @param downloadData
114
	 * @return
115
	 * @throws McdbException
116
	 * @throws McdbDocNotFoundException
117
	 * @throws SQLException
118
	 * @throws IOException
119
	 * @throws AccessionNumberException
120
	 * @throws ClassNotFoundException
121
	 * @throws InsufficientKarmaException
122
	 * @throws ParseLSIDException
123
	 * @throws PropertyNotFoundException
124
	 * @throws BaseException
125
	 * @throws NoSuchAlgorithmException
126
	 * @throws MarshallingException
127
	 * @throws AccessControlException
128
	 * @throws HandlerException
129
	 * @throws SAXException
130
	 * @throws AccessException
131
	 */
132
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
133
            throws McdbException, McdbDocNotFoundException, SQLException,
134
            IOException, AccessionNumberException, ClassNotFoundException,
135
            InsufficientKarmaException, ParseLSIDException,
136
            PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
137
            MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
138
	        boolean indexDataFile = false;
139
	        return createSystemMetadata(indexDataFile, localId, includeORE, downloadData);
140
	}
141
	/**
	 * Creates a system metadata object for insertion into metacat
	 * @param indexDataFile
	 *            Indicate if we need to index data file.
	 * @param localId
	 *            The local document identifier (docid.rev)
	 * @param includeORE
	 *            Whether to generate an ORE resource map for EML data packages
	 * @param downloadData
	 *            Whether remote data objects referenced by EML should be
	 *            downloaded and stored locally
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(boolean indexDataFile, String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {

		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;

		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());

		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping (localId maps to itself as the guid)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		}

		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			// treat any lookup failure as "no existing system metadata"
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());

		}

		if (sysMeta == null) {
			// create system metadata from scratch
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}

		// get additional docinfo (doctype, owner, dates, ...) from the replication service
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else if (doctype.trim().equals("metadata")) {
			// special ESRI FGDC format
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		// NOTE(review): this stream is consumed by the checksum below and
		// re-read later for EML parsing, but never explicitly closed — possible
		// resource leak; confirm whether MetacatHandler/ChecksumUtil close it.
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum (consumes inputStream)
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);

		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));

		// submitter: the last user to update the document
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);

		// rights holder: the document owner
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		// NOTE(review): updatedDate is parsed but intentionally unused — the
		// modified date is set to "now" instead (see commented line below).
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());

		// set the revision history (obsoletes/obsoletedBy chain)
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not (last one before rev wins)
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);

		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore -- the newer revision has no system metadata yet
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore -- the older revision has no system metadata yet
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				// DO NOT set archived to true -- it will have unintended consequences if the CN sees this.
				//obsoletesSysMeta.setArchived(true);
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}

		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		try {
        // NOTE(review): listSize is "unused" on purpose -- calling size() on a
        // null allowList raises the NPE that this try/catch uses to skip
        // setting an empty access policy. NPE-as-control-flow; consider an
        // explicit null/empty check instead.
        List<AccessRule> allowList = accessPolicy.getAllowList();
        int listSize = allowList.size();
        sysMeta.setAccessPolicy(accessPolicy);
        
    } catch (NullPointerException npe) {
        logMetacat.info("The allow list is empty, can't include an empty " +
            "access policy in the system metadata for " + guid);
        
    }

		// authoritative node: this node is both origin and authoritative MN
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);

		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }

		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): these are reference (==) comparisons on
		// ObjectFormatIdentifier objects; they only work if ObjectFormatCache
		// returns cached singleton instances -- confirm, or compare with
		// fmtid.getValue().equals(...) to be safe.
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {

				// get it again to parse the document (checksumming consumed the first stream)
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);

				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
				EMLDocument emlDocument = emlParser.parseDocument(inputStream);

				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
				Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
				List<Identifier> dataIds = new ArrayList<Identifier>();

				// iterate through data objects described by the EML
				if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {

						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
						String dataDocUrl = distMetadata.url;
						String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							// ecogrid URL: the localId is everything after the prefix
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();

									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}

									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}

								} catch (Exception e) {
									// error with the download -- continue without the data
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}

								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);

									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session);

									remoteData = true;
								}
							}

						}

						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);

						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								// unparseable accession number -- skip this data object
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}

						// now we have a local id for the data
						if (dataDocLocalId != null) {

							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}

							// we'll have to generate it (recursive call; downloadData=false
							// because any remote data has already been fetched above)
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);

								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}

							}

							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);

							// reindex data file if need it.
							logMetacat.debug("do we need to reindex guid "+dataGuid.getValue()+"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~?"+indexDataFile);
							if(indexDataFile) {
							    reindexDataFile(dataSysMeta.getIdentifier(), dataSysMeta);
							}

							// include as part of the ORE package
							dataIds.add(dataGuid);

						} // end if (EML package)

					} // end for (data entities)

				} // data entities not null

				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {

				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // use the local id, not the guid in case we have DOIs for them already
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));

							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								// use the localId in case we have a DOI
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									resourceMapObsoletesSystemMetadata.setArchived(true);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								// use the localId in case we have a DOI
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								resourceMapSysMeta.setArchived(true);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}

							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session);
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				// catch-all so a bad EML package doesn't abort sysmeta generation
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
652
	
653
	/*
654
	 * Re-index the data file since the access rule was changed during the inserting of the eml document.
655
	 * (During first time to index the data file in Metacat API, the eml hasn't been inserted)
656
	 */
657
	private static void reindexDataFile(Identifier id, SystemMetadata sysmeta) {
658
	    try {
659
	        logMetacat.debug("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ reindex"+id.getValue());
660
	        if(sysmeta != null) {
661
	            if(!sysmeta.getArchived()) {
662
	                //set the archive to true to remove index.
663
	                sysmeta.setArchived(true);
664
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
665
	                //re-insert the index
666
	                sysmeta.setArchived(false);
667
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
668
	            } else {
669
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
670
	            }
671
	        }
672
	       
673
        } catch (Exception e) {
674
            // TODO Auto-generated catch block
675
            logMetacat.warn("Can't reindex the data object "+id.getValue()+" since "+e.getMessage());
676
            //e.printStackTrace();
677
        }
678
	}
679

    
680
	/**
681
	 * Checks for potential ORE object existence 
682
	 * @param identifier
683
	 * @return
684
	 */
685
    public static boolean oreExistsFor(Identifier identifier) {
686
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
687
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
688
		return (ids != null && ids.size() > 0);
689
	}
690

    
691
	/**
692
     * Generate SystemMetadata for any object in the object store that does
693
     * not already have it.  SystemMetadata documents themselves, are, of course,
694
     * exempt.  This is a utility method for migration of existing object 
695
     * stores to DataONE where SystemMetadata is required for all objects.
696
     * @param idList
697
     * @param includeOre
698
     * @param downloadData
699
     * @throws PropertyNotFoundException
700
     * @throws NoSuchAlgorithmException
701
     * @throws AccessionNumberException
702
     * @throws SQLException
703
	 * @throws SAXException 
704
	 * @throws HandlerException 
705
	 * @throws MarshallingException 
706
	 * @throws BaseException 
707
	 * @throws ParseLSIDException 
708
	 * @throws InsufficientKarmaException 
709
	 * @throws ClassNotFoundException 
710
	 * @throws IOException 
711
	 * @throws McdbException 
712
	 * @throws AccessException 
713
	 * @throws AccessControlException 
714
     */
715
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
716
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, MarshallingException, HandlerException, SAXException 
717
    {
718
        
719
        for (String localId : idList) { 
720
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
721
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
722

    
723
            SystemMetadata sm = null;
724

    
725
            //generate required system metadata fields from the document
726
            try {
727
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
728
            } catch (Exception e) {
729
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
730
				continue;
731
			}
732
            
733
            //insert the systemmetadata object or just update it as needed
734
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
735
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
736
            
737
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
738

    
739
        }
740
        logMetacat.info("done generating system metadata for given list");
741
    }
	/**
744
	 * Find the size (in bytes) of a stream. Note: This needs to refactored out
745
	 * of MetacatHandler and into a utility when stream i/o in Metacat is
746
	 * evaluated.
747
	 * 
748
	 * @param is The InputStream of bytes
749
	 * 
750
	 * @return size The size in bytes of the input stream as a long
751
	 * 
752
	 * @throws IOException
753
	 */
754
	public static long sizeOfStream(InputStream is) throws IOException {
755

    
756
		long size = 0;
757
		byte[] b = new byte[1024];
758
		int numread = is.read(b, 0, 1024);
759
		while (numread != -1) {
760
			size += numread;
761
			numread = is.read(b, 0, 1024);
762
		}
763
		return size;
764

    
765
	}
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
768
		
769
		DocumentImpl doc = new DocumentImpl(docid, false);
770
		String filepath = null;
771
		String filename = null;
772

    
773
		// deal with data or metadata cases
774
		if (doc.getRootNodeID() == 0) {
775
			// this is a data file
776
			filepath = PropertyService.getProperty("application.datafilepath");
777
		} else {
778
			filepath = PropertyService.getProperty("application.documentfilepath");
779
		}
780
		// ensure it is a directory path
781
		if (!(filepath.endsWith("/"))) {
782
			filepath += "/";
783
		}
784
		filename = filepath + docid;
785
		File documentFile = new File(filename);
786
		
787
		return documentFile;
788
	}
	/**
791
	 * Create a default ReplicationPolicy by reading properties from metacat's configuration
792
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
793
	 * than or equal to zero, no policy needs to be set, so return null.
794
	 * @return ReplicationPolicy, or null if no replication policy is needed
795
	 */
796
    protected static ReplicationPolicy getDefaultReplicationPolicy() {
797
        ReplicationPolicy rp = null;
798
        int numReplicas = -1;
799
        try {
800
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
801
        } catch (NumberFormatException e) {
802
            // The property is not a valid integer, so set it to 0
803
            numReplicas = 0;
804
        } catch (PropertyNotFoundException e) {
805
            // The property is not found, so set it to 0
806
            numReplicas = 0;
807
        }
808
        
809
        rp = new ReplicationPolicy();
810
        if (numReplicas > 0) {
811
            rp.setReplicationAllowed(true);
812
            rp.setNumberReplicas(numReplicas);
813
            try {
814
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
815
                if (preferredNodeList != null) {
816
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
817
                    if (pNodes != null && !pNodes.isEmpty()) {
818
                        rp.setPreferredMemberNodeList(pNodes);
819
                    }
820
                }
821
            } catch (PropertyNotFoundException e) {
822
                // No preferred list found in properties, so just ignore it; no action needed
823
            }
824
            try {
825
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
826
                if (blockedNodeList != null) {
827
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
828
                    if (bNodes != null && !bNodes.isEmpty()) {
829
                        rp.setBlockedMemberNodeList(bNodes);
830
                    }
831
                }
832
            } catch (PropertyNotFoundException e) {
833
                // No blocked list found in properties, so just ignore it; no action needed
834
            }
835
        } else {
836
            rp.setReplicationAllowed(false);
837
            rp.setNumberReplicas(0);
838
        }
839
        return rp;
840
    }
    /**
843
     * Extract a List of NodeReferences from a String listing the node identifiers where
844
     * each identifier is separated by whitespace, comma, or semicolon characters.
845
     * @param nodeString the string containing the list of nodes
846
     * @return the List of NodeReference objects parsed from the input string
847
     */
848
    private static List<NodeReference> extractNodeReferences(String nodeString) {
849
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
850
        String[] result = nodeString.split("[,;\\s]");
851
        for (String r : result) {
852
        	if (r != null && r.length() > 0) {
853
	            NodeReference noderef = new NodeReference();
854
	            noderef.setValue(r);
855
	            nodeList.add(noderef);
856
	        }
857
        }
858
        return nodeList;
859
    }
}