Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class for upgrading the database to version 1.5
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Saurabh Garg
7
 *
8
 *   '$Author: tao $'
9
 *     '$Date: 2017-05-17 15:15:23 -0700 (Wed, 17 May 2017) $'
10
 * '$Revision: 10276 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.dataone;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.math.BigInteger;
32
import java.net.URL;
33
import java.net.URLConnection;
34
import java.security.NoSuchAlgorithmException;
35
import java.sql.SQLException;
36
import java.util.ArrayList;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.HashMap;
40
import java.util.Hashtable;
41
import java.util.List;
42
import java.util.Map;
43
import java.util.Vector;
44

    
45
import javax.xml.parsers.ParserConfigurationException;
46
import javax.xml.xpath.XPathExpressionException;
47

    
48
import org.apache.commons.beanutils.BeanUtils;
49
import org.apache.commons.io.IOUtils;
50
import org.apache.log4j.Logger;
51
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
52
import org.dataone.client.v2.formats.ObjectFormatCache;
53
import org.dataone.eml.DataoneEMLParser;
54
import org.dataone.eml.EMLDocument;
55
import org.dataone.eml.EMLDocument.DistributionMetadata;
56
import org.dataone.exceptions.MarshallingException;
57
import org.dataone.ore.ResourceMapFactory;
58
import org.dataone.service.exceptions.BaseException;
59
import org.dataone.service.exceptions.NotFound;
60
import org.dataone.service.types.v1.AccessPolicy;
61
import org.dataone.service.types.v1.AccessRule;
62
import org.dataone.service.types.v1.Checksum;
63
import org.dataone.service.types.v1.Identifier;
64
import org.dataone.service.types.v1.NodeReference;
65
import org.dataone.service.types.v1.ObjectFormatIdentifier;
66
import org.dataone.service.types.v1.ReplicationPolicy;
67
import org.dataone.service.types.v1.Session;
68
import org.dataone.service.types.v1.Subject;
69
import org.dataone.service.types.v2.SystemMetadata;
70
import org.dataone.service.types.v1.util.ChecksumUtil;
71
import org.dataone.service.util.DateTimeMarshaller;
72
import org.dspace.foresite.ResourceMap;
73
import org.xml.sax.SAXException;
74

    
75
import java.util.Calendar;
76

    
77
import edu.ucsb.nceas.metacat.AccessionNumber;
78
import edu.ucsb.nceas.metacat.AccessionNumberException;
79
import edu.ucsb.nceas.metacat.DBUtil;
80
import edu.ucsb.nceas.metacat.DocumentImpl;
81
import edu.ucsb.nceas.metacat.IdentifierManager;
82
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
83
import edu.ucsb.nceas.metacat.McdbException;
84
import edu.ucsb.nceas.metacat.MetaCatServlet;
85
import edu.ucsb.nceas.metacat.MetacatHandler;
86
import edu.ucsb.nceas.metacat.accesscontrol.AccessControlException;
87
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
88
import edu.ucsb.nceas.metacat.dataone.hazelcast.HazelcastService;
89
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
90
import edu.ucsb.nceas.metacat.properties.PropertyService;
91
import edu.ucsb.nceas.metacat.replication.ReplicationService;
92
import edu.ucsb.nceas.metacat.shared.AccessException;
93
import edu.ucsb.nceas.metacat.shared.HandlerException;
94
import edu.ucsb.nceas.metacat.util.DocumentUtil;
95
import edu.ucsb.nceas.utilities.ParseLSIDException;
96
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
97

    
98
/**
 * Factory that generates DataONE {@link SystemMetadata} for objects already stored in Metacat
 * (e.g. during migration of a legacy object store to DataONE), including optional ORE
 * resource-map generation for EML data packages.
 */
public class SystemMetadataFactory {

	/** Prefix prepended to a local docid to build the identifier of its ORE resource map. */
	public static final String RESOURCE_MAP_PREFIX = "resourceMap_";
	private static Logger logMetacat = Logger.getLogger(SystemMetadataFactory.class);
	/**
	 * use this flag if you want to update any existing system metadata values with generated content
	 */
	private static boolean updateExisting = true;
106
	
107
	
108
	
109
	/**
110
	 * Create a system metadata object for insertion into metacat
111
	 * @param localId
112
	 * @param includeORE
113
	 * @param downloadData
114
	 * @return
115
	 * @throws McdbException
116
	 * @throws McdbDocNotFoundException
117
	 * @throws SQLException
118
	 * @throws IOException
119
	 * @throws AccessionNumberException
120
	 * @throws ClassNotFoundException
121
	 * @throws InsufficientKarmaException
122
	 * @throws ParseLSIDException
123
	 * @throws PropertyNotFoundException
124
	 * @throws BaseException
125
	 * @throws NoSuchAlgorithmException
126
	 * @throws MarshallingException
127
	 * @throws AccessControlException
128
	 * @throws HandlerException
129
	 * @throws SAXException
130
	 * @throws AccessException
131
	 */
132
	public static SystemMetadata createSystemMetadata(String localId, boolean includeORE, boolean downloadData)
133
            throws McdbException, McdbDocNotFoundException, SQLException,
134
            IOException, AccessionNumberException, ClassNotFoundException,
135
            InsufficientKarmaException, ParseLSIDException,
136
            PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
137
            MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
138
	        boolean indexDataFile = false;
139
	        return createSystemMetadata(indexDataFile, localId, includeORE, downloadData);
140
	}
141
	/**
	 * Creates a system metadata object for insertion into metacat.
	 *
	 * <p>Looks up (or creates) the GUID mapping for the local docid, reuses any
	 * existing system metadata from Hazelcast, fills in format, checksum, size,
	 * ownership, dates and revision-graph fields, and — for EML documents —
	 * processes the described data entities and optionally generates an ORE
	 * resource map for the package.</p>
	 *
	 * @param indexDataFile
	 *            Indicate if we need to index data file.
	 * 
	 * @param localId
	 *            The local document identifier (docid.rev)
	 * @param includeORE
	 *            whether to generate an ORE resource map for EML packages
	 * @param downloadData
	 *            whether remote (non-ecogrid) data URLs referenced by EML
	 *            should be downloaded and registered locally
	 * 
	 * @return sysMeta The system metadata object created
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws AccessControlException 
	 * @throws AccessException 
	 */
	public static SystemMetadata createSystemMetadata(boolean indexDataFile, String localId, boolean includeORE, boolean downloadData)
			throws McdbException, McdbDocNotFoundException, SQLException,
			IOException, AccessionNumberException, ClassNotFoundException,
			InsufficientKarmaException, ParseLSIDException,
			PropertyNotFoundException, BaseException, NoSuchAlgorithmException,
			MarshallingException, AccessControlException, HandlerException, SAXException, AccessException {
		
		logMetacat.debug("createSystemMetadata() called for localId " + localId);

		// check for system metadata
		SystemMetadata sysMeta = null;
		
		AccessionNumber accNum = new AccessionNumber(localId, "NONE");
		int rev = Integer.valueOf(accNum.getRev());
		
		// get/make the guid
		String guid = null;
		try {
			// get the guid if it exists
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);
		} catch (McdbDocNotFoundException dnfe) {
			// otherwise create the mapping (local id is used as its own guid)
			logMetacat.debug("No guid found in the identifier table.  Creating mapping for " + localId);
			IdentifierManager.getInstance().createMapping(localId, localId);
			guid = IdentifierManager.getInstance().getGUID(accNum.getDocid(), rev);			
		}
		
		// look up existing system metadata if it exists
		Identifier identifier = new Identifier();
		identifier.setValue(guid);
		try {
			logMetacat.debug("Using hazelcast to get system metadata");
			sysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(identifier);
			// TODO: if this is the case, we could return here -- what else do we gain?
			// when updateExisting is false, an existing record (possibly null) is returned untouched
			if (!updateExisting ) {
				return sysMeta;
			}
		} catch (Exception e) {
			logMetacat.debug("No system metadata found in hz: " + e.getMessage());

		}

		if (sysMeta == null) {
			// create system metadata from scratch; new objects start unarchived at serial version 1
			sysMeta = new SystemMetadata();
			sysMeta.setIdentifier(identifier);
			sysMeta.setSerialVersion(BigInteger.valueOf(1));
			sysMeta.setArchived(false);
		}
		
		// get additional docinfo
		Hashtable<String, String> docInfo = ReplicationService.getDocumentInfoMap(localId);
		// set the default object format
		// NOTE(review): doctype is assumed non-null here; a missing "doctype" key would NPE on trim() — verify upstream guarantee
		String doctype = docInfo.get("doctype");
		ObjectFormatIdentifier fmtid = null;

		// set the object format, fall back to defaults
		if (doctype.trim().equals("BIN")) {
			// we don't know much about this file (yet)
			fmtid = ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId();
		} else if (doctype.trim().equals("metadata")) {
			// special ESRI FGDC format
			fmtid = ObjectFormatCache.getInstance().getFormat("FGDC-STD-001-1998").getFormatId();
		} else {
			try {
				// do we know the given format?
				fmtid = ObjectFormatCache.getInstance().getFormat(doctype).getFormatId();
			} catch (NotFound nfe) {
				// format is not registered, use default
				fmtid = ObjectFormatCache.getInstance().getFormat("text/plain").getFormatId();
			}
		}

		sysMeta.setFormatId(fmtid);
		logMetacat.debug("The ObjectFormat for " + localId + " is " + fmtid.getValue());

		// for retrieving the actual object
		// NOTE(review): this stream is never explicitly closed in this method; checksum() may
		// consume it fully, but closing responsibility should be confirmed
		InputStream inputStream = null;
		inputStream = MetacatHandler.read(localId);

		// create the checksum
		String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
		Checksum checksum = ChecksumUtil.checksum(inputStream, algorithm);
		logMetacat.debug("The checksum for " + localId + " is " + checksum.getValue());
		sysMeta.setChecksum(checksum);
		
		// set the size from file on disk, don't read bytes again
		File fileOnDisk = getFileOnDisk(localId);
		long fileSize = 0;
		if (fileOnDisk.exists()) {
			fileSize = fileOnDisk.length();
		}
		sysMeta.setSize(BigInteger.valueOf(fileSize));
		
		// submitter
		Subject submitter = new Subject();
		submitter.setValue(docInfo.get("user_updated"));
		sysMeta.setSubmitter(submitter);
		
		// rights holder
		Subject owner = new Subject();
		owner.setValue(docInfo.get("user_owner"));
		sysMeta.setRightsHolder(owner);

		// dates
		String createdDateString = docInfo.get("date_created");
		String updatedDateString = docInfo.get("date_updated");
		Date createdDate = DateTimeMarshaller.deserializeDateToUTC(createdDateString);
		Date updatedDate = DateTimeMarshaller.deserializeDateToUTC(updatedDateString);  
		sysMeta.setDateUploaded(createdDate);
		//sysMeta.setDateSysMetadataModified(updatedDate);
		// use current datetime 
		sysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
		
		// set the revision history
		String docidWithoutRev = accNum.getDocid();
		Identifier obsoletedBy = null;
		Identifier obsoletes = null;
		Vector<Integer> revisions = DBUtil.getRevListFromRevisionTable(docidWithoutRev);
		// ensure this ordering since processing depends on it
		Collections.sort(revisions);
		for (int existingRev: revisions) {
			// use the docid+rev as the guid
			String existingPid = docidWithoutRev + "." + existingRev;
			try {
				existingPid = IdentifierManager.getInstance().getGUID(docidWithoutRev, existingRev);
			} catch (McdbDocNotFoundException mdfe) {
				// we'll be defaulting to the local id
				logMetacat.warn("could not locate guid when processing revision history for localId: " + localId);
			}
			if (existingRev < rev) {
				// it's the old docid, until it's not (last one before rev wins)
				obsoletes = new Identifier();
				obsoletes.setValue(existingPid);
			}
			if (existingRev > rev) {
				// it's the newer docid
				obsoletedBy = new Identifier();
				obsoletedBy.setValue(existingPid);
				// only want the version just after it
				break;
			}
		}
		// set them on our object
		sysMeta.setObsoletedBy(obsoletedBy);
		sysMeta.setObsoletes(obsoletes);
		
		// update the system metadata for the object[s] we are revising
		if (obsoletedBy != null) {
			SystemMetadata obsoletedBySysMeta = null;
			try {
				//obsoletedBySysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletedBy);
				obsoletedBySysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletedBy.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore -- the newer object may not have system metadata yet
			}
			if (obsoletedBySysMeta != null) {
				obsoletedBySysMeta.setObsoletes(identifier);
				obsoletedBySysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletedBy, obsoletedBySysMeta);
			}
		}
		if (obsoletes != null) {
			SystemMetadata obsoletesSysMeta = null;
			try {
				//obsoletesSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(obsoletes);
				obsoletesSysMeta = IdentifierManager.getInstance().getSystemMetadata(obsoletes.getValue());
			} catch (McdbDocNotFoundException e) {
				// ignore -- the older object may not have system metadata yet
			}
			if (obsoletesSysMeta != null) {
				obsoletesSysMeta.setObsoletedBy(identifier);
				// DO NOT set archived to true -- it will have unintended consequences if the CN sees this.
				//obsoletesSysMeta.setArchived(true);
				obsoletesSysMeta.setDateSysMetadataModified(Calendar.getInstance().getTime());
				HazelcastService.getInstance().getSystemMetadataMap().put(obsoletes, obsoletesSysMeta);
			}
		}
		
		// look up the access control policy we have in metacat
		AccessPolicy accessPolicy = IdentifierManager.getInstance().getAccessPolicy(guid);
		// NOTE(review): the NPE catch below is used as control flow for "no allow list";
		// an explicit null/empty check would be clearer — confirm before changing
		try {
        List<AccessRule> allowList = accessPolicy.getAllowList();
        int listSize = allowList.size();
        sysMeta.setAccessPolicy(accessPolicy);
        
    } catch (NullPointerException npe) {
        logMetacat.info("The allow list is empty, can't include an empty " +
            "access policy in the system metadata for " + guid);
        
    }
		
		// authoritative node
		NodeReference nr = new NodeReference();
		nr.setValue(PropertyService.getProperty("dataone.nodeId"));
		sysMeta.setOriginMemberNode(nr);
		sysMeta.setAuthoritativeMemberNode(nr);
		
		// Set a default replication policy
        ReplicationPolicy rp = getDefaultReplicationPolicy();
        if (rp != null) {
            sysMeta.setReplicationPolicy(rp);
        }
		
		// further parse EML documents to get data object format,
		// describes and describedBy information
		// NOTE(review): identity (==) comparison relies on ObjectFormatCache returning cached
		// singleton format-id instances — confirm; value equality would be safer
		if (fmtid == ObjectFormatCache.getInstance().getFormat(
				"eml://ecoinformatics.org/eml-2.0.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.0.1").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.0").getFormatId()
				|| fmtid == ObjectFormatCache.getInstance().getFormat(
						"eml://ecoinformatics.org/eml-2.1.1").getFormatId()) {

			try {
				
				// get it again to parse the document (the first stream was consumed by checksum)
				logMetacat.debug("Re-reading document inputStream");
				inputStream = MetacatHandler.read(localId);
				
				DataoneEMLParser emlParser = DataoneEMLParser.getInstance();
		        EMLDocument emlDocument = emlParser.parseDocument(inputStream);
				
				// iterate through the data objects in the EML doc and add sysmeta
				logMetacat.debug("In createSystemMetadata() the number of data "
								+ "entities is: "
								+ emlDocument.distributionMetadata);

				// for generating the ORE map
	            Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
	            List<Identifier> dataIds = new ArrayList<Identifier>();
				
				// iterate through data objects described by the EML
	            if (emlDocument.distributionMetadata != null) {
					for (int j = 0; j < emlDocument.distributionMetadata.size(); j++) {
	
						DistributionMetadata distMetadata = emlDocument.distributionMetadata.elementAt(j);
				        String dataDocUrl = distMetadata.url;
				        String dataDocMimeType = distMetadata.mimeType;
						// default to binary
						if (dataDocMimeType == null) {
							dataDocMimeType = "application/octet-stream";
						}

						// process the data
						boolean remoteData = false;
						String dataDocLocalId = null;
						Identifier dataGuid = new Identifier();

						// handle ecogrid, or downloadable data
						String ecogridPrefix = "ecogrid://knb/";
						if (dataDocUrl.trim().startsWith(ecogridPrefix)) {
							// ecogrid URLs embed the local docid after the prefix
							dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf(ecogridPrefix) + ecogridPrefix.length());
						} else {
							// should we try downloading the remote data?
							if (downloadData) {
								InputStream dataObject = null;
								try {
									// download the data from the URL
									URL dataURL = new URL(dataDocUrl);
									URLConnection dataConnection = dataURL.openConnection();
									
									// default is to download the data
									dataObject = dataConnection.getInputStream();

									String detectedContentType = dataConnection.getContentType();
									logMetacat.info("Detected content type: " + detectedContentType);

									if (detectedContentType != null) {
										// seems to be HTML from the remote location
										if (detectedContentType.contains("html")) {
											// if we are not expecting it, we skip it
											if (!dataDocMimeType.contains("html")) {
												// set to null so we don't download it
												dataObject = null;
												logMetacat.warn("Skipping remote resource, unexpected HTML content type at: " + dataDocUrl);
											}
										}
										
									} else {
										// if we don't know what it is, should we skip it?
										dataObject = null;
										logMetacat.warn("Skipping remote resource, unknown content type at: " + dataDocUrl);
									}
									
								} catch (Exception e) {
									// error with the download -- best effort, continue with next entity
									logMetacat.warn("Error downloading remote data. " + e.getMessage());
								}
								
								if (dataObject != null) {
									// create the local version of it
									dataDocLocalId = DocumentUtil.generateDocumentId(1);
									IdentifierManager.getInstance().createMapping(dataDocLocalId, dataDocLocalId);
									dataGuid.setValue(dataDocLocalId);
									
									// save it locally
									Session session = new Session();
									session.setSubject(submitter);
									MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
									Checksum sum = null;
									MNodeService.getInstance(request).insertDataObject(dataObject, dataGuid, session, sum);
									
									remoteData = true;
								}
							}
							
						}
						
						logMetacat.debug("Data local ID: " + dataDocLocalId);
						logMetacat.debug("Data URL     : " + dataDocUrl);
						logMetacat.debug("Data mime    : " + dataDocMimeType);
						
						// check for valid docid.rev
						String dataDocid = null;
						int dataRev = 0;
						if (dataDocLocalId != null) {
							// look up the guid for the data
							try {
								dataDocid = DocumentUtil.getSmartDocId(dataDocLocalId);
								dataRev = DocumentUtil.getRevisionFromAccessionNumber(dataDocLocalId);
							} catch (Exception e) {
								logMetacat.warn(e.getClass().getName() + " - Problem parsing accession number for: " + dataDocLocalId + ". Message: " + e.getMessage());
								dataDocLocalId = null;
							}
						}
						
						// now we have a local id for the data
						if (dataDocLocalId != null) {
	
							// check if data system metadata exists already
							SystemMetadata dataSysMeta = null;
							String dataGuidString = null;
							try {
								// look for the identifier
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);
								// set it
								dataGuid.setValue(dataGuidString);
								// look up the system metadata
								try {
									dataSysMeta = HazelcastService.getInstance().getSystemMetadataMap().get(dataGuid);
								} catch (Exception e) {
									// probably not in the system
									dataSysMeta = null;
								}
								//dataSysMeta = IdentifierManager.getInstance().getSystemMetadata(dataGuidString);
							} catch (McdbDocNotFoundException nf) {
								// we didn't find it
								dataSysMeta = null;
							}
								
							// we'll have to generate it	
							if (dataSysMeta == null) {
								// System metadata for data doesn't exist yet, so create it
								// (recursive call; downloadData=false prevents re-downloading)
								logMetacat.debug("No exisiting SystemMetdata found, creating for: " + dataDocLocalId);
								dataSysMeta = createSystemMetadata(dataDocLocalId, includeORE, false);

								// now look it up again
								dataGuidString = IdentifierManager.getInstance().getGUID(dataDocid, dataRev);

								// set the guid
								dataGuid.setValue(dataGuidString);
								
								// inherit access rules from metadata, if we don't have our own
								if (remoteData) {
									dataSysMeta.setAccessPolicy(sysMeta.getAccessPolicy());
									// TODO: use access rules defined in EML, per data file
								}
	
							}
							
							// set object format for the data file
							logMetacat.debug("Updating system metadata for " + dataGuid.getValue() + " to " + dataDocMimeType);
							ObjectFormatIdentifier fmt = null;
							try {
								fmt = ObjectFormatCache.getInstance().getFormat(dataDocMimeType).getFormatId();
							} catch (NotFound nfe) {
								logMetacat.debug("Couldn't find format identifier for: "
												+ dataDocMimeType
												+ ". Setting it to application/octet-stream.");
								fmt = new ObjectFormatIdentifier();
								fmt.setValue("application/octet-stream");
							}
							dataSysMeta.setFormatId(fmt);

							// update the values
							HazelcastService.getInstance().getSystemMetadataMap().put(dataSysMeta.getIdentifier(), dataSysMeta);
							
							// reindex data file if need it.
							logMetacat.debug("do we need to reindex guid "+dataGuid.getValue()+"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~?"+indexDataFile);
							if(indexDataFile) {
							    reindexDataFile(dataSysMeta.getIdentifier(), dataSysMeta);
							}

							// include as part of the ORE package
							dataIds.add(dataGuid);
	
						} // end if (EML package)
	
					} // end for (data entities)
					
	            } // data entities not null
	            
				// ORE map
				if (includeORE) {
					// can we generate them?
			        if (!dataIds.isEmpty()) {
			        	// it doesn't exist in the system?
			        	if (!oreExistsFor(sysMeta.getIdentifier())) {
			        	
				            // generate the ORE map for this datapackage
				            Identifier resourceMapId = new Identifier();
				            // use the local id, not the guid in case we have DOIs for them already
				            resourceMapId.setValue(RESOURCE_MAP_PREFIX + localId);
				            idMap.put(sysMeta.getIdentifier(), dataIds);
				            ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
				            String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
				            // copy most of the same system metadata as the packaging metadata
				            SystemMetadata resourceMapSysMeta = new SystemMetadata();
				            BeanUtils.copyProperties(resourceMapSysMeta, sysMeta);
				            resourceMapSysMeta.setIdentifier(resourceMapId);
				            Checksum oreChecksum = ChecksumUtil.checksum(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), algorithm);
							resourceMapSysMeta.setChecksum(oreChecksum);
				            ObjectFormatIdentifier formatId = ObjectFormatCache.getInstance().getFormat("http://www.openarchives.org/ore/terms").getFormatId();
							resourceMapSysMeta.setFormatId(formatId);
							resourceMapSysMeta.setSize(BigInteger.valueOf(sizeOfStream(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING))));
							
							// set the revision graph
							resourceMapSysMeta.setObsoletes(null);
							resourceMapSysMeta.setObsoletedBy(null);
							// look up the resource map that this one obsoletes
							if (sysMeta.getObsoletes() != null) {
								// use the localId in case we have a DOI
								String obsoletesLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletes().getValue());
								Identifier resourceMapObsoletes = new Identifier();
								resourceMapObsoletes.setValue(RESOURCE_MAP_PREFIX + obsoletesLocalId );
								resourceMapSysMeta.setObsoletes(resourceMapObsoletes);
								SystemMetadata resourceMapObsoletesSystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletes);
								if (resourceMapObsoletesSystemMetadata != null) {
									resourceMapObsoletesSystemMetadata.setObsoletedBy(resourceMapId);
									resourceMapObsoletesSystemMetadata.setArchived(true);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletes, resourceMapObsoletesSystemMetadata);
								}
							}
							// look up the resource map that this one is obsoletedBy
							if (sysMeta.getObsoletedBy() != null) {
								// use the localId in case we have a DOI
								String obsoletedByLocalId = IdentifierManager.getInstance().getLocalId(sysMeta.getObsoletedBy().getValue());
								Identifier resourceMapObsoletedBy = new Identifier();
								resourceMapObsoletedBy.setValue(RESOURCE_MAP_PREFIX + obsoletedByLocalId);
								resourceMapSysMeta.setObsoletedBy(resourceMapObsoletedBy);
								resourceMapSysMeta.setArchived(true);
								SystemMetadata resourceMapObsoletedBySystemMetadata = HazelcastService.getInstance().getSystemMetadataMap().get(resourceMapObsoletedBy);
								if (resourceMapObsoletedBySystemMetadata != null) {
									resourceMapObsoletedBySystemMetadata.setObsoletes(resourceMapId);
									HazelcastService.getInstance().getSystemMetadataMap().put(resourceMapObsoletedBy, resourceMapObsoletedBySystemMetadata);
								}
							}
				            
							// save it locally, if it doesn't already exist
							if (!IdentifierManager.getInstance().identifierExists(resourceMapId.getValue())) {
								Session session = new Session();
								session.setSubject(submitter);
								MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
								MNodeService.getInstance(request).insertDataObject(IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapId, session, resourceMapSysMeta.getChecksum());
								MNodeService.getInstance(request).insertSystemMetadata(resourceMapSysMeta);
								logMetacat.info("Inserted ORE package: " + resourceMapId.getValue());
							}
			        	}
			        }
				}

			} catch (ParserConfigurationException pce) {
				logMetacat.debug("There was a problem parsing the EML document. "
								+ "The error message was: " + pce.getMessage());

			} catch (SAXException saxe) {
				logMetacat.debug("There was a problem traversing the EML document. "
								+ "The error message was: " + saxe.getMessage());

			} catch (XPathExpressionException xpee) {
				logMetacat.debug("There was a problem searching the EML document. "
								+ "The error message was: " + xpee.getMessage());
			} catch (Exception e) {
				// best-effort: EML/ORE processing failures do not abort the base system metadata
				logMetacat.debug("There was a problem creating System Metadata. "
								+ "The error message was: " + e.getMessage());
				e.printStackTrace();
			} // end try()

		} // end if()

		return sysMeta;
	}
653
	
654
	/*
655
	 * Re-index the data file since the access rule was changed during the inserting of the eml document.
656
	 * (During first time to index the data file in Metacat API, the eml hasn't been inserted)
657
	 */
658
	private static void reindexDataFile(Identifier id, SystemMetadata sysmeta) {
659
	    try {
660
	        logMetacat.debug("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ reindex"+id.getValue());
661
	        if(sysmeta != null) {
662
	            if(!sysmeta.getArchived()) {
663
	                //set the archive to true to remove index.
664
	                sysmeta.setArchived(true);
665
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
666
	                //re-insert the index
667
	                sysmeta.setArchived(false);
668
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
669
	            } else {
670
	                MetacatSolrIndex.getInstance().submit(id, sysmeta, null, true);
671
	            }
672
	        }
673
	       
674
        } catch (Exception e) {
675
            // TODO Auto-generated catch block
676
            logMetacat.warn("Can't reindex the data object "+id.getValue()+" since "+e.getMessage());
677
            //e.printStackTrace();
678
        }
679
	}
680

    
681
	/**
682
	 * Checks for potential ORE object existence 
683
	 * @param identifier
684
	 * @return
685
	 */
686
    public static boolean oreExistsFor(Identifier identifier) {
687
    	MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
688
		List<Identifier> ids = MNodeService.getInstance(request).lookupOreFor(identifier, true);
689
		return (ids != null && ids.size() > 0);
690
	}
691

    
692
	/**
     * Generate SystemMetadata for any object in the object store that does
     * not already have it.  SystemMetadata documents themselves are, of course,
     * exempt.  This is a utility method for migrating existing object
     * stores to DataONE, where SystemMetadata is required for all objects.
     * @param idList the local document ids to generate SystemMetadata for
     * @param includeOre whether ORE resource maps should also be generated
     * @param downloadData whether remote data files should be downloaded
     * @throws PropertyNotFoundException
     * @throws NoSuchAlgorithmException
     * @throws AccessionNumberException
     * @throws SQLException
	 * @throws SAXException 
	 * @throws HandlerException 
	 * @throws MarshallingException 
	 * @throws BaseException 
	 * @throws ParseLSIDException 
	 * @throws InsufficientKarmaException 
	 * @throws ClassNotFoundException 
	 * @throws IOException 
	 * @throws McdbException 
	 * @throws AccessException 
	 * @throws AccessControlException 
     */
716
    public static void generateSystemMetadata(List<String> idList, boolean includeOre, boolean downloadData) 
717
    throws PropertyNotFoundException, NoSuchAlgorithmException, AccessionNumberException, SQLException, AccessControlException, AccessException, McdbException, IOException, ClassNotFoundException, InsufficientKarmaException, ParseLSIDException, BaseException, MarshallingException, HandlerException, SAXException 
718
    {
719
        
720
        for (String localId : idList) { 
721
        	logMetacat.debug("Creating SystemMetadata for localId " + localId);
722
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tBEGIN:\tLOCALID:\t" + localId);
723

    
724
            SystemMetadata sm = null;
725

    
726
            //generate required system metadata fields from the document
727
            try {
728
            	sm = SystemMetadataFactory.createSystemMetadata(localId, includeOre, downloadData);
729
            } catch (Exception e) {
730
				logMetacat.error("Could not create/process system metadata for docid: " + localId, e);
731
				continue;
732
			}
733
            
734
            //insert the systemmetadata object or just update it as needed
735
        	IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
736
        	logMetacat.info("Generated or Updated SystemMetadata for " + localId);
737
            
738
        	logMetacat.trace("METRICS:\tGENERATE_SYSTEM_METADATA:\tEND:\tLOCALID:\t" + localId);
739

    
740
        }
741
        logMetacat.info("done generating system metadata for given list");
742
    }
743

    
744
	/**
	 * Find the size (in bytes) of a stream. Note: this should be refactored out
	 * of MetacatHandler and into a utility class when stream I/O in Metacat is
	 * evaluated.
	 * 
	 * @param is the InputStream of bytes
	 * 
	 * @return the size of the input stream, in bytes, as a long
	 * 
	 * @throws IOException if the stream cannot be read
	 */
755
	public static long sizeOfStream(InputStream is) throws IOException {
756

    
757
		long size = 0;
758
		byte[] b = new byte[1024];
759
		int numread = is.read(b, 0, 1024);
760
		while (numread != -1) {
761
			size += numread;
762
			numread = is.read(b, 0, 1024);
763
		}
764
		return size;
765

    
766
	}
767
	
768
	private static File getFileOnDisk(String docid) throws McdbException, PropertyNotFoundException {
769
		
770
		DocumentImpl doc = new DocumentImpl(docid, false);
771
		String filepath = null;
772
		String filename = null;
773

    
774
		// deal with data or metadata cases
775
		if (doc.getRootNodeID() == 0) {
776
			// this is a data file
777
			filepath = PropertyService.getProperty("application.datafilepath");
778
		} else {
779
			filepath = PropertyService.getProperty("application.documentfilepath");
780
		}
781
		// ensure it is a directory path
782
		if (!(filepath.endsWith("/"))) {
783
			filepath += "/";
784
		}
785
		filename = filepath + docid;
786
		File documentFile = new File(filename);
787
		
788
		return documentFile;
789
	}
790

    
791
	/**
	 * Create a default ReplicationPolicy by reading properties from Metacat's configuration
	 * and using those defaults. If the numReplicas property is not found, malformed, or less
	 * than or equal to zero, a policy with replication disallowed and zero replicas is returned.
	 * @return the default ReplicationPolicy; never null
	 */
797
    protected static ReplicationPolicy getDefaultReplicationPolicy() {
798
        ReplicationPolicy rp = null;
799
        int numReplicas = -1;
800
        try {
801
            numReplicas = new Integer(PropertyService.getProperty("dataone.replicationpolicy.default.numreplicas"));
802
        } catch (NumberFormatException e) {
803
            // The property is not a valid integer, so set it to 0
804
            numReplicas = 0;
805
        } catch (PropertyNotFoundException e) {
806
            // The property is not found, so set it to 0
807
            numReplicas = 0;
808
        }
809
        
810
        rp = new ReplicationPolicy();
811
        if (numReplicas > 0) {
812
            rp.setReplicationAllowed(true);
813
            rp.setNumberReplicas(numReplicas);
814
            try {
815
                String preferredNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.preferredNodeList");
816
                if (preferredNodeList != null) {
817
                    List<NodeReference> pNodes = extractNodeReferences(preferredNodeList);
818
                    if (pNodes != null && !pNodes.isEmpty()) {
819
                        rp.setPreferredMemberNodeList(pNodes);
820
                    }
821
                }
822
            } catch (PropertyNotFoundException e) {
823
                // No preferred list found in properties, so just ignore it; no action needed
824
            }
825
            try {
826
                String blockedNodeList = PropertyService.getProperty("dataone.replicationpolicy.default.blockedNodeList");
827
                if (blockedNodeList != null) {
828
                    List<NodeReference> bNodes = extractNodeReferences(blockedNodeList);
829
                    if (bNodes != null && !bNodes.isEmpty()) {
830
                        rp.setBlockedMemberNodeList(bNodes);
831
                    }
832
                }
833
            } catch (PropertyNotFoundException e) {
834
                // No blocked list found in properties, so just ignore it; no action needed
835
            }
836
        } else {
837
            rp.setReplicationAllowed(false);
838
            rp.setNumberReplicas(0);
839
        }
840
        return rp;
841
    }
842

    
843
    /**
     * Extract a List of NodeReferences from a String listing the node identifiers, where
     * each identifier is separated by whitespace, comma, or semicolon characters.
     * @param nodeString the string containing the list of nodes
     * @return the List of NodeReference objects parsed from the input string
     */
849
    private static List<NodeReference> extractNodeReferences(String nodeString) {
850
        List<NodeReference> nodeList = new ArrayList<NodeReference>();
851
        String[] result = nodeString.split("[,;\\s]");
852
        for (String r : result) {
853
        	if (r != null && r.length() > 0) {
854
	            NodeReference noderef = new NodeReference();
855
	            noderef.setValue(r);
856
	            nodeList.add(noderef);
857
	        }
858
        }
859
        return nodeList;
860
    }
861
}
(8-8/8)