1
|
/**
|
2
|
* '$RCSfile$'
|
3
|
* Purpose: A class that populates a metacat instance based on documents returned from a query
|
4
|
* Copyright: 2010 Regents of the University of California and the
|
5
|
* National Center for Ecological Analysis and Synthesis
|
6
|
* Authors: Michael Daigle
|
7
|
*
|
8
|
* '$Author: berkley $'
|
9
|
* '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
|
10
|
* '$Revision: 5374 $'
|
11
|
*
|
12
|
* This program is free software; you can redistribute it and/or modify
|
13
|
* it under the terms of the GNU General Public License as published by
|
14
|
* the Free Software Foundation; either version 2 of the License, or
|
15
|
* (at your option) any later version.
|
16
|
*
|
17
|
* This program is distributed in the hope that it will be useful,
|
18
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
19
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
20
|
* GNU General Public License for more details.
|
21
|
*
|
22
|
* You should have received a copy of the GNU General Public License
|
23
|
* along with this program; if not, write to the Free Software
|
24
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
25
|
*/
|
26
|
package edu.ucsb.nceas.metacat.util;
|
27
|
|
28
|
import java.io.ByteArrayInputStream;
|
29
|
import java.io.InputStream;
|
30
|
import java.math.BigInteger;
|
31
|
import java.net.HttpURLConnection;
|
32
|
import java.net.URL;
|
33
|
import java.util.ArrayList;
|
34
|
import java.util.Calendar;
|
35
|
import java.util.Date;
|
36
|
import java.util.HashMap;
|
37
|
import java.util.List;
|
38
|
import java.util.Map;
|
39
|
import java.util.Vector;
|
40
|
|
41
|
import org.apache.commons.io.IOUtils;
|
42
|
import org.dataone.client.v2.itk.D1Client;
|
43
|
import org.dataone.client.v2.MNode;
|
44
|
import org.dataone.client.v2.formats.ObjectFormatCache;
|
45
|
import org.dataone.client.auth.CertificateManager;
|
46
|
import org.dataone.ore.ResourceMapFactory;
|
47
|
import org.dataone.service.exceptions.NotFound;
|
48
|
import org.dataone.service.types.v1.AccessPolicy;
|
49
|
import org.dataone.service.types.v1.AccessRule;
|
50
|
import org.dataone.service.types.v1.Checksum;
|
51
|
import org.dataone.service.types.v1.Identifier;
|
52
|
import org.dataone.service.types.v1.NodeReference;
|
53
|
import org.dataone.service.types.v1.ObjectFormat;
|
54
|
import org.dataone.service.types.v1.Permission;
|
55
|
import org.dataone.service.types.v1.Session;
|
56
|
import org.dataone.service.types.v1.Subject;
|
57
|
import org.dataone.service.types.v2.SystemMetadata;
|
58
|
import org.dataone.service.types.v1.util.ChecksumUtil;
|
59
|
import org.dataone.service.util.Constants;
|
60
|
import org.dspace.foresite.ResourceMap;
|
61
|
import org.ecoinformatics.datamanager.DataManager;
|
62
|
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
|
63
|
import org.ecoinformatics.datamanager.parser.DataPackage;
|
64
|
|
65
|
import edu.ucsb.nceas.metacat.MetaCatServlet;
|
66
|
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
|
67
|
import edu.ucsb.nceas.metacat.properties.PropertyService;
|
68
|
|
69
|
/**
|
70
|
* @author berkley
|
71
|
* A class to populate a metacat instance based on documents returned from a query
|
72
|
*/
|
73
|
public class MetacatPopulator
|
74
|
{
|
75
|
private String sourceUrl = null;
|
76
|
private String destinationUrl = null;
|
77
|
private String query = null;
|
78
|
private String username = null;
|
79
|
private String password = null;
|
80
|
private Session session = null;
|
81
|
private String subjectDN = null;
|
82
|
|
83
|
/**
|
84
|
* create a new MetacatPopulator with given source and destination urls.
|
85
|
* These should be
|
86
|
* of the form "http://<url>/<metacat_instance>"
|
87
|
* If username and/or password is null, the query will be run as public
|
88
|
* @param sourceUrl
|
89
|
* @param destUrl
|
90
|
* @param query
|
91
|
* @param username
|
92
|
* @param password
|
93
|
*/
|
94
|
public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
|
95
|
{
|
96
|
this.sourceUrl = sourceUrl;
|
97
|
this.query = query;
|
98
|
this.username = username;
|
99
|
this.password = password;
|
100
|
this.destinationUrl = destUrl;
|
101
|
// TODO: use specific certificate?
|
102
|
this.session = null; //new Session();
|
103
|
this.subjectDN = CertificateManager.getInstance().getSubjectDN(CertificateManager.getInstance().loadCertificate());
|
104
|
}
|
105
|
|
106
|
/**
|
107
|
* populate from the source
|
108
|
*/
|
109
|
public void populate()
|
110
|
throws Exception
|
111
|
{
|
112
|
//String sourceSessionid = login();
|
113
|
|
114
|
//do a query
|
115
|
String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
|
116
|
"returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
|
117
|
"returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
|
118
|
params += "action=query&";
|
119
|
params += "qformat=xml&";
|
120
|
params += "anyfield=" + query;
|
121
|
|
122
|
printHeader("Searching source");
|
123
|
System.out.println("searching '" + sourceUrl + "' for '" + query + "'");
|
124
|
InputStream is = getResponse(sourceUrl, "/metacat", params, "POST");
|
125
|
String response = IOUtils.toString(is, MetaCatServlet.DEFAULT_ENCODING);
|
126
|
//System.out.println("response: " + response);
|
127
|
Vector<Document> docs = parseResponse(response);
|
128
|
|
129
|
printHeader("Parsing source results");
|
130
|
System.out.println("creating MN with url: " + destinationUrl + "/");
|
131
|
MNode mn = D1Client.getMN(destinationUrl + "/");
|
132
|
|
133
|
printHeader("Processing " + docs.size() + " results.");
|
134
|
for (int i=0; i<docs.size(); i++) {
|
135
|
|
136
|
// for generating the ORE map
|
137
|
Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
|
138
|
List<Identifier> dataIds = new ArrayList<Identifier>();
|
139
|
|
140
|
//for each document in the query
|
141
|
Document doc = docs.get(i);
|
142
|
String docid = doc.docid;
|
143
|
//get the doc from source
|
144
|
printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
|
145
|
params = "action=read&qformat=xml&docid=" + docid;
|
146
|
is = getResponse(sourceUrl, "/metacat", params, "POST");
|
147
|
String doctext = IOUtils.toString(is, MetaCatServlet.DEFAULT_ENCODING);
|
148
|
System.out.println("doctext: " + doctext);
|
149
|
is = IOUtils.toInputStream(doctext, MetaCatServlet.DEFAULT_ENCODING);
|
150
|
//parse the document
|
151
|
DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
|
152
|
DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
|
153
|
DataPackage dataPackage = dataManager.parseMetadata(is);
|
154
|
|
155
|
if (dataPackage == null) {
|
156
|
continue;
|
157
|
}
|
158
|
|
159
|
//go through the DistributionMetadata and download any described data
|
160
|
is = IOUtils.toInputStream(doctext, MetaCatServlet.DEFAULT_ENCODING);
|
161
|
doc.doctext = doctext;
|
162
|
|
163
|
printHeader("creating document on destination " + destinationUrl);
|
164
|
SystemMetadata sysmeta = generateSystemMetadata(doc);
|
165
|
|
166
|
// iterate through the data objects
|
167
|
if (dataPackage.getEntityList() != null) {
|
168
|
for (int j=0; j < dataPackage.getEntityList().length; j++) {
|
169
|
String dataDocUrl = dataPackage.getEntityList()[j].getURL();
|
170
|
String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
|
171
|
if (dataDocMimeType == null) {
|
172
|
dataDocMimeType =
|
173
|
ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFormatId().getValue();
|
174
|
}
|
175
|
String dataDocLocalId = "";
|
176
|
if (dataDocUrl.trim().startsWith("ecogrid://knb/")) { //we only handle ecogrid urls right now
|
177
|
dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") +
|
178
|
"ecogrid://knb/".length(), dataDocUrl.length());
|
179
|
//get the file
|
180
|
params = "action=read&qformat=xml&docid=" + dataDocLocalId;
|
181
|
InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
|
182
|
String dataDocText = IOUtils.toString(dataDocIs, MetaCatServlet.DEFAULT_ENCODING);
|
183
|
|
184
|
//set the id
|
185
|
Identifier did = new Identifier();
|
186
|
did.setValue(dataDocLocalId);
|
187
|
|
188
|
// add the data identifier for ORE map
|
189
|
dataIds.add(did);
|
190
|
|
191
|
//create sysmeta for the data doc
|
192
|
SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
|
193
|
//overwrite the bogus values from the last call
|
194
|
dataDocSysMeta.setIdentifier(did);
|
195
|
ObjectFormat format = null;
|
196
|
try {
|
197
|
format = ObjectFormatCache.getInstance().getFormat(dataDocMimeType);
|
198
|
dataDocSysMeta.setFormatId(format.getFormatId());
|
199
|
} catch (NotFound e) {
|
200
|
System.out.println(e.getMessage());
|
201
|
}
|
202
|
dataDocIs = IOUtils.toInputStream(dataDocText, MetaCatServlet.DEFAULT_ENCODING);
|
203
|
String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
|
204
|
Checksum checksum = ChecksumUtil.checksum(dataDocIs, algorithm);
|
205
|
dataDocSysMeta.setChecksum(checksum);
|
206
|
String sizeStr =
|
207
|
Long.toString(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
|
208
|
dataDocSysMeta.setSize(new BigInteger(sizeStr));
|
209
|
|
210
|
boolean error = false;
|
211
|
|
212
|
//create the data doc on d1
|
213
|
try {
|
214
|
mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText, MetaCatServlet.DEFAULT_ENCODING), dataDocSysMeta);
|
215
|
}
|
216
|
catch(Exception e) {
|
217
|
error = true;
|
218
|
System.out.println("ERROR: Could not create data document with id " +
|
219
|
dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
|
220
|
}
|
221
|
finally {
|
222
|
if (error) {
|
223
|
printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() +
|
224
|
"FAILED.");
|
225
|
}
|
226
|
else {
|
227
|
printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
|
228
|
" which is described by " + sysmeta.getIdentifier().getValue());
|
229
|
}
|
230
|
}
|
231
|
}
|
232
|
else {
|
233
|
System.out.println("WARNING: Could not process describes url " +
|
234
|
dataDocUrl + " for document " + doc.docid +
|
235
|
". Only ecogrid://knb/ urls are currently supported.");
|
236
|
}
|
237
|
}
|
238
|
}
|
239
|
|
240
|
try {
|
241
|
Identifier id =
|
242
|
mn.create(session, sysmeta.getIdentifier(), IOUtils.toInputStream(doc.doctext, MetaCatServlet.DEFAULT_ENCODING), sysmeta);
|
243
|
System.out.println("Success inserting document " + id.getValue());
|
244
|
|
245
|
// no need for an ORE map if there's no data
|
246
|
if (!dataIds.isEmpty()) {
|
247
|
// generate the ORE map for this datapackage
|
248
|
Identifier resourceMapId = new Identifier();
|
249
|
resourceMapId.setValue("resourceMap_" + sysmeta.getIdentifier().getValue());
|
250
|
idMap.put(sysmeta.getIdentifier(), dataIds);
|
251
|
ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
|
252
|
String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
|
253
|
Document rmDoc = new Document(resourceMapId.getValue(), "http://www.openarchives.org/ore/terms", "", "");
|
254
|
rmDoc.doctext = resourceMapXML;
|
255
|
SystemMetadata resourceMapSysMeta = generateSystemMetadata(rmDoc);
|
256
|
mn.create(session, resourceMapId, IOUtils.toInputStream(resourceMapXML, MetaCatServlet.DEFAULT_ENCODING), resourceMapSysMeta);
|
257
|
|
258
|
}
|
259
|
|
260
|
}
|
261
|
catch(Exception e) {
|
262
|
e.printStackTrace();
|
263
|
System.out.println("Could not create document with id " +
|
264
|
sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
|
265
|
}
|
266
|
finally {
|
267
|
printHeader("Done processing document " + sysmeta.getIdentifier().getValue());
|
268
|
}
|
269
|
}
|
270
|
|
271
|
//logout();
|
272
|
}
|
273
|
|
274
|
|
275
|
|
276
|
/**
|
277
|
* @param doc
|
278
|
* @return
|
279
|
*/
|
280
|
private SystemMetadata generateSystemMetadata(Document doc)
|
281
|
throws Exception {
|
282
|
SystemMetadata sm = new SystemMetadata();
|
283
|
sm.setSerialVersion(BigInteger.valueOf(1));
|
284
|
//set the id
|
285
|
Identifier id = new Identifier();
|
286
|
id.setValue(doc.docid.trim());
|
287
|
sm.setIdentifier(id);
|
288
|
|
289
|
//set the object format
|
290
|
ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
|
291
|
if (format == null) {
|
292
|
if (doc.doctype.trim().equals("BIN")) {
|
293
|
format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
|
294
|
}
|
295
|
else {
|
296
|
format = ObjectFormatCache.getInstance().getFormat("text/plain");
|
297
|
}
|
298
|
}
|
299
|
sm.setFormatId(format.getFormatId());
|
300
|
|
301
|
//create the checksum
|
302
|
ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
|
303
|
String algorithm = PropertyService.getProperty("dataone.checksumAlgorithm.default");
|
304
|
Checksum checksum = ChecksumUtil.checksum(bais, algorithm);
|
305
|
sm.setChecksum(checksum);
|
306
|
|
307
|
//set the size
|
308
|
String sizeStr = Long.toString(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
|
309
|
sm.setSize(new BigInteger(sizeStr));
|
310
|
|
311
|
//submitter, rights holder
|
312
|
Subject p = new Subject();
|
313
|
p.setValue(subjectDN);
|
314
|
sm.setSubmitter(p);
|
315
|
sm.setRightsHolder(p);
|
316
|
try {
|
317
|
Date dateCreated = parseMetacatDate(doc.createDate);
|
318
|
sm.setDateUploaded(dateCreated);
|
319
|
Date dateUpdated = parseMetacatDate(doc.updateDate);
|
320
|
sm.setDateSysMetadataModified(dateUpdated);
|
321
|
}
|
322
|
catch(Exception e) {
|
323
|
System.out.println("couldn't parse a date: " + e.getMessage());
|
324
|
Date dateCreated = new Date();
|
325
|
sm.setDateUploaded(dateCreated);
|
326
|
Date dateUpdated = new Date();
|
327
|
sm.setDateSysMetadataModified(dateUpdated);
|
328
|
}
|
329
|
NodeReference nr = new NodeReference();
|
330
|
nr.setValue(PropertyService.getProperty("dataone.nodeId"));
|
331
|
sm.setOriginMemberNode(nr);
|
332
|
sm.setAuthoritativeMemberNode(nr);
|
333
|
|
334
|
// create access policy
|
335
|
AccessPolicy accessPolicy = new AccessPolicy();
|
336
|
AccessRule accessRule = new AccessRule();
|
337
|
accessRule.addPermission(Permission.READ);
|
338
|
Subject subject = new Subject();
|
339
|
subject.setValue(Constants.SUBJECT_PUBLIC);
|
340
|
accessRule.addSubject(subject);
|
341
|
accessPolicy.addAllow(accessRule);
|
342
|
|
343
|
sm.setAccessPolicy(accessPolicy);
|
344
|
|
345
|
return sm;
|
346
|
}
|
347
|
|
348
|
private void printHeader(String s) {
|
349
|
System.out.println("****** " + s + " *******");
|
350
|
}
|
351
|
|
352
|
/**
|
353
|
* parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
|
354
|
* a proper date object
|
355
|
* @param date
|
356
|
* @return
|
357
|
*/
|
358
|
private Date parseMetacatDate(String date)
|
359
|
{
|
360
|
String year = date.substring(0, 4);
|
361
|
String month = date.substring(5, 7);
|
362
|
String day = date.substring(8, 10);
|
363
|
Calendar c = Calendar.getInstance();
|
364
|
c.set(new Integer(year).intValue(),
|
365
|
new Integer(month).intValue(),
|
366
|
new Integer(day).intValue());
|
367
|
return c.getTime();
|
368
|
}
|
369
|
|
370
|
/**
|
371
|
* parse a metacat query response and return a vector of docids
|
372
|
* @param response
|
373
|
* @return
|
374
|
*/
|
375
|
private Vector<Document> parseResponse(String response)
|
376
|
{
|
377
|
Vector<Document> v = new Vector<Document>();
|
378
|
int dstart = response.indexOf("<document>");
|
379
|
int dend = response.indexOf("</document>", dstart);
|
380
|
while(dstart != -1)
|
381
|
{
|
382
|
String doc = response.substring(dstart + "<document>".length(), dend);
|
383
|
//System.out.println("adding " + docid);
|
384
|
Document d = new Document(getFieldFromDoc(doc, "docid"),
|
385
|
getFieldFromDoc(doc, "doctype"),
|
386
|
getFieldFromDoc(doc, "createdate"),
|
387
|
getFieldFromDoc(doc, "updatedate"));
|
388
|
v.add(d);
|
389
|
dstart = response.indexOf("<document>", dend);
|
390
|
dend = response.indexOf("</document>", dstart);
|
391
|
}
|
392
|
|
393
|
return v;
|
394
|
}
|
395
|
|
396
|
private String getFieldFromDoc(String doc, String fieldname)
|
397
|
{
|
398
|
String field = "<" + fieldname + ">";
|
399
|
String fieldend = "</" + fieldname + ">";
|
400
|
int start = doc.indexOf(field);
|
401
|
int end = doc.indexOf(fieldend);
|
402
|
String s = doc.substring(start + field.length(), end);
|
403
|
//System.out.println("field: " + fieldname + " : " + s);
|
404
|
return s;
|
405
|
}
|
406
|
|
407
|
|
408
|
/**
|
409
|
* returns a sessionid
|
410
|
* @return
|
411
|
*/
|
412
|
private String login()
|
413
|
throws Exception
|
414
|
{
|
415
|
InputStream is = getResponse(sourceUrl, "/metacat",
|
416
|
"action=login&username=" + username + "&password=" + password + "&qformat=xml", "POST");
|
417
|
String response = IOUtils.toString(is, MetaCatServlet.DEFAULT_ENCODING);
|
418
|
//System.out.println("response: " + response);
|
419
|
if(response.indexOf("sessionId") == -1)
|
420
|
{
|
421
|
throw new Exception("Error logging into " + sourceUrl);
|
422
|
}
|
423
|
|
424
|
String sessionid = response.substring(
|
425
|
response.indexOf("<sessionId>") + "<sessionId>".length(),
|
426
|
response.indexOf("</sessionId>"));
|
427
|
System.out.println("sessionid: " + sessionid);
|
428
|
return sessionid;
|
429
|
}
|
430
|
|
431
|
/**
|
432
|
* logout both the source and destination
|
433
|
* @throws Exception
|
434
|
*/
|
435
|
private void logout()
|
436
|
throws Exception
|
437
|
{
|
438
|
getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
|
439
|
}
|
440
|
|
441
|
/**
|
442
|
* get an http response
|
443
|
* @param contextRootUrl
|
444
|
* @param resource
|
445
|
* @param urlParameters
|
446
|
* @param method
|
447
|
* @return
|
448
|
* @throws Exception
|
449
|
*/
|
450
|
private InputStream getResponse(String contextRootUrl, String resource,
|
451
|
String urlParameters, String method)
|
452
|
throws Exception
|
453
|
{
|
454
|
HttpURLConnection connection = null ;
|
455
|
|
456
|
String restURL = contextRootUrl+resource;
|
457
|
|
458
|
if (urlParameters != null) {
|
459
|
if (restURL.indexOf("?") == -1)
|
460
|
restURL += "?";
|
461
|
restURL += urlParameters;
|
462
|
if(restURL.indexOf(" ") != -1)
|
463
|
{
|
464
|
restURL = restURL.replaceAll("\\s", "%20");
|
465
|
}
|
466
|
}
|
467
|
|
468
|
URL u = null;
|
469
|
InputStream content = null;
|
470
|
System.out.println("url: " + restURL);
|
471
|
System.out.println("method: " + method);
|
472
|
u = new URL(restURL);
|
473
|
connection = (HttpURLConnection) u.openConnection();
|
474
|
connection.setDoOutput(true);
|
475
|
connection.setDoInput(true);
|
476
|
connection.setRequestMethod(method);
|
477
|
content = connection.getInputStream();
|
478
|
return content;
|
479
|
}
|
480
|
|
481
|
private class Document
|
482
|
{
|
483
|
public String docid;
|
484
|
public String doctype;
|
485
|
public String createDate;
|
486
|
public String updateDate;
|
487
|
public String doctext;
|
488
|
|
489
|
public Document(String docid, String doctype, String createDate, String updateDate)
|
490
|
{
|
491
|
this.docid = docid.trim();
|
492
|
this.doctype = doctype.trim();
|
493
|
this.createDate = createDate.trim();
|
494
|
this.updateDate = updateDate.trim();
|
495
|
}
|
496
|
}
|
497
|
}
|