31 |
31 |
import java.math.BigInteger;
|
32 |
32 |
import java.net.HttpURLConnection;
|
33 |
33 |
import java.net.URL;
|
|
34 |
import java.util.ArrayList;
|
34 |
35 |
import java.util.Calendar;
|
35 |
36 |
import java.util.Date;
|
|
37 |
import java.util.HashMap;
|
|
38 |
import java.util.List;
|
|
39 |
import java.util.Map;
|
36 |
40 |
import java.util.Vector;
|
37 |
41 |
|
38 |
42 |
import javax.activation.DataHandler;
|
... | ... | |
45 |
49 |
import org.dataone.client.MNode;
|
46 |
50 |
import org.dataone.client.ObjectFormatCache;
|
47 |
51 |
import org.dataone.client.auth.CertificateManager;
|
|
52 |
import org.dataone.ore.ResourceMapFactory;
|
48 |
53 |
import org.dataone.service.exceptions.NotFound;
|
49 |
54 |
import org.dataone.service.types.v1.AccessPolicy;
|
50 |
55 |
import org.dataone.service.types.v1.AccessRule;
|
... | ... | |
58 |
63 |
import org.dataone.service.types.v1.SystemMetadata;
|
59 |
64 |
import org.dataone.service.types.v1.util.ChecksumUtil;
|
60 |
65 |
import org.dataone.service.util.Constants;
|
|
66 |
import org.dspace.foresite.ResourceMap;
|
61 |
67 |
import org.ecoinformatics.datamanager.DataManager;
|
62 |
68 |
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
|
63 |
69 |
import org.ecoinformatics.datamanager.parser.DataPackage;
|
... | ... | |
79 |
85 |
private String username = null;
|
80 |
86 |
private String password = null;
|
81 |
87 |
private Session session = null;
|
|
88 |
private String subjectDN = null;
|
82 |
89 |
|
83 |
90 |
/**
|
84 |
91 |
* create a new MetacatPopulator with given source and destination urls.
|
... | ... | |
100 |
107 |
this.destinationUrl = destUrl;
|
101 |
108 |
// TODO: use specific certificate?
|
102 |
109 |
this.session = null; //new Session();
|
|
110 |
this.subjectDN = CertificateManager.getInstance().getSubjectDN(CertificateManager.getInstance().loadCertificate());
|
103 |
111 |
}
|
104 |
112 |
|
105 |
113 |
/**
|
... | ... | |
130 |
138 |
MNode mn = D1Client.getMN(destinationUrl + "/");
|
131 |
139 |
|
132 |
140 |
printHeader("Processing " + docs.size() + " results.");
|
133 |
|
for(int i=0; i<docs.size(); i++)
|
134 |
|
{
|
|
141 |
for (int i=0; i<docs.size(); i++) {
|
|
142 |
|
|
143 |
// for generating the ORE map
|
|
144 |
Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>();
|
|
145 |
List<Identifier> dataIds = new ArrayList<Identifier>();
|
|
146 |
|
135 |
147 |
//for each document in the query
|
136 |
148 |
Document doc = docs.get(i);
|
137 |
149 |
String docid = doc.docid;
|
... | ... | |
147 |
159 |
DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
|
148 |
160 |
DataPackage dataPackage = dataManager.parseMetadata(is);
|
149 |
161 |
|
150 |
|
if (dataPackage == null)
|
151 |
|
{
|
|
162 |
if (dataPackage == null) {
|
152 |
163 |
continue;
|
153 |
164 |
}
|
|
165 |
|
154 |
166 |
//go through the DistributionMetadata and download any described data
|
155 |
|
|
156 |
167 |
is = stringToStream(doctext);
|
157 |
168 |
doc.doctext = doctext;
|
158 |
169 |
|
159 |
170 |
printHeader("creating document on destination " + destinationUrl);
|
160 |
171 |
SystemMetadata sysmeta = generateSystemMetadata(doc);
|
|
172 |
|
|
173 |
// iterate through the data objects
|
161 |
174 |
if (dataPackage.getEntityList() != null) {
|
162 |
|
for(int j=0; j < dataPackage.getEntityList().length; j++)
|
163 |
|
{
|
|
175 |
for (int j=0; j < dataPackage.getEntityList().length; j++) {
|
164 |
176 |
String dataDocUrl = dataPackage.getEntityList()[j].getURL();
|
165 |
|
String dataDocMimeType =
|
166 |
|
dataPackage.getEntityList()[j].getDataFormat();
|
|
177 |
String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat();
|
167 |
178 |
if (dataDocMimeType == null) {
|
168 |
179 |
dataDocMimeType =
|
169 |
180 |
ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue();
|
170 |
181 |
}
|
171 |
182 |
String dataDocLocalId = "";
|
172 |
|
if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
|
173 |
|
{ //we only handle ecogrid urls right now
|
|
183 |
if (dataDocUrl.trim().startsWith("ecogrid://knb/")) { //we only handle ecogrid urls right now
|
174 |
184 |
dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") +
|
175 |
185 |
"ecogrid://knb/".length(), dataDocUrl.length());
|
176 |
186 |
//get the file
|
... | ... | |
182 |
192 |
Identifier did = new Identifier();
|
183 |
193 |
did.setValue(dataDocLocalId);
|
184 |
194 |
|
185 |
|
//add the desribeby to the eml's sysmeta
|
186 |
|
// TODO Use ORE
|
187 |
|
// System.out.println("adding describe for doc " +
|
188 |
|
// sysmeta.getIdentifier().getValue() + " :" + did.getValue());
|
189 |
|
// sysmeta.addDescribe(did);
|
|
195 |
// add the data identifier for ORE map
|
|
196 |
dataIds.add(did);
|
190 |
197 |
|
191 |
198 |
//create sysmeta for the data doc
|
192 |
199 |
SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
|
... | ... | |
205 |
212 |
String sizeStr =
|
206 |
213 |
Long.toString(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
|
207 |
214 |
dataDocSysMeta.setSize(new BigInteger(sizeStr));
|
208 |
|
// TODO use ORE map
|
209 |
|
//dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
|
|
215 |
|
210 |
216 |
boolean error = false;
|
211 |
217 |
|
212 |
218 |
//create the data doc on d1
|
213 |
|
try
|
214 |
|
{
|
|
219 |
try {
|
215 |
220 |
mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
|
216 |
221 |
}
|
217 |
|
catch(Exception e)
|
218 |
|
{
|
|
222 |
catch(Exception e) {
|
219 |
223 |
error = true;
|
220 |
224 |
System.out.println("ERROR: Could not create data document with id " +
|
221 |
225 |
dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
|
222 |
226 |
}
|
223 |
|
finally
|
224 |
|
{
|
225 |
|
if (error)
|
226 |
|
{
|
|
227 |
finally {
|
|
228 |
if (error) {
|
227 |
229 |
printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() +
|
228 |
230 |
"FAILED.");
|
229 |
231 |
}
|
230 |
|
else
|
231 |
|
{
|
|
232 |
else {
|
232 |
233 |
printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
|
233 |
234 |
" which is described by " + sysmeta.getIdentifier().getValue());
|
234 |
235 |
}
|
235 |
236 |
}
|
236 |
237 |
}
|
237 |
|
else
|
238 |
|
{
|
|
238 |
else {
|
239 |
239 |
System.out.println("WARNING: Could not process describes url " +
|
240 |
240 |
dataDocUrl + " for document " + doc.docid +
|
241 |
241 |
". Only ecogrid://knb/ urls are currently supported.");
|
... | ... | |
243 |
243 |
}
|
244 |
244 |
}
|
245 |
245 |
|
246 |
|
try
|
247 |
|
{
|
248 |
|
Identifier id = mn.create(session, sysmeta.getIdentifier(),
|
249 |
|
IOUtils.toInputStream(doc.doctext), sysmeta);
|
|
246 |
try {
|
|
247 |
Identifier id =
|
|
248 |
mn.create(session, sysmeta.getIdentifier(), IOUtils.toInputStream(doc.doctext), sysmeta);
|
250 |
249 |
System.out.println("Success inserting document " + id.getValue());
|
251 |
250 |
|
|
251 |
// no need for an ORE map if there's no data
|
|
252 |
if (!dataIds.isEmpty()) {
|
|
253 |
// generate the ORE map for this datapackage
|
|
254 |
Identifier resourceMapId = new Identifier();
|
|
255 |
resourceMapId.setValue("resourceMap_" + sysmeta.getIdentifier().getValue());
|
|
256 |
idMap.put(sysmeta.getIdentifier(), dataIds);
|
|
257 |
ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap);
|
|
258 |
String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm);
|
|
259 |
Document rmDoc = new Document(resourceMapId.getValue(), "http://www.openarchives.org/ore/terms", "", "");
|
|
260 |
rmDoc.doctext = resourceMapXML;
|
|
261 |
SystemMetadata resourceMapSysMeta = generateSystemMetadata(rmDoc);
|
|
262 |
mn.create(session, resourceMapId, IOUtils.toInputStream(resourceMapXML), resourceMapSysMeta);
|
|
263 |
|
|
264 |
// clean up the permissions (FORCE public read)
|
|
265 |
for (Identifier dataId: dataIds) {
|
|
266 |
mn.setAccessPolicy(session, dataId, sysmeta.getAccessPolicy());
|
|
267 |
System.out.println("Set public access policy for: " + dataId.getValue());
|
|
268 |
}
|
252 |
269 |
}
|
253 |
|
catch(Exception e)
|
254 |
|
{
|
|
270 |
|
|
271 |
}
|
|
272 |
catch(Exception e) {
|
255 |
273 |
e.printStackTrace();
|
256 |
274 |
System.out.println("Could not create document with id " +
|
257 |
275 |
sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
|
258 |
|
|
259 |
276 |
}
|
260 |
|
finally
|
261 |
|
{
|
262 |
|
printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
|
|
277 |
finally {
|
|
278 |
printHeader("Done processing document " + sysmeta.getIdentifier().getValue());
|
263 |
279 |
}
|
264 |
280 |
}
|
265 |
281 |
|
... | ... | |
273 |
289 |
* @return
|
274 |
290 |
*/
|
275 |
291 |
private SystemMetadata generateSystemMetadata(Document doc)
|
276 |
|
throws Exception
|
277 |
|
{
|
|
292 |
throws Exception {
|
278 |
293 |
SystemMetadata sm = new SystemMetadata();
|
279 |
294 |
//set the id
|
280 |
295 |
Identifier id = new Identifier();
|
... | ... | |
283 |
298 |
|
284 |
299 |
//set the object format
|
285 |
300 |
ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
|
286 |
|
if(format == null)
|
287 |
|
{
|
288 |
|
if(doc.doctype.trim().equals("BIN"))
|
289 |
|
{
|
|
301 |
if (format == null) {
|
|
302 |
if (doc.doctype.trim().equals("BIN")) {
|
290 |
303 |
format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
|
291 |
304 |
}
|
292 |
|
else
|
293 |
|
{
|
|
305 |
else {
|
294 |
306 |
format = ObjectFormatCache.getInstance().getFormat("text/plain");
|
295 |
307 |
}
|
296 |
308 |
}
|
... | ... | |
305 |
317 |
String sizeStr = Long.toString(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
|
306 |
318 |
sm.setSize(new BigInteger(sizeStr));
|
307 |
319 |
|
308 |
|
//submitter
|
|
320 |
//submitter, rights holder
|
309 |
321 |
Subject p = new Subject();
|
310 |
|
p.setValue("unknown");
|
|
322 |
p.setValue(subjectDN);
|
311 |
323 |
sm.setSubmitter(p);
|
312 |
324 |
sm.setRightsHolder(p);
|
313 |
|
try
|
314 |
|
{
|
|
325 |
try {
|
315 |
326 |
Date dateCreated = parseMetacatDate(doc.createDate);
|
316 |
327 |
sm.setDateUploaded(dateCreated);
|
317 |
328 |
Date dateUpdated = parseMetacatDate(doc.updateDate);
|
318 |
329 |
sm.setDateSysMetadataModified(dateUpdated);
|
319 |
330 |
}
|
320 |
|
catch(Exception e)
|
321 |
|
{
|
|
331 |
catch(Exception e) {
|
322 |
332 |
System.out.println("couldn't parse a date: " + e.getMessage());
|
323 |
333 |
Date dateCreated = new Date();
|
324 |
334 |
sm.setDateUploaded(dateCreated);
|
... | ... | |
338 |
348 |
subject.setValue(Constants.SUBJECT_PUBLIC);
|
339 |
349 |
accessRule.addSubject(subject);
|
340 |
350 |
accessPolicy.addAllow(accessRule);
|
|
351 |
|
341 |
352 |
sm.setAccessPolicy(accessPolicy);
|
342 |
353 |
|
343 |
354 |
return sm;
|
344 |
355 |
}
|
345 |
356 |
|
346 |
|
private void printHeader(String s)
|
347 |
|
{
|
|
357 |
private void printHeader(String s) {
|
348 |
358 |
System.out.println("****** " + s + " *******");
|
349 |
359 |
}
|
350 |
360 |
|
351 |
|
|
352 |
|
|
353 |
361 |
/**
|
354 |
362 |
* parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
|
355 |
363 |
* a proper date object
|
Include the newer d1 libclient, which uses Foresite (and Jena) to construct and parse ORE resource maps for DataONE.