Revision 6538
Added by ben leinfelder over 12 years ago
MetacatPopulator.java | ||
---|---|---|
31 | 31 |
import java.math.BigInteger; |
32 | 32 |
import java.net.HttpURLConnection; |
33 | 33 |
import java.net.URL; |
34 |
import java.util.ArrayList; |
|
34 | 35 |
import java.util.Calendar; |
35 | 36 |
import java.util.Date; |
37 |
import java.util.HashMap; |
|
38 |
import java.util.List; |
|
39 |
import java.util.Map; |
|
36 | 40 |
import java.util.Vector; |
37 | 41 |
|
38 | 42 |
import javax.activation.DataHandler; |
... | ... | |
45 | 49 |
import org.dataone.client.MNode; |
46 | 50 |
import org.dataone.client.ObjectFormatCache; |
47 | 51 |
import org.dataone.client.auth.CertificateManager; |
52 |
import org.dataone.ore.ResourceMapFactory; |
|
48 | 53 |
import org.dataone.service.exceptions.NotFound; |
49 | 54 |
import org.dataone.service.types.v1.AccessPolicy; |
50 | 55 |
import org.dataone.service.types.v1.AccessRule; |
... | ... | |
58 | 63 |
import org.dataone.service.types.v1.SystemMetadata; |
59 | 64 |
import org.dataone.service.types.v1.util.ChecksumUtil; |
60 | 65 |
import org.dataone.service.util.Constants; |
66 |
import org.dspace.foresite.ResourceMap; |
|
61 | 67 |
import org.ecoinformatics.datamanager.DataManager; |
62 | 68 |
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface; |
63 | 69 |
import org.ecoinformatics.datamanager.parser.DataPackage; |
... | ... | |
79 | 85 |
private String username = null; |
80 | 86 |
private String password = null; |
81 | 87 |
private Session session = null; |
88 |
private String subjectDN = null; |
|
82 | 89 |
|
83 | 90 |
/** |
84 | 91 |
* create a new MetacatPopulator with given source and destination urls. |
... | ... | |
100 | 107 |
this.destinationUrl = destUrl; |
101 | 108 |
// TODO: use specific certificate? |
102 | 109 |
this.session = null; //new Session(); |
110 |
this.subjectDN = CertificateManager.getInstance().getSubjectDN(CertificateManager.getInstance().loadCertificate()); |
|
103 | 111 |
} |
104 | 112 |
|
105 | 113 |
/** |
... | ... | |
130 | 138 |
MNode mn = D1Client.getMN(destinationUrl + "/"); |
131 | 139 |
|
132 | 140 |
printHeader("Processing " + docs.size() + " results."); |
133 |
for(int i=0; i<docs.size(); i++) |
|
134 |
{ |
|
141 |
for (int i=0; i<docs.size(); i++) { |
|
142 |
|
|
143 |
// for generating the ORE map |
|
144 |
Map<Identifier, List<Identifier>> idMap = new HashMap<Identifier, List<Identifier>>(); |
|
145 |
List<Identifier> dataIds = new ArrayList<Identifier>(); |
|
146 |
|
|
135 | 147 |
//for each document in the query |
136 | 148 |
Document doc = docs.get(i); |
137 | 149 |
String docid = doc.docid; |
... | ... | |
147 | 159 |
DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName()); |
148 | 160 |
DataPackage dataPackage = dataManager.parseMetadata(is); |
149 | 161 |
|
150 |
if (dataPackage == null) |
|
151 |
{ |
|
162 |
if (dataPackage == null) { |
|
152 | 163 |
continue; |
153 | 164 |
} |
165 |
|
|
154 | 166 |
//go through the DistributionMetadata and download any described data |
155 |
|
|
156 | 167 |
is = stringToStream(doctext); |
157 | 168 |
doc.doctext = doctext; |
158 | 169 |
|
159 | 170 |
printHeader("creating document on destination " + destinationUrl); |
160 | 171 |
SystemMetadata sysmeta = generateSystemMetadata(doc); |
172 |
|
|
173 |
// iterate through the data objects |
|
161 | 174 |
if (dataPackage.getEntityList() != null) { |
162 |
for(int j=0; j < dataPackage.getEntityList().length; j++) |
|
163 |
{ |
|
175 |
for (int j=0; j < dataPackage.getEntityList().length; j++) { |
|
164 | 176 |
String dataDocUrl = dataPackage.getEntityList()[j].getURL(); |
165 |
String dataDocMimeType = |
|
166 |
dataPackage.getEntityList()[j].getDataFormat(); |
|
177 |
String dataDocMimeType = dataPackage.getEntityList()[j].getDataFormat(); |
|
167 | 178 |
if (dataDocMimeType == null) { |
168 | 179 |
dataDocMimeType = |
169 | 180 |
ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue(); |
170 | 181 |
} |
171 | 182 |
String dataDocLocalId = ""; |
172 |
if(dataDocUrl.trim().startsWith("ecogrid://knb/")) |
|
173 |
{ //we only handle ecogrid urls right now |
|
183 |
if (dataDocUrl.trim().startsWith("ecogrid://knb/")) { //we only handle ecogrid urls right now |
|
174 | 184 |
dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + |
175 | 185 |
"ecogrid://knb/".length(), dataDocUrl.length()); |
176 | 186 |
//get the file |
... | ... | |
182 | 192 |
Identifier did = new Identifier(); |
183 | 193 |
did.setValue(dataDocLocalId); |
184 | 194 |
|
185 |
//add the desribeby to the eml's sysmeta |
|
186 |
// TODO Use ORE |
|
187 |
// System.out.println("adding describe for doc " + |
|
188 |
// sysmeta.getIdentifier().getValue() + " :" + did.getValue()); |
|
189 |
// sysmeta.addDescribe(did); |
|
195 |
// add the data identifier for ORE map |
|
196 |
dataIds.add(did); |
|
190 | 197 |
|
191 | 198 |
//create sysmeta for the data doc |
192 | 199 |
SystemMetadata dataDocSysMeta = generateSystemMetadata(doc); |
... | ... | |
205 | 212 |
String sizeStr = |
206 | 213 |
Long.toString(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length); |
207 | 214 |
dataDocSysMeta.setSize(new BigInteger(sizeStr)); |
208 |
// TODO use ORE map |
|
209 |
//dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier()); |
|
215 |
|
|
210 | 216 |
boolean error = false; |
211 | 217 |
|
212 | 218 |
//create the data doc on d1 |
213 |
try |
|
214 |
{ |
|
219 |
try { |
|
215 | 220 |
mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta); |
216 | 221 |
} |
217 |
catch(Exception e) |
|
218 |
{ |
|
222 |
catch(Exception e) { |
|
219 | 223 |
error = true; |
220 | 224 |
System.out.println("ERROR: Could not create data document with id " + |
221 | 225 |
dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage()); |
222 | 226 |
} |
223 |
finally |
|
224 |
{ |
|
225 |
if (error) |
|
226 |
{ |
|
227 |
finally { |
|
228 |
if (error) { |
|
227 | 229 |
printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + |
228 | 230 |
"FAILED."); |
229 | 231 |
} |
230 |
else |
|
231 |
{ |
|
232 |
else { |
|
232 | 233 |
printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() + |
233 | 234 |
" which is described by " + sysmeta.getIdentifier().getValue()); |
234 | 235 |
} |
235 | 236 |
} |
236 | 237 |
} |
237 |
else |
|
238 |
{ |
|
238 |
else { |
|
239 | 239 |
System.out.println("WARNING: Could not process describes url " + |
240 | 240 |
dataDocUrl + " for document " + doc.docid + |
241 | 241 |
". Only ecogrid://knb/ urls are currently supported."); |
... | ... | |
243 | 243 |
} |
244 | 244 |
} |
245 | 245 |
|
246 |
try |
|
247 |
{ |
|
248 |
Identifier id = mn.create(session, sysmeta.getIdentifier(), |
|
249 |
IOUtils.toInputStream(doc.doctext), sysmeta); |
|
246 |
try { |
|
247 |
Identifier id = |
|
248 |
mn.create(session, sysmeta.getIdentifier(), IOUtils.toInputStream(doc.doctext), sysmeta); |
|
250 | 249 |
System.out.println("Success inserting document " + id.getValue()); |
251 | 250 |
|
251 |
// no need for an ORE map if there's no data |
|
252 |
if (!dataIds.isEmpty()) { |
|
253 |
// generate the ORE map for this datapackage |
|
254 |
Identifier resourceMapId = new Identifier(); |
|
255 |
resourceMapId.setValue("resourceMap_" + sysmeta.getIdentifier().getValue()); |
|
256 |
idMap.put(sysmeta.getIdentifier(), dataIds); |
|
257 |
ResourceMap rm = ResourceMapFactory.getInstance().createResourceMap(resourceMapId, idMap); |
|
258 |
String resourceMapXML = ResourceMapFactory.getInstance().serializeResourceMap(rm); |
|
259 |
Document rmDoc = new Document(resourceMapId.getValue(), "http://www.openarchives.org/ore/terms", "", ""); |
|
260 |
rmDoc.doctext = resourceMapXML; |
|
261 |
SystemMetadata resourceMapSysMeta = generateSystemMetadata(rmDoc); |
|
262 |
mn.create(session, resourceMapId, IOUtils.toInputStream(resourceMapXML), resourceMapSysMeta); |
|
263 |
|
|
264 |
// clean up the permissions (FORCE public read) |
|
265 |
for (Identifier dataId: dataIds) { |
|
266 |
mn.setAccessPolicy(session, dataId, sysmeta.getAccessPolicy()); |
|
267 |
System.out.println("Set public access policy for: " + dataId.getValue()); |
|
268 |
} |
|
252 | 269 |
} |
253 |
catch(Exception e) |
|
254 |
{ |
|
270 |
|
|
271 |
} |
|
272 |
catch(Exception e) { |
|
255 | 273 |
e.printStackTrace(); |
256 | 274 |
System.out.println("Could not create document with id " + |
257 | 275 |
sysmeta.getIdentifier().getValue() + " : " + e.getMessage()); |
258 |
|
|
259 | 276 |
} |
260 |
finally |
|
261 |
{ |
|
262 |
printHeader("Done inserting document " + sysmeta.getIdentifier().getValue()); |
|
277 |
finally { |
|
278 |
printHeader("Done processing document " + sysmeta.getIdentifier().getValue()); |
|
263 | 279 |
} |
264 | 280 |
} |
265 | 281 |
|
... | ... | |
273 | 289 |
* @return |
274 | 290 |
*/ |
275 | 291 |
private SystemMetadata generateSystemMetadata(Document doc) |
276 |
throws Exception |
|
277 |
{ |
|
292 |
throws Exception { |
|
278 | 293 |
SystemMetadata sm = new SystemMetadata(); |
279 | 294 |
//set the id |
280 | 295 |
Identifier id = new Identifier(); |
... | ... | |
283 | 298 |
|
284 | 299 |
//set the object format |
285 | 300 |
ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype); |
286 |
if(format == null) |
|
287 |
{ |
|
288 |
if(doc.doctype.trim().equals("BIN")) |
|
289 |
{ |
|
301 |
if (format == null) { |
|
302 |
if (doc.doctype.trim().equals("BIN")) { |
|
290 | 303 |
format = ObjectFormatCache.getInstance().getFormat("application/octet-stream"); |
291 | 304 |
} |
292 |
else |
|
293 |
{ |
|
305 |
else { |
|
294 | 306 |
format = ObjectFormatCache.getInstance().getFormat("text/plain"); |
295 | 307 |
} |
296 | 308 |
} |
... | ... | |
305 | 317 |
String sizeStr = Long.toString(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length); |
306 | 318 |
sm.setSize(new BigInteger(sizeStr)); |
307 | 319 |
|
308 |
//submitter |
|
320 |
//submitter, rights holder
|
|
309 | 321 |
Subject p = new Subject(); |
310 |
p.setValue("unknown");
|
|
322 |
p.setValue(subjectDN);
|
|
311 | 323 |
sm.setSubmitter(p); |
312 | 324 |
sm.setRightsHolder(p); |
313 |
try |
|
314 |
{ |
|
325 |
try { |
|
315 | 326 |
Date dateCreated = parseMetacatDate(doc.createDate); |
316 | 327 |
sm.setDateUploaded(dateCreated); |
317 | 328 |
Date dateUpdated = parseMetacatDate(doc.updateDate); |
318 | 329 |
sm.setDateSysMetadataModified(dateUpdated); |
319 | 330 |
} |
320 |
catch(Exception e) |
|
321 |
{ |
|
331 |
catch(Exception e) { |
|
322 | 332 |
System.out.println("couldn't parse a date: " + e.getMessage()); |
323 | 333 |
Date dateCreated = new Date(); |
324 | 334 |
sm.setDateUploaded(dateCreated); |
... | ... | |
338 | 348 |
subject.setValue(Constants.SUBJECT_PUBLIC); |
339 | 349 |
accessRule.addSubject(subject); |
340 | 350 |
accessPolicy.addAllow(accessRule); |
351 |
|
|
341 | 352 |
sm.setAccessPolicy(accessPolicy); |
342 | 353 |
|
343 | 354 |
return sm; |
344 | 355 |
} |
345 | 356 |
|
346 |
private void printHeader(String s) |
|
347 |
{ |
|
357 |
private void printHeader(String s) { |
|
348 | 358 |
System.out.println("****** " + s + " *******"); |
349 | 359 |
} |
350 | 360 |
|
351 |
|
|
352 |
|
|
353 | 361 |
/** |
354 | 362 |
* parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into |
355 | 363 |
* a proper date object |
Also available in: Unified diff
Include the newer d1 libclient, which uses Foresite (and Jena) to construct and parse ORE resource maps for DataONE.