58 |
58 |
import org.dataone.service.types.SystemMetadata;
|
59 |
59 |
import org.dataone.service.types.Identifier;
|
60 |
60 |
import org.dataone.client.D1Client;
|
|
61 |
import org.dataone.client.MNode;
|
|
62 |
import org.dataone.eml.DataoneEMLParser;
|
|
63 |
import org.dataone.eml.EMLDocument;
|
|
64 |
import org.dataone.eml.EMLDocument.DistributionMetadata;
|
61 |
65 |
|
62 |
66 |
//import sun.tools.jstat.Identifier;
|
63 |
67 |
|
... | ... | |
105 |
109 |
String sourceSessionid = loginSource();
|
106 |
110 |
|
107 |
111 |
//do a query
|
108 |
|
String params = "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
|
109 |
|
"returndoctype=eml://ecoinformatics.org/eml-2.0.0&" +
|
110 |
|
"returndoctype=BIN&" +
|
111 |
|
"returndoctype=http://dataone.org/service/types/SystemMetadata/0.1&";
|
|
112 |
String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
|
|
113 |
"returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
|
|
114 |
"returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
|
112 |
115 |
params += "action=query&";
|
113 |
116 |
params += "qformat=xml&";
|
114 |
117 |
params += "anyfield=" + query;
|
... | ... | |
124 |
127 |
|
125 |
128 |
printHeader("Parsing source results");
|
126 |
129 |
D1Client d1 = new D1Client(destinationUrl + "/");
|
|
130 |
MNode mn = d1.getMN(destinationUrl + "/");
|
|
131 |
|
127 |
132 |
printHeader("Processing " + docs.size() + " results.");
|
128 |
133 |
printHeader("logging in to the destination " + destinationUrl);
|
129 |
|
AuthToken authtoken = d1.login(username, password);
|
|
134 |
AuthToken authtoken = mn.login(username, password);
|
130 |
135 |
for(int i=0; i<docs.size(); i++)
|
131 |
136 |
{
|
132 |
137 |
//for each document in the query
|
... | ... | |
137 |
142 |
params = "action=read&qformat=xml&docid=" + docid;
|
138 |
143 |
is = getResponse(sourceUrl, "/metacat", params, "POST");
|
139 |
144 |
String doctext = streamToString(is);
|
140 |
|
//System.out.println("Done retrieving document: " + doctext);
|
|
145 |
System.out.println("doctext: " + doctext);
|
141 |
146 |
is = stringToStream(doctext);
|
|
147 |
//parse the document
|
|
148 |
DataoneEMLParser parser = DataoneEMLParser.getInstance();
|
|
149 |
EMLDocument emld = parser.parseDocument(is);
|
|
150 |
//go through the DistributionMetadata and download any described data
|
|
151 |
|
|
152 |
is = stringToStream(doctext);
|
142 |
153 |
doc.doctext = doctext;
|
143 |
154 |
|
144 |
155 |
printHeader("creating document on destination " + destinationUrl);
|
145 |
156 |
SystemMetadata sysmeta = generateSystemMetadata(doc);
|
|
157 |
for(int j=0; j<emld.distributionMetadata.size(); j++)
|
|
158 |
{
|
|
159 |
Identifier emlId = sysmeta.getIdentifier();
|
|
160 |
DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
|
|
161 |
String dataDocUrl = dm.url;
|
|
162 |
String dataDocMimeType = dm.mimeType;
|
|
163 |
String dataDocLocalId = "";
|
|
164 |
if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
|
|
165 |
{ //we only handle ecogrid urls right now
|
|
166 |
dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") +
|
|
167 |
"ecogrid://knb/".length(), dataDocUrl.length());
|
|
168 |
//get the file
|
|
169 |
params = "action=read&qformat=xml&docid=" + dataDocLocalId;
|
|
170 |
InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
|
|
171 |
String dataDocText = streamToString(dataDocIs);
|
|
172 |
|
|
173 |
//set the id
|
|
174 |
Identifier did = new Identifier();
|
|
175 |
did.setValue(dataDocLocalId);
|
|
176 |
|
|
177 |
//add the describes entry to the eml's sysmeta
|
|
178 |
System.out.println("adding describe for doc " +
|
|
179 |
sysmeta.getIdentifier().getValue() + " :" + did.getValue());
|
|
180 |
sysmeta.addDescribe(did);
|
|
181 |
|
|
182 |
//create sysmeta for the data doc
|
|
183 |
SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
|
|
184 |
//overwrite the bogus values from the last call
|
|
185 |
dataDocSysMeta.setIdentifier(did);
|
|
186 |
dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
|
|
187 |
Checksum checksum = new Checksum();
|
|
188 |
dataDocIs = stringToStream(dataDocText);
|
|
189 |
ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
|
|
190 |
checksum.setAlgorithm(ca);
|
|
191 |
checksum.setValue(checksum(dataDocIs));
|
|
192 |
dataDocSysMeta.setChecksum(checksum);
|
|
193 |
dataDocSysMeta.setSize(dataDocText.getBytes().length);
|
|
194 |
dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
|
|
195 |
boolean error = false;
|
|
196 |
//create the data doc on d1
|
|
197 |
try
|
|
198 |
{
|
|
199 |
mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
|
|
200 |
mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
|
|
201 |
}
|
|
202 |
catch(Exception e)
|
|
203 |
{
|
|
204 |
error = true;
|
|
205 |
System.out.println("ERROR: Could not create data document with id " +
|
|
206 |
dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
|
|
207 |
}
|
|
208 |
finally
|
|
209 |
{
|
|
210 |
if(error)
|
|
211 |
{
|
|
212 |
printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() +
|
|
213 |
"FAILED.");
|
|
214 |
}
|
|
215 |
else
|
|
216 |
{
|
|
217 |
printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
|
|
218 |
" which is described by " + sysmeta.getIdentifier().getValue());
|
|
219 |
}
|
|
220 |
}
|
|
221 |
}
|
|
222 |
else
|
|
223 |
{
|
|
224 |
System.out.println("WARNING: Could not process describes url " +
|
|
225 |
dataDocUrl + " for document " + doc.docid +
|
|
226 |
". Only ecogrid://knb/ urls are currently supported.");
|
|
227 |
}
|
|
228 |
}
|
|
229 |
|
146 |
230 |
try
|
147 |
231 |
{
|
148 |
|
Identifier id = d1.create(authtoken, sysmeta.getIdentifier(),
|
|
232 |
Identifier id = mn.create(authtoken, sysmeta.getIdentifier(),
|
149 |
233 |
IOUtils.toInputStream(doc.doctext), sysmeta);
|
150 |
234 |
System.out.println("Success inserting document " + id.getValue());
|
|
235 |
|
151 |
236 |
}
|
152 |
237 |
catch(Exception e)
|
153 |
238 |
{
|
|
239 |
e.printStackTrace();
|
154 |
240 |
System.out.println("Could not create document with id " +
|
155 |
241 |
sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
|
|
242 |
|
156 |
243 |
}
|
157 |
244 |
finally
|
158 |
245 |
{
|
... | ... | |
163 |
250 |
logout();
|
164 |
251 |
}
|
165 |
252 |
|
166 |
|
private void printHeader(String s)
|
167 |
|
{
|
168 |
|
System.out.println("****** " + s + " *******");
|
169 |
|
}
|
170 |
|
|
171 |
253 |
/**
|
172 |
|
* produce an md5 checksum for item
|
|
254 |
* create the documents listed by an eml document as described in the
|
|
255 |
* new system
|
|
256 |
* @param doc
|
|
257 |
* @param emld
|
173 |
258 |
*/
|
174 |
|
private String checksum(InputStream is)
|
175 |
|
throws Exception
|
176 |
|
{
|
177 |
|
byte[] buffer = new byte[1024];
|
178 |
|
MessageDigest complete = MessageDigest.getInstance("MD5");
|
179 |
|
int numRead;
|
|
259 |
private void createDescribedDocuments(Document doc, EMLDocument emld)
|
|
260 |
{
|
180 |
261 |
|
181 |
|
do
|
182 |
|
{
|
183 |
|
numRead = is.read(buffer);
|
184 |
|
if (numRead > 0)
|
185 |
|
{
|
186 |
|
complete.update(buffer, 0, numRead);
|
187 |
|
}
|
188 |
|
} while (numRead != -1);
|
189 |
|
|
190 |
|
|
191 |
|
return getHex(complete.digest());
|
192 |
262 |
}
|
193 |
263 |
|
194 |
264 |
/**
|
195 |
|
* convert a byte array to a hex string
|
196 |
|
*/
|
197 |
|
private static String getHex( byte [] raw )
|
198 |
|
{
|
199 |
|
final String HEXES = "0123456789ABCDEF";
|
200 |
|
if ( raw == null ) {
|
201 |
|
return null;
|
202 |
|
}
|
203 |
|
final StringBuilder hex = new StringBuilder( 2 * raw.length );
|
204 |
|
for ( final byte b : raw ) {
|
205 |
|
hex.append(HEXES.charAt((b & 0xF0) >> 4))
|
206 |
|
.append(HEXES.charAt((b & 0x0F)));
|
207 |
|
}
|
208 |
|
return hex.toString();
|
209 |
|
}
|
210 |
|
|
211 |
|
/**
|
212 |
265 |
* @param doc
|
213 |
266 |
* @return
|
214 |
267 |
*/
|
... | ... | |
231 |
284 |
}
|
232 |
285 |
else
|
233 |
286 |
{
|
234 |
|
format = ObjectFormat.convert("text/plain");
|
|
287 |
format = ObjectFormat.TEXT_PLAIN;
|
235 |
288 |
}
|
236 |
289 |
}
|
237 |
290 |
sm.setObjectFormat(format);
|
... | ... | |
269 |
322 |
sm.setDateSysMetadataModified(dateUpdated);
|
270 |
323 |
}
|
271 |
324 |
NodeReference nr = new NodeReference();
|
272 |
|
nr.setValue(sourceUrl);
|
|
325 |
nr.setValue("KNB");
|
273 |
326 |
sm.setOriginMemberNode(nr);
|
274 |
327 |
sm.setAuthoritativeMemberNode(nr);
|
|
328 |
|
275 |
329 |
return sm;
|
276 |
330 |
}
|
277 |
331 |
|
|
332 |
private void printHeader(String s)
|
|
333 |
{
|
|
334 |
System.out.println("****** " + s + " *******");
|
|
335 |
}
|
|
336 |
|
278 |
337 |
/**
|
|
338 |
* produce an md5 checksum for item
|
|
339 |
*/
|
|
340 |
private String checksum(InputStream is)
|
|
341 |
throws Exception
|
|
342 |
{
|
|
343 |
byte[] buffer = new byte[1024];
|
|
344 |
MessageDigest complete = MessageDigest.getInstance("MD5");
|
|
345 |
int numRead;
|
|
346 |
|
|
347 |
do
|
|
348 |
{
|
|
349 |
numRead = is.read(buffer);
|
|
350 |
if (numRead > 0)
|
|
351 |
{
|
|
352 |
complete.update(buffer, 0, numRead);
|
|
353 |
}
|
|
354 |
} while (numRead != -1);
|
|
355 |
|
|
356 |
|
|
357 |
return getHex(complete.digest());
|
|
358 |
}
|
|
359 |
|
|
360 |
/**
|
|
361 |
* convert a byte array to a hex string
|
|
362 |
*/
|
|
363 |
private static String getHex( byte [] raw )
|
|
364 |
{
|
|
365 |
final String HEXES = "0123456789ABCDEF";
|
|
366 |
if ( raw == null ) {
|
|
367 |
return null;
|
|
368 |
}
|
|
369 |
final StringBuilder hex = new StringBuilder( 2 * raw.length );
|
|
370 |
for ( final byte b : raw ) {
|
|
371 |
hex.append(HEXES.charAt((b & 0xF0) >> 4))
|
|
372 |
.append(HEXES.charAt((b & 0x0F)));
|
|
373 |
}
|
|
374 |
return hex.toString();
|
|
375 |
}
|
|
376 |
|
|
377 |
/**
|
279 |
378 |
* parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
|
280 |
379 |
* a proper date object
|
281 |
380 |
* @param date
|
updated the metacatpopulator to use the d1 eml parser to create system metadata in a smarter fashion.