Revision 3665
Added by Jing Tao about 17 years ago
test/edu/ucsb/nceas/metacattest/UploadIPCCDataTest.java | ||
---|---|---|
45 | 45 |
|
46 | 46 |
import java.io.*; |
47 | 47 |
import java.net.*; |
48 |
import java.text.SimpleDateFormat; |
|
48 | 49 |
import java.util.*; |
49 | 50 |
|
50 | 51 |
|
... | ... | |
58 | 59 |
* 2. A text file listing IPCC docids (with revision numbers). If the text file is not available, it needs |
59 | 60 |
* a metacat query file to search metacat to get the docid list. |
60 | 61 |
* What the class will do: |
61 |
* 1. After getting a eml docid form the docid list, it will read the eml from Metacat. |
|
62 |
* 2. DOM parser will get online URL information from eml document. |
|
63 |
* 3. It will generate docid for data file and modify the URL in eml base on the generated docid. |
|
64 |
* 4. Base on old URL information, this program will find the data file in the direcotry which contains |
|
65 |
* the srb data file, then upload the download srb data file to Metacat with assigned docid. |
|
66 |
* 5. Update eml document with the new URL information (pointing to knb). |
|
62 |
* 1. It will read the eml from Metacat. |
|
63 |
* 2. Get online URL information from eml document by DOM parser. |
|
64 |
* 3. Based on the URL information, this program will find the data file in |
|
65 |
* the directory which contains the srb data file. |
|
66 |
* 4. It will generate docid for the data file. |
|
67 |
* 5. Upload the downloaded srb data file to Metacat with the assigned docid. |
|
68 |
* 6. Modify the eml document with the new URL information (pointing to |
|
69 |
* knb) and new version number in eml |
|
70 |
* 7. Update it to a new version in Metacat. |
|
71 |
* 8. Go through the above 7 steps for every eml document in the list. |
|
67 | 72 |
* |
68 | 73 |
*/ |
69 | 74 |
public class UploadIPCCDataTest extends TestCase |
70 | 75 |
{ |
71 | 76 |
|
72 |
private static final Log log = LogFactory.getLog("edu.ucsb.nceas.metacattest.UploadIPCCDataTest"); |
|
77 |
|
|
73 | 78 |
/* Initialize Options*/ |
74 | 79 |
static |
75 | 80 |
{ |
... | ... | |
91 | 96 |
private static String METACATURL = "http://chico.dyndns.org/knb/metacat"; |
92 | 97 |
private static String USERNAME = "uid=tao,o=NCEAS,dc=ecoinformatics,dc=org"; |
93 | 98 |
private static String PASSWORD = "password"; |
94 |
private static String TABLEONLINEURL= "/eml/dataset/dataTable/physical/distribution/online/url"; |
|
95 |
private static String SPATIALONLINEURL = "/eml/dataset/spatialRaster/physical/distribution/online/url"; |
|
99 |
private static String TABLEONLINEURL= "/eml:eml/dataset/dataTable/physical/distribution/online/url"; |
|
100 |
private static String SPATIALONLINEURL = "/eml:eml/dataset/spatialRaster/physical/distribution/online/url"; |
|
101 |
private static String PACKAGEID ="/eml:eml/@packageId"; |
|
96 | 102 |
private static String SRB = "srb://"; |
103 |
private static String KNB = "ecogrid://knb/"; |
|
104 |
private static String DATAIDPREFIX = "IPCC"; |
|
105 |
private static String DOT = "."; |
|
106 |
private static String SUCCESSLOG = "update.log"; |
|
107 |
private static String ERRORLOG = "error.log"; |
|
108 |
private File log = new File(SUCCESSLOG); |
|
109 |
private File error = new File (ERRORLOG); |
|
97 | 110 |
|
111 |
|
|
98 | 112 |
/** |
99 | 113 |
* Constructor to build the test |
100 | 114 |
* |
... | ... | |
103 | 117 |
public UploadIPCCDataTest(String name) |
104 | 118 |
{ |
105 | 119 |
super(name); |
120 |
|
|
106 | 121 |
} |
107 | 122 |
|
108 | 123 |
|
... | ... | |
132 | 147 |
// 3. Base on the URL information, this program will find the data file in |
133 | 148 |
// the direcotry which contains the srb data file. |
134 | 149 |
// 4. It will generate docid for the data file |
135 |
// 5. Modify the eml document with the new URL information (pointing to |
|
136 |
// knb) and new version number in eml, then update it to a new version in Metacat. |
|
137 |
//6. At last upload the download srb data file to Metacat with assigned docid. |
|
150 |
// 5. At last upload the download srb data file to Metacat with assigned docid. |
|
151 |
// 6. Modify the eml document with the new URL information (pointing to |
|
152 |
// knb) and new version number in eml. |
|
153 |
// 7.Update it to a new version in Metacat. |
|
154 |
|
|
138 | 155 |
if (list != null && !list.isEmpty()) |
139 | 156 |
{ |
140 | 157 |
int size = list.size(); |
... | ... | |
144 | 161 |
try |
145 | 162 |
{ |
146 | 163 |
docid = (String)list.elementAt(i); |
147 |
handleSingleEML(docid); |
|
164 |
String dataId = handleSingleEML(docid); |
|
165 |
String message = "Successfully update eml "+docid + " with data id "+dataId; |
|
166 |
writeLog(log, message); |
|
148 | 167 |
} |
149 | 168 |
catch(Exception e) |
150 | 169 |
{ |
151 | 170 |
System.err.println("Failed to handle eml document "+docid + " since "+ |
152 | 171 |
e.getMessage()); |
172 |
String message = "failed to update eml "+docid + "\n "+e.getMessage(); |
|
173 |
writeLog(error, message); |
|
153 | 174 |
} |
154 | 175 |
} |
155 | 176 |
} |
... | ... | |
159 | 180 |
} |
160 | 181 |
|
161 | 182 |
} |
183 |
|
|
162 | 184 |
/* |
163 | 185 |
* Does actually job to upload data file and modify eml document for a given id. |
164 |
* Here are its tasts:
|
|
186 |
* Here are its tasks:
|
|
165 | 187 |
* 1. It will read the eml from Metacat. |
166 | 188 |
* 2. Get online URL information from eml document by DOM parser. |
167 | 189 |
* 3. Base on the URL information, this program will find the data file in |
168 | 190 |
* the direcotry which contains the srb data file. |
169 | 191 |
* 4. It will generate docid for the data file. |
170 |
* 5. Modify the eml document with the new URL information (pointing to |
|
171 |
* knb) and new version number in eml, then update it to a new version in Metacat. |
|
172 |
* 6. At last upload the download srb data file to Metacat with assigned docid. |
|
192 |
* 5. Upload the download srb data file to Metacat with assigned docid. |
|
193 |
* 6. Modify the eml document with the new URL information (pointing to |
|
194 |
* knb) and new version number in eml |
|
195 |
* 7. Update it to a new version in Metacat. |
|
196 |
* |
|
173 | 197 |
*/ |
174 |
private void handleSingleEML(String docid) throws Exception
|
|
198 |
private String handleSingleEML(String docid) throws Exception
|
|
175 | 199 |
{ |
176 | 200 |
Metacat metacat = MetacatFactory.createMetacatConnection(METACATURL); |
177 | 201 |
// login metacat |
178 |
String loginResponse = metacat.login(USERNAME, PASSWORD);
|
|
179 |
if (loginResponse.indexOf("<login>") == -1)
|
|
202 |
String response = metacat.login(USERNAME, PASSWORD);
|
|
203 |
if (response.indexOf("<login>") == -1)
|
|
180 | 204 |
{ |
181 |
throw new Exception("login failed "+loginResponse);
|
|
205 |
throw new Exception("login failed "+response);
|
|
182 | 206 |
} |
183 |
// Reads eml document from metacat |
|
207 |
// 1. Reads eml document from metacat
|
|
184 | 208 |
Reader r = metacat.read(docid); |
185 | 209 |
Document DOMdoc = XMLUtilities.getXMLReaderAsDOMDocument(r); |
186 |
// Gets online url information. If onlineUrl is not SRB, through an exception |
|
187 |
String onlineUrl = getOnLineURL(DOMdoc); |
|
188 |
// Find the srb data file name |
|
210 |
Node rootNode = (Node)DOMdoc.getDocumentElement(); |
|
189 | 211 |
|
190 |
// Generate docid for data file |
|
191 |
//String dataId = generateId(); |
|
192 |
// Updates eml online url and package id |
|
193 |
//updateEMLDoc(); |
|
194 |
// update EML document in metacat |
|
212 |
//2. Gets online url information. If onlineUrl is not SRB, throw an exception |
|
213 |
String onlineUrl = getOnLineURL(rootNode); |
|
214 |
//System.out.println("=================The url is "+onlineUrl); |
|
195 | 215 |
|
196 |
// upload data file to Metacat |
|
216 |
//3. Find the srb data file |
|
217 |
String dataFileName = getDataFileNameFromURL(onlineUrl); |
|
218 |
//System.out.println("=================The data file is "+dataFileName); |
|
219 |
File dataFile = null; |
|
220 |
dataFile = new File(SRBDATAFILEDIR,dataFileName); |
|
221 |
if (!dataFile.exists()) |
|
222 |
{ |
|
223 |
throw new Exception("Couldn't find the data file in srb data directory "+dataFile); |
|
224 |
} |
|
225 |
|
|
226 |
//4. Generate docid for data file |
|
227 |
String dataId = generateId(); |
|
228 |
//System.out.println("=======The docid for data file will be "+dataId); |
|
197 | 229 |
|
230 |
//5. upload data file to Metacat |
|
231 |
response = metacat.upload(dataId, dataFile); |
|
232 |
if (response.indexOf("<success>") == -1) |
|
233 |
{ |
|
234 |
throw new Exception("Couldn't upload data file "+dataFileName + |
|
235 |
" with id "+dataId+ " into Metacat since "+response); |
|
236 |
} |
|
237 |
|
|
238 |
//6. Updates eml online url and package id in DOM |
|
239 |
String newId = updateEMLDoc(rootNode, docid, dataId); |
|
240 |
//System.out.println("The new docid is ========"+newId); |
|
241 |
|
|
242 |
//Put EML DOM with the new packagId and oneline url into a StringWriter and store it to String |
|
243 |
StringWriter stringWriter = new StringWriter(); |
|
244 |
PrintWriter printWriter = new PrintWriter(stringWriter); |
|
245 |
XMLUtilities.print(rootNode, printWriter); |
|
246 |
String xml = stringWriter.toString(); |
|
247 |
//System.out.println("the xml is "+xml); |
|
248 |
|
|
249 |
//7.insert new (update) EML document into Metacat |
|
250 |
StringReader xmlReader = new StringReader(xml); |
|
251 |
response = metacat.update(newId, xmlReader, null); |
|
252 |
if (response.indexOf("<success>") == -1) |
|
253 |
{ |
|
254 |
throw new Exception("Upload data file "+dataFileName + |
|
255 |
" with id "+dataId+ " successfully but update eml "+newId +" failed since "+ response); |
|
256 |
} |
|
198 | 257 |
metacat.logout(); |
258 |
return dataId; |
|
199 | 259 |
} |
200 | 260 |
|
201 | 261 |
/* |
... | ... | |
203 | 263 |
* The online url xpath can be "/eml/dataset/dataTable/physical/distribution/online/url" |
204 | 264 |
* or "/eml/dataset/spatialRaster/physical/distribution/online/url" |
205 | 265 |
*/ |
206 |
private String getOnLineURL(Document doc) throws Exception
|
|
266 |
private String getOnLineURL(Node root) throws Exception
|
|
207 | 267 |
{ |
208 | 268 |
String url = null; |
209 |
if (doc == null) |
|
210 |
{ |
|
211 |
throw new Exception("DOM document for this EML is null and couldn't get online url from it"); |
|
212 |
} |
|
213 |
Node root = (Node)doc.getDocumentElement(); |
|
214 | 269 |
if (root == null) |
215 | 270 |
{ |
216 | 271 |
throw new Exception("root node for this EML is null and couldn't get online url from it"); |
... | ... | |
234 | 289 |
} |
235 | 290 |
return url; |
236 | 291 |
} |
292 |
|
|
293 |
/* |
|
294 |
* Automatically to generate a unique id for ddata file. |
|
295 |
* This id will be looked like - DATAIDPREFIX.numberBaseonTime.1, e.g |
|
296 |
* IPCC.20072321.1 |
|
297 |
*/ |
|
298 |
private String generateId() |
|
299 |
{ |
|
300 |
int version = 1; |
|
301 |
StringBuffer docid = new StringBuffer(DATAIDPREFIX); |
|
302 |
docid.append(DOT); |
|
303 |
|
|
304 |
// Create a calendar to get the date formatted properly |
|
305 |
String[] ids = TimeZone.getAvailableIDs(-8 * 60 * 60 * 1000); |
|
306 |
SimpleTimeZone pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, ids[0]); |
|
307 |
pdt.setStartRule(Calendar.APRIL, 1, Calendar.SUNDAY, 2*60*60*1000); |
|
308 |
pdt.setEndRule(Calendar.OCTOBER, -1, Calendar.SUNDAY, 2*60*60*1000); |
|
309 |
Calendar calendar = new GregorianCalendar(pdt); |
|
310 |
Date trialTime = new Date(); |
|
311 |
calendar.setTime(trialTime); |
|
312 |
|
|
313 |
int time = 0; |
|
314 |
|
|
315 |
docid.append(calendar.get(Calendar.YEAR)); |
|
316 |
|
|
317 |
time = calendar.get(Calendar.DAY_OF_YEAR); |
|
318 |
if(time < 10){ |
|
319 |
docid.append("0"); |
|
320 |
docid.append("0"); |
|
321 |
docid.append(time); |
|
322 |
} else if(time < 100) { |
|
323 |
docid.append("0"); |
|
324 |
docid.append(time); |
|
325 |
} else { |
|
326 |
docid.append(time); |
|
327 |
} |
|
328 |
|
|
329 |
time = calendar.get(Calendar.HOUR_OF_DAY); |
|
330 |
if(time < 10){ |
|
331 |
docid.append("0"); |
|
332 |
docid.append(time); |
|
333 |
} else { |
|
334 |
docid.append(time); |
|
335 |
} |
|
336 |
|
|
337 |
time = calendar.get(Calendar.MINUTE); |
|
338 |
if(time < 10){ |
|
339 |
docid.append("0"); |
|
340 |
docid.append(time); |
|
341 |
} else { |
|
342 |
docid.append(time); |
|
343 |
} |
|
344 |
|
|
345 |
time = calendar.get(Calendar.SECOND); |
|
346 |
if(time < 10){ |
|
347 |
docid.append("0"); |
|
348 |
docid.append(time); |
|
349 |
} else { |
|
350 |
docid.append(time); |
|
351 |
} |
|
352 |
//sometimes this number is not unique, so we append a random number |
|
353 |
int random = (new Double(Math.random()*100)).intValue(); |
|
354 |
docid.append(random); |
|
355 |
docid.append(DOT); |
|
356 |
docid.append(version); |
|
357 |
|
|
358 |
return docid.toString(); |
|
359 |
|
|
360 |
} |
|
361 |
/* |
|
362 |
* Get data file name from online url. SRB oneline url will looks like - |
|
363 |
* srb://seek:/home/beam.seek/IPCC_climate/Present/ccld6190.dat. |
|
364 |
* The last part - ccld6190.dat is the file name. This method will get |
|
365 |
* the file name from the give url |
|
366 |
*/ |
|
367 |
private String getDataFileNameFromURL(String onlineUrl) throws Exception |
|
368 |
{ |
|
369 |
String dataFile = null; |
|
370 |
String slash = "/"; |
|
371 |
if (onlineUrl != null) |
|
372 |
{ |
|
373 |
int index = onlineUrl.lastIndexOf(slash); |
|
374 |
try |
|
375 |
{ |
|
376 |
dataFile = onlineUrl.substring(index+1); |
|
377 |
} |
|
378 |
catch(Exception e) |
|
379 |
{ |
|
380 |
throw new Exception("Couldn't get data file name from the given url "+onlineUrl+ |
|
381 |
" since "+e.getMessage()); |
|
382 |
} |
|
383 |
} |
|
384 |
return dataFile; |
|
385 |
} |
|
386 |
|
|
387 |
|
|
237 | 388 |
|
238 | 389 |
/* |
239 | 390 |
* Gets eml document list from text file. The text file format should be: |
... | ... | |
261 | 412 |
} |
262 | 413 |
|
263 | 414 |
/* |
415 |
* Update the given eml document (in DOM). There are two places to be updated |
|
416 |
* The package id will be increased 1, i.e., from 1 to 2. The distribution online url will |
|
417 |
* point to the new ecogrid id, i.e. , ecogrid://knb/IPCC.2007.1 |
|
418 |
*/ |
|
419 |
private String updateEMLDoc(Node root, String docid, String dataId) throws Exception |
|
420 |
{ |
|
421 |
// update package id |
|
422 |
docid = getIncreasedNewDocid(docid); |
|
423 |
XMLUtilities.addAttributeNodeToDOMTree( root, PACKAGEID, docid); |
|
424 |
// update online url. oneline url should either in spatialRaster or dataTable. |
|
425 |
// First try to see if spatialRaster exist or not. If not try data table |
|
426 |
String newUrl = KNB+dataId; //new url looks like ecogrid://knb/IPCC.2007.1 |
|
427 |
boolean isSpatialRaster = true; |
|
428 |
boolean isDataTable = false; |
|
429 |
Node urlNode = XMLUtilities.getTextNodeWithXPath(root, SPATIALONLINEURL); |
|
430 |
if (urlNode == null) |
|
431 |
{ |
|
432 |
// has no spatialRaster |
|
433 |
isSpatialRaster = false; |
|
434 |
} |
|
435 |
else |
|
436 |
{ |
|
437 |
// has spatialRaster |
|
438 |
isSpatialRaster = true; |
|
439 |
} |
|
440 |
// determin if has datable or not |
|
441 |
urlNode = XMLUtilities.getTextNodeWithXPath(root,TABLEONLINEURL); |
|
442 |
if (urlNode != null) |
|
443 |
{ |
|
444 |
isDataTable = true; |
|
445 |
} |
|
446 |
|
|
447 |
if (isSpatialRaster && !isDataTable) |
|
448 |
{ |
|
449 |
//only has spatialRaster and no dataTable, update spatialRaster online url |
|
450 |
XMLUtilities.addTextNodeToDOMTree(root, SPATIALONLINEURL, newUrl); |
|
451 |
} |
|
452 |
else if (!isSpatialRaster && isDataTable) |
|
453 |
{ |
|
454 |
//only has dataTable and no spatialRaster, update dataTable online url |
|
455 |
XMLUtilities.addTextNodeToDOMTree(root, TABLEONLINEURL, newUrl); |
|
456 |
} |
|
457 |
else |
|
458 |
{ |
|
459 |
//some strange things happen |
|
460 |
throw new Exception("The eml either has both dataTable or spatialRaster OR doesn't has any entity"); |
|
461 |
} |
|
462 |
return docid; |
|
463 |
} |
|
464 |
|
|
465 |
/* |
|
466 |
* Gets new docid with increased version. Docid looks like tao.1.1. The new docid will be |
|
467 |
* tao.1.2. |
|
468 |
*/ |
|
469 |
private String getIncreasedNewDocid(String docid) throws Exception |
|
470 |
{ |
|
471 |
int rev = 1; |
|
472 |
String revision = null; |
|
473 |
String prefix = null; |
|
474 |
String newId = null; |
|
475 |
if (docid != null) |
|
476 |
{ |
|
477 |
int index = docid.lastIndexOf(DOT); |
|
478 |
try |
|
479 |
{ |
|
480 |
// Get revsion part(1) |
|
481 |
revision = docid.substring(index+1); |
|
482 |
// Get prefix part (tao.1.) |
|
483 |
prefix = docid.substring(0, index+1); |
|
484 |
// increase version from 1 to 2 |
|
485 |
rev = (new Integer(revision)).intValue(); |
|
486 |
rev++; |
|
487 |
// combines the prefix tao.1. and new revision2 to get tao.1.2 |
|
488 |
newId= prefix+rev; |
|
489 |
|
|
490 |
} |
|
491 |
catch(Exception e) |
|
492 |
{ |
|
493 |
throw new Exception("Couldn't increase revsion number from the given docid "+docid+ |
|
494 |
" since "+e.getMessage()); |
|
495 |
|
|
496 |
} |
|
497 |
} |
|
498 |
return newId; |
|
499 |
} |
|
500 |
|
|
501 |
/* |
|
264 | 502 |
* Gets eml document list from searching Metacat |
265 | 503 |
* TO-DO: This method need to be implemented |
266 | 504 |
*/ |
... | ... | |
299 | 537 |
} |
300 | 538 |
return list; |
301 | 539 |
} |
302 |
|
|
540 |
|
|
541 |
/* |
|
542 |
* Writes error message into log file. |
|
543 |
*/ |
|
544 |
private void writeLog(File file, String message) |
|
545 |
{ |
|
546 |
try |
|
547 |
{ |
|
548 |
FileOutputStream fos = new FileOutputStream(file, true); |
|
549 |
PrintWriter pw = new PrintWriter(fos); |
|
550 |
SimpleDateFormat formatter = new SimpleDateFormat ("yy-MM-dd HH:mm:ss"); |
|
551 |
java.util.Date localtime = new java.util.Date(); |
|
552 |
String dateString = formatter.format(localtime); |
|
553 |
dateString += " :: " + message; |
|
554 |
//time stamp each entry |
|
555 |
pw.println(dateString); |
|
556 |
pw.flush(); |
|
557 |
pw.close(); |
|
558 |
fos.close(); |
|
559 |
} |
|
560 |
catch(Exception e) |
|
561 |
{ |
|
562 |
System.out.println("error writing to replication log from " + |
|
563 |
"MetacatReplication.replLog: " + e.getMessage()); |
|
564 |
//e.printStackTrace(System.out); |
|
565 |
} |
|
566 |
} |
|
303 | 567 |
} |
Also available in: Unified diff
Fixed the implementation of the Java class that uploads IPCC data.