Project

General

Profile

« Previous | Next » 

Revision 3665

Added by Jing Tao almost 17 years ago

Fixed the implementation of the Java class that uploads IPCC data.

View differences:

test/edu/ucsb/nceas/metacattest/UploadIPCCDataTest.java
45 45

  
46 46
import java.io.*;
47 47
import java.net.*;
48
import java.text.SimpleDateFormat;
48 49
import java.util.*;
49 50

  
50 51

  
......
58 59
 * 2. A text file listing IPCC docids (with revision numbers). If the text file is not available, it needs
59 60
 * a metacat query file to search metacat to get the docid list.
60 61
 * What the class will do:
61
 * 1. After getting a eml docid form the docid list, it will read the eml from Metacat.
62
 * 2. DOM parser will get online URL information from eml document.
63
 * 3. It will generate docid for data file and modify the URL in eml base on the generated docid.
64
 * 4. Base on old URL information, this program will find the data file in the direcotry which contains
65
 * the srb data file, then upload the download srb data file to Metacat with assigned docid.
66
 *  5. Update eml document with the new URL information (pointing to knb).
62
 * 1. It will read the eml from Metacat.
63
 * 2. Get online URL information from eml document by DOM parser.
64
 * 3. Based on the URL information, this program will find the data file in
65
 *     the directory which contains the srb data file.
66
* 4. It will generate docid for the data file.
67
* 5. Upload the downloaded srb data file to Metacat with the assigned docid.
68
* 6. Modify the eml document with the new URL information (pointing to
69
 *     knb) and new version number in eml
70
 * 7. Update it to a new version in Metacat.
71
 * 8. Go through the above 7 steps for every eml document in the list.
67 72
 * 
68 73
 */ 
69 74
public class UploadIPCCDataTest extends TestCase
70 75
{
71 76
  
72
	  private static final Log log = LogFactory.getLog("edu.ucsb.nceas.metacattest.UploadIPCCDataTest");
77
	 
73 78
	  /* Initialize Options*/
74 79
	  static
75 80
	  {
......
91 96
	  private static String METACATURL      = "http://chico.dyndns.org/knb/metacat";
92 97
	  private static String USERNAME          = "uid=tao,o=NCEAS,dc=ecoinformatics,dc=org";
93 98
	  private static String PASSWORD           = "password";
94
	  private static String TABLEONLINEURL= "/eml/dataset/dataTable/physical/distribution/online/url";
95
	  private static String SPATIALONLINEURL = "/eml/dataset/spatialRaster/physical/distribution/online/url";
99
	  private static String TABLEONLINEURL= "/eml:eml/dataset/dataTable/physical/distribution/online/url";
100
	  private static String SPATIALONLINEURL = "/eml:eml/dataset/spatialRaster/physical/distribution/online/url";
101
	  private static String PACKAGEID               ="/eml:eml/@packageId";
96 102
	  private static String SRB                           = "srb://";
103
	  private static String KNB                           = "ecogrid://knb/";
104
	  private static String DATAIDPREFIX          = "IPCC";
105
	  private static String DOT                           = ".";
106
	  private static String SUCCESSLOG             = "update.log";
107
	  private static String ERRORLOG                = "error.log";
108
	  private File log = new File(SUCCESSLOG);
109
	  private File error = new File (ERRORLOG);
97 110
	  
111
	  
98 112
	  /**
99 113
	   * Constructor to build the test
100 114
	   *
......
103 117
	  public UploadIPCCDataTest(String name)
104 118
	  {
105 119
	    super(name);
120
	    
106 121
	  }
107 122

  
108 123

  
......
132 147
			  // 3. Base on the URL information, this program will find the data file in
133 148
			  // the direcotry which contains the srb data file.
134 149
			  // 4. It will generate docid for the data file
135
			  // 5. Modify the eml document with the new URL information (pointing to
136
			  // knb) and new version number in eml,  then update it to a new version in Metacat.
137
			  //6. At last upload the download srb data file to Metacat with assigned docid.
150
              // 5. At last upload the download srb data file to Metacat with assigned docid.
151
			  // 6. Modify the eml document with the new URL information (pointing to
152
			  // knb) and new version number in eml.
153
			  // 7.Update it to a new version in Metacat.
154
			  
138 155
              if (list != null && !list.isEmpty())
139 156
              {
140 157
            	   int size = list.size();
......
144 161
            		   try
145 162
            		   {
146 163
            			   docid = (String)list.elementAt(i);
147
            			   handleSingleEML(docid);
164
            			   String dataId = handleSingleEML(docid);
165
            			   String message = "Successfully update eml "+docid + " with data id "+dataId;
166
            			   writeLog(log, message);
148 167
            		   }
149 168
            		   catch(Exception e)
150 169
            		   {
151 170
            			   System.err.println("Failed to handle eml document "+docid + " since "+
152 171
            					   e.getMessage());
172
            			   String message = "failed to update eml "+docid + "\n "+e.getMessage();
173
            			   writeLog(error, message);
153 174
            		   }
154 175
            	   }
155 176
              }
......
159 180
              }
160 181
		
161 182
	  }
183
	  
162 184
	  /*
163 185
	   * Does the actual job of uploading the data file and modifying the eml document for a given id.
164
	   * Here are its tasts:
186
	   * Here are its tasks:
165 187
	   * 1. It will read the eml from Metacat.
166 188
	   * 2. Get online URL information from eml document by DOM parser.
167 189
	   * 3. Based on the URL information, this program will find the data file in
168 190
	   *     the directory which contains the srb data file.
169 191
	   * 4. It will generate docid for the data file.
170
	   * 5. Modify the eml document with the new URL information (pointing to
171
	   *     knb) and new version number in eml,  then update it to a new version in Metacat.
172
	   * 6.  At last upload the download srb data file to Metacat with assigned docid.
192
	   * 5. Upload the downloaded srb data file to Metacat with the assigned docid.
193
	   * 6. Modify the eml document with the new URL information (pointing to
194
	   *     knb) and new version number in eml
195
	   * 7. Update it to a new version in Metacat.
196
	   * 
173 197
	   */	 
174
	  private void handleSingleEML(String docid) throws Exception
198
	  private String handleSingleEML(String docid) throws Exception
175 199
	  {
176 200
		  Metacat metacat = MetacatFactory.createMetacatConnection(METACATURL);
177 201
		  // login metacat 
178
		  String loginResponse = metacat.login(USERNAME, PASSWORD);
179
		  if (loginResponse.indexOf("<login>") == -1)
202
		  String response = metacat.login(USERNAME, PASSWORD);
203
		  if (response.indexOf("<login>") == -1)
180 204
		  {
181
			  throw new Exception("login failed "+loginResponse);
205
			  throw new Exception("login failed "+response);
182 206
		  }
183
		  // Reads eml document from metacat
207
		  // 1. Reads eml document from metacat
184 208
		  Reader r = metacat.read(docid);
185 209
          Document DOMdoc = XMLUtilities.getXMLReaderAsDOMDocument(r);
186
          // Gets online url information. If onlineUrl is not SRB, through an exception
187
          String onlineUrl = getOnLineURL(DOMdoc);
188
          // Find the srb data file name
210
          Node rootNode = (Node)DOMdoc.getDocumentElement();
189 211
          
190
          // Generate docid for data file
191
          //String dataId = generateId();
192
          // Updates eml online url and package id
193
          //updateEMLDoc();
194
          // update EML document in metacat
212
          //2.  Gets online url information. If onlineUrl is not SRB, through an exception
213
          String onlineUrl = getOnLineURL(rootNode);
214
          //System.out.println("=================The url is "+onlineUrl);
195 215
          
196
          // upload data file to Metacat
216
          //3. Find the srb data file 
217
          String dataFileName = getDataFileNameFromURL(onlineUrl);
218
          //System.out.println("=================The data file is "+dataFileName);
219
          File dataFile = null;
220
          dataFile = new File(SRBDATAFILEDIR,dataFileName);
221
           if (!dataFile.exists())
222
           {
223
        	  throw new Exception("Couldn't find the data file in srb data directory "+dataFile);
224
          }
225
           
226
          //4. Generate docid for data file
227
          String dataId = generateId();
228
          //System.out.println("=======The docid for data file will be "+dataId);
197 229
          
230
          //5. upload data file to Metacat
231
          response = metacat.upload(dataId, dataFile);
232
          if (response.indexOf("<success>") == -1)
233
          {
234
        	  throw new Exception("Couldn't upload data file "+dataFileName +
235
        			  " with id "+dataId+ " into Metacat since "+response);
236
          }
237
          
238
          //6. Updates eml online url and package id in DOM
239
          String newId = updateEMLDoc(rootNode, docid, dataId);
240
          //System.out.println("The new docid is ========"+newId);
241
          
242
          //Put EML DOM with the new packagId and oneline url into a StringWriter and store it to String
243
          StringWriter stringWriter = new StringWriter();
244
          PrintWriter printWriter = new PrintWriter(stringWriter);
245
		  XMLUtilities.print(rootNode, printWriter);
246
		  String xml = stringWriter.toString();
247
		  //System.out.println("the xml is "+xml);		  
248
		  
249
		  //7.insert new (update) EML document into Metacat
250
          StringReader xmlReader = new StringReader(xml);
251
          response = metacat.update(newId, xmlReader, null);
252
          if (response.indexOf("<success>") == -1)
253
          {
254
        	  throw new Exception("Upload data file "+dataFileName +
255
        			  " with id "+dataId+ " successfully but update eml "+newId +" failed since "+ response);
256
          }
198 257
          metacat.logout();
258
          return dataId;
199 259
	  }
200 260
	  
201 261
	  /*
......
203 263
	   * The online url xpath can be "/eml/dataset/dataTable/physical/distribution/online/url"
204 264
	   * or "/eml/dataset/spatialRaster/physical/distribution/online/url"
205 265
	   */
206
	  private String getOnLineURL(Document doc) throws Exception
266
	  private String getOnLineURL(Node root) throws Exception
207 267
	  {
208 268
		  String url = null;
209
		  if (doc == null)
210
		  {
211
			  throw new Exception("DOM document for this EML is null and couldn't get online url from it");
212
		  }
213
		  Node root = (Node)doc.getDocumentElement();
214 269
		  if (root == null)
215 270
		  {
216 271
			  throw new Exception("root node for this EML is null and couldn't get online url from it");
......
234 289
		 }
235 290
		  return url;
236 291
	  }
292
	  
293
	  /*
294
	   * Automatically generates a unique id for the data file.
295
	   * This id will look like DATAIDPREFIX.numberBasedOnTime.1, e.g.
296
	   * IPCC.20072321.1
297
	   */
298
	  private String generateId()
299
	  {
300
		  int version = 1;
301
		  StringBuffer docid = new StringBuffer(DATAIDPREFIX);
302
		  docid.append(DOT);
303
				     
304
		  // Create a calendar to get the date formatted properly
305
		  String[] ids = TimeZone.getAvailableIDs(-8 * 60 * 60 * 1000);
306
		  SimpleTimeZone pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, ids[0]);
307
		  pdt.setStartRule(Calendar.APRIL, 1, Calendar.SUNDAY, 2*60*60*1000);
308
		  pdt.setEndRule(Calendar.OCTOBER, -1, Calendar.SUNDAY, 2*60*60*1000);
309
		  Calendar calendar = new GregorianCalendar(pdt);
310
		  Date trialTime = new Date();
311
		  calendar.setTime(trialTime);
312

  
313
			int time = 0; 
314
			
315
			docid.append(calendar.get(Calendar.YEAR));
316
			
317
			time = calendar.get(Calendar.DAY_OF_YEAR);
318
			if(time < 10){
319
				docid.append("0");
320
				docid.append("0");
321
				docid.append(time);
322
			} else if(time < 100) {
323
				docid.append("0");
324
				docid.append(time);
325
			} else {
326
				docid.append(time);
327
			}
328
			
329
			time = calendar.get(Calendar.HOUR_OF_DAY);
330
			if(time < 10){
331
				docid.append("0");
332
				docid.append(time);
333
			} else {
334
				docid.append(time);
335
			}
336
			
337
			time = calendar.get(Calendar.MINUTE);
338
			if(time < 10){
339
				docid.append("0");
340
				docid.append(time);
341
			} else {
342
				docid.append(time);
343
			}
344
			
345
			time = calendar.get(Calendar.SECOND);
346
			if(time < 10){
347
				docid.append("0");
348
				docid.append(time);
349
			} else {
350
				docid.append(time);
351
			}		    
352
			 //sometimes this number is not unique, so we append a random number
353
			int random = (new Double(Math.random()*100)).intValue();
354
			docid.append(random);
355
			docid.append(DOT);
356
			docid.append(version);
357
			
358
			return docid.toString();
359
		 
360
	  }
361
	  /*
362
	   * Gets the data file name from the online url. An SRB online url looks like -
363
	   * srb://seek:/home/beam.seek/IPCC_climate/Present/ccld6190.dat.
364
	   * The last part - ccld6190.dat is the file name. This method will get
365
	   * the file name from the given url.
366
	   */
367
	  private String getDataFileNameFromURL(String onlineUrl) throws Exception
368
	  {
369
		  String dataFile = null;
370
		  String slash = "/";
371
		  if (onlineUrl != null)
372
		  {
373
			  int index = onlineUrl.lastIndexOf(slash);
374
			  try
375
			  {
376
			     dataFile = onlineUrl.substring(index+1);
377
			  }
378
			  catch(Exception e)
379
			  {
380
				  throw new Exception("Couldn't get data file name from the given url "+onlineUrl+
381
						  " since "+e.getMessage());
382
			  }
383
		  }
384
		  return dataFile;
385
	  }
386
	  
387
	  
237 388
     
238 389
      /*
239 390
       * Gets eml document list from text file. The text file format should be:
......
261 412
	  }
262 413
	  
263 414
	  /*
415
	   * Update the given eml document (in DOM). There are two places to be updated 
416
	   * The revision in the package id will be increased by 1, e.g., from 1 to 2. The distribution online url will
417
	   * point to the new ecogrid id, e.g., ecogrid://knb/IPCC.2007.1
418
	   */
419
	  private String updateEMLDoc(Node root, String docid, String dataId) throws Exception 
420
	  {
421
		  // update package id
422
		  docid = getIncreasedNewDocid(docid);
423
		  XMLUtilities.addAttributeNodeToDOMTree( root, PACKAGEID, docid);
424
	      // update online url.  oneline url should either in spatialRaster or dataTable.
425
		  // First try to see if spatialRaster exist or not. If not try data table
426
		  String newUrl = KNB+dataId; //new url looks like ecogrid://knb/IPCC.2007.1
427
		  boolean isSpatialRaster = true;
428
		  boolean isDataTable = false;
429
		  Node urlNode = XMLUtilities.getTextNodeWithXPath(root, SPATIALONLINEURL);
430
		  if (urlNode == null)
431
		  {
432
			  // has no spatialRaster 
433
			 isSpatialRaster = false;		
434
		  }
435
		  else
436
		  {
437
			  // has spatialRaster
438
			  isSpatialRaster = true;		  
439
		  }
440
		  // determin if has datable or not
441
		  urlNode = XMLUtilities.getTextNodeWithXPath(root,TABLEONLINEURL);
442
		  if (urlNode != null)
443
		  {
444
				 isDataTable = true;
445
		  }
446
		  
447
		  if (isSpatialRaster && !isDataTable)
448
		  {
449
			 //only has spatialRaster and no dataTable, update spatialRaster online url
450
			  XMLUtilities.addTextNodeToDOMTree(root, SPATIALONLINEURL, newUrl);
451
		  }
452
		  else if (!isSpatialRaster && isDataTable)
453
		  {
454
              //only has dataTable and no spatialRaster, update dataTable online url
455
			  XMLUtilities.addTextNodeToDOMTree(root, TABLEONLINEURL, newUrl);
456
		  }
457
		  else
458
		  {
459
			  //some strange things happen
460
			  throw new Exception("The eml either has both dataTable or spatialRaster OR doesn't has any entity");
461
		  }
462
		  return docid;
463
	  }
464
	  
465
	  /*
466
	   * Gets new docid with increased version. Docid looks like tao.1.1. The new docid will be
467
	   * tao.1.2.
468
	   */
469
	  private String getIncreasedNewDocid(String docid) throws Exception
470
	  {
471
		  int rev = 1;
472
		  String revision = null;
473
		  String prefix = null;
474
		  String newId = null;
475
		  if (docid != null)
476
		  {
477
			  int index = docid.lastIndexOf(DOT);
478
			  try
479
			  {
480
				 // Get revsion part(1)
481
			     revision = docid.substring(index+1);
482
			     // Get prefix part (tao.1.)
483
			     prefix    = docid.substring(0, index+1);
484
			     // increase version from 1 to 2
485
			     rev = (new Integer(revision)).intValue();
486
			     rev++;
487
			     // combines the prefix tao.1. and new revision2 to get tao.1.2
488
			     newId= prefix+rev;
489
			     
490
			  }
491
			  catch(Exception e)
492
			  {
493
				  throw new Exception("Couldn't increase revsion number from the given docid "+docid+
494
						  " since "+e.getMessage());
495
				
496
			  }
497
		  }
498
		  return newId;
499
	  }
500
	  
501
	  /*
264 502
	   * Gets eml document list from searching Metacat
265 503
	   * TO-DO: This method need to be implemented
266 504
	   */
......
299 537
		  }
300 538
		  return list;
301 539
	  }
302

  
540
	  
541
	  /*
542
	   * Appends a time-stamped message (success or error) to the given log file.
543
	   */
544
	  private void writeLog(File file, String message)
545
	  {
546
	    try
547
	    {
548
	      FileOutputStream fos = new FileOutputStream(file, true);
549
	      PrintWriter pw = new PrintWriter(fos);
550
	      SimpleDateFormat formatter = new SimpleDateFormat ("yy-MM-dd HH:mm:ss");
551
	      java.util.Date localtime = new java.util.Date();
552
	      String dateString = formatter.format(localtime);
553
	      dateString += " :: " + message;
554
	      //time stamp each entry
555
	      pw.println(dateString);
556
	      pw.flush();
557
	      pw.close();
558
	      fos.close();
559
	    }
560
	    catch(Exception e)
561
	    {
562
	      System.out.println("error writing to replication log from " +
563
	                         "MetacatReplication.replLog: " + e.getMessage());
564
	      //e.printStackTrace(System.out);
565
	    }
566
	 }
303 567
}

Also available in: Unified diff