Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2000 Regents of the University of California and the
4
 *              National Center for Ecological Analysis and Synthesis
5
 *    Purpose: To upload IPCC data files to Metacat and re-point the EML documents at them
6
 *    Authors: Jing Tao
7
 *
8
 *   '$Author: tao $'
9
 *     '$Date: 2008-01-24 18:40:30 -0800 (Thu, 24 Jan 2008) $'
10
 * '$Revision: 3694 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacattest;
28

    
29
import edu.ucsb.nceas.metacat.*;
30
import edu.ucsb.nceas.metacat.client.Metacat;
31
import edu.ucsb.nceas.metacat.client.MetacatFactory;
32
import edu.ucsb.nceas.utilities.IOUtil;
33
import edu.ucsb.nceas.utilities.Options;
34
import edu.ucsb.nceas.utilities.XMLUtilities;
35
//import edu.ucsb.nceas.morpho.framework.*;
36
import junit.framework.Test;
37
import junit.framework.TestCase;
38
import junit.framework.TestResult;
39
import junit.framework.TestSuite;
40
import org.apache.commons.logging.Log;
41
import org.apache.commons.logging.LogFactory;
42
import org.w3c.dom.Document;
43
import org.w3c.dom.Node;
44
import org.w3c.dom.NodeList;
45

    
46
import java.io.*;
47
import java.net.*;
48
import java.text.SimpleDateFormat;
49
import java.util.*;
50

    
51

    
52
/**
53
 * This class is used to change the data file location for IPCC eml documents.
54
 * Currently IPCC eml documents point their data files to the SRB server. However, the srb 
55
 * earthgrid is not very stable. We decided to change the online URL from srb to knb.
56
 * So this class will handle this case.
57
 * Before running this program, it needs:
58
 * 1. Downloaded data files from SRB
59
 * 2. A text file listing the IPCC docids (with revision numbers). If the text file is not available, it needs
60
 * a metacat query file to search metacat to get the docid list.
61
 * What the class will do:
62
 * 1. It will read the eml from Metacat.
63
 * 2. Get online URL information from eml document by DOM parser.
64
 * 3. Base on the URL information, this program will find the data file in
65
 *     the directory which contains the srb data file.
66
* 4. It will generate docid for the data file.
67
* 5. Upload the download srb data file to Metacat with assigned docid.
68
* 6. Modify the eml document with the new URL information (pointing to
69
 *     knb) and new version number in eml
70
 * 7. Update it to a new version in Metacat.
71
 * 8 . Go through above 7 steps for every eml document in the list.
72
 * 
73
 */ 
74
public class UploadIPCCDataTest extends TestCase
75
{
76
  
77
	 
78
	  /*
	   * Initializes Options from the test properties file and stores the
	   * configured "indexPaths" option list in MetaCatUtil.pathsForIndexing.
	   * A failure is reported on stderr and otherwise ignored, so loading this
	   * class never fails because of a missing or broken properties file.
	   */
	  static
	  {
	    try
	    {
	      Options.initialize(new File("build/tests/metacat.properties"));
	      MetaCatUtil.pathsForIndexing
	          = MetaCatUtil.getOptionList(MetaCatUtil.getOption("indexPaths"));
	    }
	    catch (Exception e)
	    {
	      // Name this class (the old text named an unrelated test) and print the
	      // exception itself so its type is visible even when it has no message.
	      System.err.println("Exception in initialize option in UploadIPCCDataTest " + e);
	    }
	  }
92
	  
93
	  /**Constants*/
94
	  private static String SRBDATAFILEDIR = "/home/tao/data-file"; // Dir for storing srb data file
95
	  private static String DOCLISTFILE       = "docidList"; // File name which stores IPCC document id
96
	  private static String METACATURL      = "http://chico.dyndns.org/knb/metacat";
97
	  private static String USERNAME          = "uid=tao,o=NCEAS,dc=ecoinformatics,dc=org";
98
	  private static String PASSWORD           = "password";
99
	  private static String TABLEONLINEURL= "/eml:eml/dataset/dataTable/physical/distribution/online/url";
100
	  private static String SPATIALONLINEURL = "/eml:eml/dataset/spatialRaster/physical/distribution/online/url";
101
	  private static String PACKAGEID               ="/eml:eml/@packageId";
102
	  private static String SRB                           = "srb://";
103
	  private static String KNB                           = "ecogrid://knb/";
104
	  private static String DATAIDPREFIX          = "IPCC";
105
	  private static String DOT                           = ".";
106
	  private static String SUCCESSLOG             = "update.log";
107
	  private static String ERRORLOG                = "error.log";
108
	  private static String CURRENT_CORRECTFILENAME = "correct_filename.csv";
109
	  private File log = new File(SUCCESSLOG);
110
	  private File error = new File (ERRORLOG);
111
	  
112
	  
113
	  /**
	   * Creates a test case instance bound to a single method.
	   *
	   * @param name the name of the method this instance will run; it is
	   *             passed unchanged to the TestCase constructor
	   */
	  public UploadIPCCDataTest(String name)
	  {
	    super(name);
	  }
123

    
124

    
125
	  /**
126
	   * Create a suite of tests to be run together
127
	   */
128
	  public static Test suite()
129
	  {
130
		   TestSuite suite = new TestSuite();
131
		   //suite.addTest(new UploadIPCCDataTest("upload"));
132
		   //suite.addTest(new UploadIPCCDataTest("getCurrent_CorrectFileNamesPair"));
133
		    return suite;
134
	 }
135
	  
136
	  /**
137
	   * Upload the data file to Metacat and modify the eml documents
138
	   * @return
139
	   * @throws Exception
140
	   */
141
	  public void upload()
142
	  {
143
		  
144
		      // Get eml document first
145
			  Vector list = getDocumentList();
146
			  //If list is not empty, goes through every document by handleSingleEML method -
147
			  //1. It will read the eml from Metacat.
148
			  // 2. Get online URL information from eml document by DOM parser.
149
			  // 3. Base on the URL information, this program will find the data file in
150
			  // the direcotry which contains the srb data file.
151
			  // 4. It will generate docid for the data file
152
              // 5. At last upload the download srb data file to Metacat with assigned docid.
153
			  // 6. Modify the eml document with the new URL information (pointing to
154
			  // knb) and new version number in eml.
155
			  // 7.Update it to a new version in Metacat.
156
			  
157
              if (list != null && !list.isEmpty())
158
              {
159
            	   int size = list.size();
160
            	   for (int i=0; i<size; i++)
161
            	   {
162
            		   String docid = null;
163
            		   try
164
            		   {
165
            			   docid = (String)list.elementAt(i);
166
            			   String dataId = handleSingleEML(docid);
167
            			   String message = "Successfully update eml "+docid + " with data id "+dataId;
168
            			   writeLog(log, message);
169
            		   }
170
            		   catch(Exception e)
171
            		   {
172
            			   System.err.println("Failed to handle eml document "+docid + " since "+
173
            					   e.getMessage());
174
            			   String message = "failed to update eml "+docid + "\n "+e.getMessage();
175
            			   writeLog(error, message);
176
            		   }
177
            	   }
178
              }
179
              else
180
              {
181
            	  System.err.println("There is no EML document to handle");
182
              }
183
		
184
	  }
185
	  
186
	  /*
187
	   * Does actually job to upload data file and modify eml document for a given id.
188
	   * Here are its tasks:
189
	   * 1. It will read the eml from Metacat.
190
	   * 2. Get online URL information from eml document by DOM parser.
191
	   * 3. Base on the URL information, this program will find the data file in
192
	   *     the direcotry which contains the srb data file.
193
	   * 4. It will generate docid for the data file.
194
	   * 5. Upload the download srb data file to Metacat with assigned docid.
195
	   * 6. Modify the eml document with the new URL information (pointing to
196
	   *     knb) and new version number in eml
197
	   * 7. Update it to a new version in Metacat.
198
	   * 
199
	   */	 
200
	  private String handleSingleEML(String docid) throws Exception
201
	  {
202
		  Metacat metacat = MetacatFactory.createMetacatConnection(METACATURL);
203
		  // login metacat 
204
		  String response = metacat.login(USERNAME, PASSWORD);
205
		  if (response.indexOf("<login>") == -1)
206
		  {
207
			  throw new Exception("login failed "+response);
208
		  }
209
		  // 1. Reads eml document from metacat
210
		  Reader r = metacat.read(docid);
211
          Document DOMdoc = XMLUtilities.getXMLReaderAsDOMDocument(r);
212
          Node rootNode = (Node)DOMdoc.getDocumentElement();
213
          
214
          //2.  Gets online url information. If onlineUrl is not SRB, through an exception
215
          String onlineUrl = getOnLineURL(rootNode);
216
          //System.out.println("=================The url is "+onlineUrl);
217
          
218
          //3. Find the srb data file 
219
          String dataFileName = getDataFileNameFromURL(onlineUrl);
220
          //System.out.println("=================The data file is "+dataFileName);
221
          File dataFile = null;
222
          dataFile = new File(SRBDATAFILEDIR,dataFileName);
223
           if (!dataFile.exists())
224
           {
225
        	  throw new Exception("Couldn't find the data file in srb data directory "+dataFile);
226
          }
227
           
228
          //4. Generate docid for data file
229
          String dataId = generateId();
230
          //System.out.println("=======The docid for data file will be "+dataId);
231
          
232
          //5. upload data file to Metacat
233
          response = metacat.upload(dataId, dataFile);
234
          if (response.indexOf("<success>") == -1)
235
          {
236
        	  throw new Exception("Couldn't upload data file "+dataFileName +
237
        			  " with id "+dataId+ " into Metacat since "+response);
238
          }
239
          
240
          //6. Updates eml online url and package id in DOM
241
          String newId = updateEMLDoc(rootNode, docid, dataId);
242
          //System.out.println("The new docid is ========"+newId);
243
          
244
          //Put EML DOM with the new packagId and oneline url into a StringWriter and store it to String
245
          StringWriter stringWriter = new StringWriter();
246
          PrintWriter printWriter = new PrintWriter(stringWriter);
247
		  XMLUtilities.print(rootNode, printWriter);
248
		  String xml = stringWriter.toString();
249
		  //System.out.println("the xml is "+xml);		  
250
		  
251
		  //7.insert new (update) EML document into Metacat
252
          StringReader xmlReader = new StringReader(xml);
253
          response = metacat.update(newId, xmlReader, null);
254
          if (response.indexOf("<success>") == -1)
255
          {
256
        	  throw new Exception("Upload data file "+dataFileName +
257
        			  " with id "+dataId+ " successfully but update eml "+newId +" failed since "+ response);
258
          }
259
          metacat.logout();
260
          return dataId;
261
	  }
262
	  
263
	  /*
264
	   * Gets onlineUrl value from a given eml DOM document.
265
	   * The online url xpath can be "/eml/dataset/dataTable/physical/distribution/online/url"
266
	   * or "/eml/dataset/spatialRaster/physical/distribution/online/url"
267
	   */
268
	  private String getOnLineURL(Node root) throws Exception
269
	  {
270
		  String url = null;
271
		  if (root == null)
272
		  {
273
			  throw new Exception("root node for this EML is null and couldn't get online url from it");
274
		  }
275
		  Node urlNode = XMLUtilities.getTextNodeWithXPath(root, TABLEONLINEURL);
276
		  // in table online url does exist, we will try to use another xpath - SPATIALONLEURL
277
		  if (urlNode == null)
278
		  {
279
			  urlNode = XMLUtilities.getTextNodeWithXPath(root, SPATIALONLINEURL);
280
		  }
281
		  // Couldn't find any matche element, throw exception
282
		  if(urlNode == null)
283
		  {
284
			  throw new Exception("Couldn't find any onlie url information in eml document");
285
		  }
286
		  //Gets text node value and if the url doesn't contain "srb;//", it will throw a exception
287
		 url = urlNode.getNodeValue();
288
		 if (url == null || url.indexOf(SRB)== -1)
289
		 {
290
			 throw new Exception("The online url doesn't have srb protocol and we don't need to handle");
291
		 }
292
		  return url;
293
	  }
294
	  
295
	  /*
296
	   * Automatically to generate a unique id for ddata file. 
297
	   * This id will be looked like - DATAIDPREFIX.numberBaseonTime.1, e.g
298
	   * IPCC.20072321.1
299
	   */
300
	  private String generateId()
301
	  {
302
		  int version = 1;
303
		  StringBuffer docid = new StringBuffer(DATAIDPREFIX);
304
		  docid.append(DOT);
305
				     
306
		  // Create a calendar to get the date formatted properly
307
		  String[] ids = TimeZone.getAvailableIDs(-8 * 60 * 60 * 1000);
308
		  SimpleTimeZone pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, ids[0]);
309
		  pdt.setStartRule(Calendar.APRIL, 1, Calendar.SUNDAY, 2*60*60*1000);
310
		  pdt.setEndRule(Calendar.OCTOBER, -1, Calendar.SUNDAY, 2*60*60*1000);
311
		  Calendar calendar = new GregorianCalendar(pdt);
312
		  Date trialTime = new Date();
313
		  calendar.setTime(trialTime);
314

    
315
			int time = 0; 
316
			
317
			docid.append(calendar.get(Calendar.YEAR));
318
			
319
			time = calendar.get(Calendar.DAY_OF_YEAR);
320
			if(time < 10){
321
				docid.append("0");
322
				docid.append("0");
323
				docid.append(time);
324
			} else if(time < 100) {
325
				docid.append("0");
326
				docid.append(time);
327
			} else {
328
				docid.append(time);
329
			}
330
			
331
			time = calendar.get(Calendar.HOUR_OF_DAY);
332
			if(time < 10){
333
				docid.append("0");
334
				docid.append(time);
335
			} else {
336
				docid.append(time);
337
			}
338
			
339
			time = calendar.get(Calendar.MINUTE);
340
			if(time < 10){
341
				docid.append("0");
342
				docid.append(time);
343
			} else {
344
				docid.append(time);
345
			}
346
			
347
			time = calendar.get(Calendar.SECOND);
348
			if(time < 10){
349
				docid.append("0");
350
				docid.append(time);
351
			} else {
352
				docid.append(time);
353
			}		    
354
			 //sometimes this number is not unique, so we append a random number
355
			int random = (new Double(Math.random()*100)).intValue();
356
			docid.append(random);
357
			docid.append(DOT);
358
			docid.append(version);
359
			
360
			return docid.toString();
361
		 
362
	  }
363
	  /*
364
	   * Get data file name from online url. SRB oneline url will looks like -
365
	   * srb://seek:/home/beam.seek/IPCC_climate/Present/ccld6190.dat.
366
	   * The last part - ccld6190.dat is the file name. This method will get
367
	   * the file name from the give url
368
	   */
369
	  private String getDataFileNameFromURL(String onlineUrl) throws Exception
370
	  {
371
		  String dataFile = null;
372
		  String slash = "/";
373
		  if (onlineUrl != null)
374
		  {
375
			  int index = onlineUrl.lastIndexOf(slash);
376
			  try
377
			  {
378
			     dataFile = onlineUrl.substring(index+1);
379
			  }
380
			  catch(Exception e)
381
			  {
382
				  throw new Exception("Couldn't get data file name from the given url "+onlineUrl+
383
						  " since "+e.getMessage());
384
			  }
385
		  }
386
		  return dataFile;
387
	  }
388
	  
389
	  
390
     
391
      /*
392
       * Gets eml document list from text file. The text file format should be:
393
       * tao.1.1
394
       * tao.2.1
395
       */
396
	  private Vector getDocumentListFromFile() throws Exception
397
	  {
398
		  Vector docList = new Vector();
399
		  File docListFile = new File(SRBDATAFILEDIR,DOCLISTFILE);
400
		  FileReader docListFileReader= new FileReader(docListFile);
401
		  BufferedReader readDocList = new BufferedReader(docListFileReader);
402
		  // Read every line from the text file and put it into a vector
403
		  String docid = readDocList.readLine();
404
		  while (docid != null)
405
		  {
406
			  // If the docid string is not empty, put it into vector
407
			  if (!docid.trim().equals(""))
408
			  {
409
			     docList.add(docid.trim());
410
			  }
411
			  docid = readDocList.readLine();
412
		  }
413
		  return docList;
414
	  }
415
	  
416
	  /*
417
	   * Update the given eml document (in DOM). There are two places to be updated 
418
	   * The package id will be increased 1, i.e.,  from 1 to 2. The distribution online url will
419
	   * point to the new ecogrid id, i.e. , ecogrid://knb/IPCC.2007.1 
420
	   */
421
	  private String updateEMLDoc(Node root, String docid, String dataId) throws Exception 
422
	  {
423
		  // update package id
424
		  docid = getIncreasedNewDocid(docid);
425
		  XMLUtilities.addAttributeNodeToDOMTree( root, PACKAGEID, docid);
426
	      // update online url.  oneline url should either in spatialRaster or dataTable.
427
		  // First try to see if spatialRaster exist or not. If not try data table
428
		  String newUrl = KNB+dataId; //new url looks like ecogrid://knb/IPCC.2007.1
429
		  boolean isSpatialRaster = true;
430
		  boolean isDataTable = false;
431
		  Node urlNode = XMLUtilities.getTextNodeWithXPath(root, SPATIALONLINEURL);
432
		  if (urlNode == null)
433
		  {
434
			  // has no spatialRaster 
435
			 isSpatialRaster = false;		
436
		  }
437
		  else
438
		  {
439
			  // has spatialRaster
440
			  isSpatialRaster = true;		  
441
		  }
442
		  // determin if has datable or not
443
		  urlNode = XMLUtilities.getTextNodeWithXPath(root,TABLEONLINEURL);
444
		  if (urlNode != null)
445
		  {
446
				 isDataTable = true;
447
		  }
448
		  
449
		  if (isSpatialRaster && !isDataTable)
450
		  {
451
			 //only has spatialRaster and no dataTable, update spatialRaster online url
452
			  XMLUtilities.addTextNodeToDOMTree(root, SPATIALONLINEURL, newUrl);
453
		  }
454
		  else if (!isSpatialRaster && isDataTable)
455
		  {
456
              //only has dataTable and no spatialRaster, update dataTable online url
457
			  XMLUtilities.addTextNodeToDOMTree(root, TABLEONLINEURL, newUrl);
458
		  }
459
		  else
460
		  {
461
			  //some strange things happen
462
			  throw new Exception("The eml either has both dataTable or spatialRaster OR doesn't has any entity");
463
		  }
464
		  return docid;
465
	  }
466
	  
467
	  /*
468
	   * Gets new docid with increased version. Docid looks like tao.1.1. The new docid will be
469
	   * tao.1.2.
470
	   */
471
	  private String getIncreasedNewDocid(String docid) throws Exception
472
	  {
473
		  int rev = 1;
474
		  String revision = null;
475
		  String prefix = null;
476
		  String newId = null;
477
		  if (docid != null)
478
		  {
479
			  int index = docid.lastIndexOf(DOT);
480
			  try
481
			  {
482
				 // Get revsion part(1)
483
			     revision = docid.substring(index+1);
484
			     // Get prefix part (tao.1.)
485
			     prefix    = docid.substring(0, index+1);
486
			     // increase version from 1 to 2
487
			     rev = (new Integer(revision)).intValue();
488
			     rev++;
489
			     // combines the prefix tao.1. and new revision2 to get tao.1.2
490
			     newId= prefix+rev;
491
			     
492
			  }
493
			  catch(Exception e)
494
			  {
495
				  throw new Exception("Couldn't increase revsion number from the given docid "+docid+
496
						  " since "+e.getMessage());
497
				
498
			  }
499
		  }
500
		  return newId;
501
	  }
502
	  
503
	  /*
504
	   * Gets eml document list from searching Metacat
505
	   * TO-DO: This method need to be implemented
506
	   */
507
	  private Vector getDocumentListFromMetacat()
508
	  {
509
		  Vector docList = new Vector();
510
		  return docList;
511
	  }
512
	  
513
	  /*
514
	   * Get eml document list. First this method will try
515
	   * to get the eml document list form text file. If the result is empty or
516
	   * it caught an exception it will try to get eml document list from metacat.
517
	   */
518
	  private Vector getDocumentList()
519
	  {
520
		  Vector list = null;
521
		  try
522
		  {
523
			  //First, try to get eml doc list from text file
524
			  list = getDocumentListFromFile();
525
			  if (list == null || list.isEmpty())
526
			  {
527
				  throw new Exception("The eml doclist is empty in text file");
528
			  }
529
		  }
530
		  catch(Exception e)
531
		  {
532
			  System.err.println("Couldn't get eml document list from text file: "+e.getMessage());
533
			  // If an exception happened, try to get eml doc list from metacat
534
			  list = getDocumentListFromMetacat();
535
		  }
536
		  if (list != null)
537
		  {
538
			  System.out.println("the list is "+list);
539
		  }
540
		  return list;
541
	  }
542
	  
543
	  /*
544
	   * Writes error message into log file.
545
	   */
546
	  private void writeLog(File file, String message)
547
	  {
548
	    try
549
	    {
550
	      FileOutputStream fos = new FileOutputStream(file, true);
551
	      PrintWriter pw = new PrintWriter(fos);
552
	      SimpleDateFormat formatter = new SimpleDateFormat ("yy-MM-dd HH:mm:ss");
553
	      java.util.Date localtime = new java.util.Date();
554
	      String dateString = formatter.format(localtime);
555
	      dateString += " :: " + message;
556
	      //time stamp each entry
557
	      pw.println(dateString);
558
	      pw.flush();
559
	      pw.close();
560
	      fos.close();
561
	    }
562
	    catch(Exception e)
563
	    {
564
	      System.out.println("error writing to replication log from " +
565
	                         "MetacatReplication.replLog: " + e.getMessage());
566
	      //e.printStackTrace(System.out);
567
	    }
568
	 }
569
	  
570
	  /*
571
	   * Read a csv file which contains current data file name and correct data file name. 
572
	   * The format of csv file is:
573
	   * currentname1,correctname1
574
	   * currentname2,correctname2
575
	   * ........
576
	   * The return value is hash table, the current data file name is key and correct file name is
577
	   * value.
578
	   */
579
	   private Hashtable getCurrent_CorrectFileNamesPair() throws Exception
580
	   {
581
		   Hashtable fileNamesHash = new Hashtable();
582
		   File current_correctFileNames = new File(CURRENT_CORRECTFILENAME);
583
		   FileReader fileReader= new FileReader(current_correctFileNames);
584
		   BufferedReader readDocList = new BufferedReader(fileReader);
585
		   // Read every line from the text file, this line will look like:
586
		   // currentname1,correctname1
587
		   String lineString = readDocList.readLine();
588
		   while (lineString != null)
589
		   {
590
			   //Get the comma index number
591
			   int commaIndex = lineString.indexOf(",");
592
			   if (commaIndex != -1)
593
			   {
594
			      //Get the current file name part
595
			      String currentName = lineString.substring(0, commaIndex);
596
			      //Get the correct file name part
597
			      String correctName = lineString.substring(commaIndex+1, lineString.length());
598
				  if (currentName != null && correctName != null)
599
				  {
600
					  fileNamesHash.put(currentName.trim(), correctName.trim());
601
				  }
602
			  }
603
			   lineString = readDocList.readLine();
604
		   }
605
		   return fileNamesHash;
606
	   }
607
}
(16-16/17)