Project

General

Profile

1
/*
2
 * Harvester.java
3
 *
4
 * Created on January 14, 2004, 4:44 PM
5
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.File;
10
import java.io.FileInputStream;
11
import java.io.IOException;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.ResultSet;
15
import java.sql.SQLException;
16
import java.sql.SQLWarning;
17
import java.sql.Statement;
18
import java.util.ArrayList;
19
import java.text.SimpleDateFormat;
20
import java.util.Date;
21
import java.util.Properties;
22

    
23
import edu.ucsb.nceas.metacat.client.Metacat;
24
import edu.ucsb.nceas.metacat.client.MetacatFactory;
25
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
26

    
27
/**
28
 * Harvester is the main class for the Harvester application. The main
29
 * method creates a single Harvester object which drives the application.
30
 * 
31
 * @author    costa
32
 * 
33
 */
34
public class Harvester {
35

    
36
  /*
37
   * Class fields
38
   */
39
   
40

    
41
  /* 
42
   * Class methods
43
   */
44
   
45

    
46
  /**
   * Creates a Harvester instance. The constructor itself initializes
   * nothing; the object fields are assigned later, when startup() runs.
   */
  public Harvester() {
    super();
  }
51
    
52

    
53
  /**
54
   * Determines whether Harvester is running on a Win32 platform. Used
55
   * during development.
56
   * 
57
   * @return    true if this in Win32, false otherwise
58
   */
59
  public static boolean isWin32 () {
60
    boolean isWin32;
61
    String osName = System.getProperty("os.name");
62
    
63
    isWin32 = (osName.startsWith("Windows"));
64
    return isWin32;
65
  }
66
  
67

    
68
  /**
69
    * Harvester main method.
70
    * 
71
    * @param args        the command line arguments
72
    * @throws SAXException
73
    * @throws IOException
74
    * @throws ParserConfigurationException
75
    */
76
  public static void main(String[] args) {
77
    Harvester harvester = new Harvester();
78
    
79
    harvester.startup();                  // Start up Harvester
80
    harvester.readHarvestSiteSchedule();  // Read the database table
81
    harvester.harvest();                  // Harvest the documents
82
    harvester.shutdown();                 // Shut down Harvester
83
  }
84

    
85

    
86
  /*
87
   * Object fields
88
   */
89

    
90
  /** Database connection */
91
  Connection conn;
92
  
93
  /** Used during development to determine whether to connect to metacat */
94
  private boolean connectToMetacat;
95

    
96
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
97
  private int detailLogID;
98
  
99
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
100
  private int harvestLogID;
101
  
102
  /** End time of this harvest session */
103
  private Date harvestEndTime;
104
  
105
  /** List of HarvestLog objects. Stores log entries for report generation. */
106
  private ArrayList harvestLogList = new ArrayList();
107
  
108
  /** List of HarvestSiteSchedule objects */
109
  private ArrayList harvestSiteScheduleList = new ArrayList();
110
  
111
  /** Start time of this harvest session */
112
  private Date harvestStartTime;
113
  
114
  /** Number of days to save log records. Any that are older are purged. */
115
  int logPeriod;
116
  
117
  /** Metacat client object */
118
  Metacat metacat;
119
  
120
  /** Loads harvester properties from configuration file */
121
  Properties properties;
122
    
123

    
124
  /*
125
   * Object methods
126
   */
127
   
128
   
129
  /*                
130
                    Harvester  harvester,
131
                    Date       harvestDate,
132
                    int        status,
133
                    String     message, 
134
                    String     harvestOperationCode,
135
                    int        siteScheduleID,
136
                    HarvestDocument harvestDocument,
137
                    String     errorMessage 
138
*/
139
  /**
140
   * Creates a new HarvestLog object and adds it to the harvestLogList.
141
   * 
142
   * @param  status          the status of the harvest operation
143
   * @param  message         the message text of the harvest operation
144
   * @param  harvestOperationCode  the harvest operation code
145
   * @param  siteScheduleID  the siteScheduleID for which this operation was
146
   *                         performed. 0 indicates that the operation did not
147
   *                         involve a particular harvest site.
148
   * @param  harvestDocument the associated HarvestDocument object. May be null.
149
   * @param  errorMessage    additional error message pertaining to document
150
   *                         error.
151
   */
152
  void addLogEntry(int    status,
153
                   String message,
154
                   String harvestOperationCode,
155
                   int    siteScheduleID,
156
                   HarvestDocument harvestDocument,
157
                   String errorMessage
158
                  ) {
159
    HarvestLog harvestLog;
160

    
161
    /* If there is no associated harvest document, call the basic constructor;
162
     * else call the extended constructor.
163
     */
164
    if (harvestDocument == null) {    
165
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
166
                                  harvestOperationCode, siteScheduleID);
167
    }
168
    else {
169
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
170
                                  harvestOperationCode, siteScheduleID,
171
                                  harvestDocument, errorMessage);
172
    }
173
    
174
    harvestLogList.add(harvestLog);
175
  }
176

    
177

    
178
  /**
179
   * Determines whether Harvester should attempt to connect to Metacat.
180
   * Used during development and testing.
181
   * 
182
   * @return     true if Harvester should connect, otherwise false
183
   */
184
  boolean connectToMetacat () {
185
    return connectToMetacat;
186
  }
187
  
188

    
189
  /**
190
   * Normalizes text prior to insertion into the HARVEST_LOG or
191
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
192
   * character with the double quote character. This prevents SQL errors
193
   * involving words that contain single quotes. Also removes \n and \r
194
   * characters from the text.
195
   * 
196
   * @param text  the original string
197
   * @return      a string containing the normalized text
198
   */
199
  String dequoteText(String text) {
200
    char c;
201
    StringBuffer stringBuffer = new StringBuffer();
202
    
203
    for (int i = 0; i < text.length(); i++) {
204
      c = text.charAt(i);
205
      switch (c) {
206
        case '\'':
207
          stringBuffer.append('\"');
208
          break;
209
        case '\r':
210
        case '\n':
211
          break;
212
        default:
213
          stringBuffer.append(c);
214
          break;
215
      }
216
    }
217
    
218
    return stringBuffer.toString();
219
  }
220

    
221

    
222
  /**
223
   * Gets the current value of the detailLogID for storage as a primary key in
224
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
225
   * 
226
   * @return  the current value of the detailLogID
227
   */
228
  int getDetailLogID() {
229
    int currentValue = detailLogID;
230
    
231
    detailLogID++;
232
    return currentValue;
233
  }
234
  
235
  
236
  /**
237
   * Gets the current value of the harvestLogID for storage as a primary key in
238
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
239
   * 
240
   * @return  the current value of the detailLogID
241
   */
242
  int getHarvestLogID() {
243
    int currentValue = harvestLogID;
244
    
245
    harvestLogID++;
246
    return currentValue;
247
  }
248
  
249

    
250
  /** 
251
   * Gets the maximum value of an integer field from a table.
252
   * 
253
   * @param tableName  the database table name
254
   * @param fieldName  the field name of the integer field in the table
255
   * @return  the maximum integer stored in the fieldName field of tableName
256
   */
257
  private int getMaxValue(String tableName, String fieldName) {
258
    int maxValue = 100;
259
    int fieldValue;
260
		String query = "SELECT " + fieldName + " FROM " + tableName;
261
		Statement stmt;
262
    
263
		try {
264
			stmt = conn.createStatement();							
265
			ResultSet rs = stmt.executeQuery(query);
266
	
267
			while (rs.next()) {
268
				fieldValue = rs.getInt(fieldName);
269
        maxValue = Math.max(maxValue, fieldValue);
270
			}
271
	
272
			stmt.close();	
273
		} 
274
    catch(SQLException ex) {
275
			System.out.println("SQLException: " + ex.getMessage());
276
		}
277
    
278
    return maxValue;
279
  }
280
  
281
  
282
  /** 
283
   * Gets the minimum value of an integer field from a table.
284
   * 
285
   * @param tableName  the database table name
286
   * @param fieldName  the field name of the integer field in the table
287
   * @return  the minimum integer stored in the fieldName field of tableName
288
   */
289
  private int getMinValue(String tableName, String fieldName) {
290
    int minValue = 0;
291
    int fieldValue;
292
		String query = "SELECT " + fieldName + " FROM " + tableName;
293
		Statement stmt;
294
    
295
		try {
296
			stmt = conn.createStatement();							
297
			ResultSet rs = stmt.executeQuery(query);
298
	
299
			while (rs.next()) {
300
				fieldValue = rs.getInt(fieldName);
301

    
302
        if (minValue == 0) {
303
          minValue = fieldValue;
304
        }
305
        else {
306
          minValue = Math.min(minValue, fieldValue);
307
        }
308
			}
309
	
310
			stmt.close();	
311
		} 
312
    catch(SQLException ex) {
313
			System.out.println("SQLException: " + ex.getMessage());
314
		}
315
    
316
    return minValue;
317
  }
318
  
319
  
320
  /**
321
   * For every Harvest site schedule in the database, harvest the
322
   * documents for that site if they are due to be harvested.
323
   * 
324
   * @throws SAXException
325
   * @throws IOException
326
   * @throws ParserConfigurationException
327
   */
328
  private void harvest() {
329
    HarvestSiteSchedule harvestSiteSchedule;
330

    
331
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
332
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
333
      harvestSiteSchedule.harvestDocumentList();
334
    }
335
  }
336
  
337
  
338
  /**
339
   * Initializes the detailLogID and harvestLogID values to their current
340
   * maximums + 1.
341
   */
342
  private void initLogIDs() {
343
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
344
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
345
  }
346
  
347
  
348
  /**
349
   * Loads Harvester properties from a configuration file.
350
   */
351
  private void loadProperties() {
352
    String homedir = System.getProperty("user.home");
353
    File configfile = new File(homedir, "harvester.properties");
354
    
355
    properties = new Properties();
356

    
357
    try {
358
      properties.load(new FileInputStream(configfile));
359
      properties.list(System.out);
360
    }
361
    catch (IOException e) {
362
      System.out.println("IOException: " + e.getMessage());
363
      System.exit(1);
364
    }
365
  }
366
  
367
  
368
  void printHarvestSiteSchedule(int siteScheduleID) {
369
     HarvestSiteSchedule harvestSiteSchedule;
370

    
371
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
372
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
373
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
374
        harvestSiteSchedule.printOutput();
375
      }
376
    }
377
  }
378
  
379

    
380
  /**
381
   * Prunes old records from the HARVEST_DETAIL_LOG table. Records are
382
   * removed if the HARVEST_LOG_ID foreign key is less than the lowest
383
   * HARVEST_LOG_ID primary key in the HARVEST_LOG table.
384
   */
385
  private void pruneHarvestDetailLog() {
386
		String deleteString;
387
    int minHarvestLogID;
388
    int recordsDeleted;
389
		Statement stmt;
390
    
391
    minHarvestLogID = getMinValue("HARVEST_LOG", "HARVEST_LOG_ID");
392
    deleteString = "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID < " +
393
                   minHarvestLogID;
394

    
395
		try {
396
			System.out.print("Pruning log entries from HARVEST_DETAIL_LOG: ");
397
			stmt = conn.createStatement();							
398
			recordsDeleted = stmt.executeUpdate(deleteString);
399
			System.out.println(recordsDeleted + " records deleted");
400
			stmt.close();
401
		}
402
    catch(SQLException e) {
403
			System.out.println("SQLException: " + e.getMessage());
404
		}
405
  }
406
    
407

    
408
  /**
409
   * Prunes old records from the HARVEST_LOG table. Records are removed if
410
   * their HARVEST_DATE is older than a given number of days, as stored in the
411
   * logPeriod object field.
412
   */
413
  private void pruneHarvestLog() {
414
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
415
    Date dateLastLog;                    // Prune everything prior to this date
416
		String deleteString;
417
    long delta;
418
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
419
    int recordsDeleted;
420
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
421
    String dateString;
422
		Statement stmt;
423
    long timeLastLog = 0;
424
    
425
    delta = logPeriod * millisecondsPerDay;
426
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
427
    timeLastLog = currentTime - delta;
428
    dateLastLog = new Date(timeLastLog);
429
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
430
    deleteString += dateString;
431

    
432
		try {
433
			System.out.print("Pruning log entries from HARVEST_LOG: ");
434
			stmt = conn.createStatement();							
435
			recordsDeleted = stmt.executeUpdate(deleteString);
436
			System.out.println(recordsDeleted + " records deleted");
437
			stmt.close();
438
		}
439
    catch (SQLException e) {
440
			System.out.println("SQLException: " + e.getMessage());
441
		}
442
  }
443
    
444

    
445
  /**
446
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
447
   * a HarvestSiteSchedule object for each row in the table.
448
   */
449
  private void readHarvestSiteSchedule() {
450
    HarvestSiteSchedule harvestSiteSchedule;
451
    ResultSet rs;
452
    SQLWarning warn;
453
    Statement stmt;
454

    
455
    String contactEmail;
456
    String dateLastHarvest;
457
    String dateNextHarvest;
458
    String documentListURL;
459
    String ldapDN;
460
    String ldapPwd;
461
    int siteScheduleID;
462
    String unit;
463
    int updateFrequency;
464
        
465
    try {
466
      // Read the HARVEST_SITE_SCHEDULE table
467
      stmt = conn.createStatement();
468
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
469
      warn = rs.getWarnings();
470

    
471
      if (warn != null) {
472
        System.out.println("\n---Warning---\n");
473

    
474
        while (warn != null) {
475
          System.out.println("Message: " + warn.getMessage());
476
          System.out.println("SQLState: " + warn.getSQLState());
477
          System.out.print("Vendor error code: ");
478
          System.out.println(warn.getErrorCode());
479
          System.out.println("");
480
          warn = warn.getNextWarning();
481
        }
482
      }
483
     
484
      while (rs.next()) {
485
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
486
        documentListURL = rs.getString("DOCUMENTLISTURL");
487
        ldapDN = rs.getString("LDAPDN");
488
        ldapPwd = rs.getString("LDAPPWD");
489
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
490
        dateLastHarvest = rs.getString("DATELASTHARVEST");
491
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
492
        unit = rs.getString("UNIT");
493
        contactEmail = rs.getString("CONTACT_EMAIL");
494
        
495
        warn = rs.getWarnings();
496

    
497
        if (warn != null) {
498
          System.out.println("\n---Warning---\n");
499
      
500
          while (warn != null) {
501
            System.out.println("Message: " + warn.getMessage());
502
            System.out.println("SQLState: " + warn.getSQLState());
503
            System.out.print("Vendor error code: ");
504
            System.out.println(warn.getErrorCode());
505
            System.out.println("");
506
            warn = warn.getNextWarning();
507
          }
508
        }
509
      
510
        harvestSiteSchedule = new HarvestSiteSchedule(this,
511
                                                      siteScheduleID,
512
                                                      documentListURL,
513
                                                      ldapDN,
514
                                                      ldapPwd,
515
                                                      dateNextHarvest,
516
                                                      dateLastHarvest,
517
                                                      updateFrequency,
518
                                                      unit,
519
                                                      contactEmail
520
                                                     );
521
        harvestSiteScheduleList.add(harvestSiteSchedule);
522
      }
523
      
524
      rs.close();
525
      stmt.close();
526
    }
527
    catch (SQLException e) {
528
      System.out.println("Database access failed " + e);
529
      System.exit(1);
530
    }
531
    
532
  }
533
    
534

    
535
  /**
536
   * Sends a report to the Harvester administrator.
537
   */
538
  void reportToAdministrator() {
539
    System.out.println("\nSending report to administrator.");
540
  }
541
    
542

    
543
  /**
544
   * Shuts down Harvester. Performs cleanup operations such as logging out
545
   * of Metacat and disconnecting from the database.
546
   */
547
  private void shutdown() {
548
    // Log shutdown operation
549
    System.out.println("Shutting Down Harvester");
550
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
551
    pruneHarvestLog();
552
    pruneHarvestDetailLog();
553

    
554
    try {
555
      // Close the database connection
556
      System.out.println("Closing the database connection");
557
      conn.close();
558
    }
559
    catch (SQLException e) {
560
      System.out.println("Database access failed " + e);
561
    }
562
    
563
    writeHarvestLog();
564
    reportToAdministrator();
565
  }
566
    
567

    
568
  /**
569
   * Initializes Harvester at startup. Connects to the database and to Metacat.
570
   */
571
  private void startup() {
572
    Boolean ctm;
573
    String dbDriver;
574
    Integer lp;
575
    String metacatURL;
576
    String osName = Harvester.isWin32() ? "Windows" : "Unix";
577
    String password;
578
    //String response;
579
    String sessionId;
580
		String url;
581
    String user;
582
    String userName = System.getProperty("user.name");
583
    SQLWarning warn;
584
    
585
    // Log startup operation
586
    System.out.println("*****************************************************");
587
    System.out.println("Starting Up Harvester");
588
    loadProperties();
589
    ctm = Boolean.valueOf(properties.getProperty("connectToMetacat", "true"));
590
    connectToMetacat = ctm.booleanValue();
591
    dbDriver = properties.getProperty("dbDriver");
592
    try {
593
      lp = Integer.valueOf(properties.getProperty("logPeriod", "90"));
594
      logPeriod = lp.intValue();
595
    }
596
    catch (NumberFormatException e) {
597
      System.err.println("NumberFormatException: Error parsing logPeriod " +
598
                         logPeriod + e.getMessage());
599
      System.err.println("Defaulting to logPeriod of 90 days");
600
      logPeriod = 90;
601
    }
602
    metacatURL = properties.getProperty("metacatURL");
603
    password = properties.getProperty("password");
604
    url = properties.getProperty("url");
605
    user = properties.getProperty("user");
606

    
607
    // Load the jdbc driver
608
    try {
609
      Class.forName(dbDriver);
610
    }
611
    catch (ClassNotFoundException e) {
612
      System.out.println("Can't load driver " + e);
613
      System.exit(1);
614
    } 
615

    
616
    // Make the database connection
617
    try {
618
      System.out.println("Getting connection to Harvester tables");
619
      conn = DriverManager.getConnection(url, user, password);
620

    
621
      // If a SQLWarning object is available, print its warning(s).
622
      // There may be multiple warnings chained.
623
      warn = conn.getWarnings();
624
      
625
      if (warn != null) {
626
        while (warn != null) {
627
          System.out.println("SQLState: " + warn.getSQLState());
628
          System.out.println("Message:  " + warn.getMessage());
629
          System.out.println("Vendor: " + warn.getErrorCode());
630
          System.out.println("");
631
          warn = warn.getNextWarning();
632
        }
633
      }
634
    }
635
    catch (SQLException e) {
636
      System.out.println("Database access failed " + e);
637
      System.exit(1);
638
    }
639
    
640
    initLogIDs();
641
    harvestStartTime = new Date();
642
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
643
      
644
    if (connectToMetacat()) {      
645
      try {
646
        System.out.println("Connecting to Metacat: " + metacatURL);
647
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
648
      } 
649
      catch (MetacatInaccessibleException e) {
650
        System.out.println("Metacat connection failed." + e.getMessage());
651
      } 
652
      catch (Exception e) {
653
        System.out.println("Metacat connection failed." + e.getMessage());
654
      }
655
    }
656
  }
657

    
658

    
659
  /**
660
   * Writes one or more log entries to the HARVEST_LOG table.
661
   */
662
  private void writeHarvestLog() {
663
    HarvestLog harvestLog;
664
    
665
    for (int i = 0; i < harvestLogList.size(); i++) {
666
      harvestLog = (HarvestLog) harvestLogList.get(i);
667
      harvestLog.printOutput();
668
    }
669
  }
670
    
671
}
(5-5/7)