Project

General

Profile

1 2022 costa
/*
 * Harvester.java
 *
 * Created on January 14, 2004, 4:44 PM
 */
6
7
package edu.ucsb.nceas.metacat.harvesterClient;
8
9 2031 costa
import java.io.File;
10
import java.io.FileInputStream;
11
import java.io.IOException;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.ResultSet;
15
import java.sql.SQLException;
16
import java.sql.SQLWarning;
17
import java.sql.Statement;
18
import java.util.ArrayList;
19
import java.text.SimpleDateFormat;
20
import java.util.Date;
21
import java.util.Properties;
22 2022 costa
23 2031 costa
import edu.ucsb.nceas.metacat.client.Metacat;
24
import edu.ucsb.nceas.metacat.client.MetacatFactory;
25
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
26 2022 costa
27
/**
 * Harvester is the main class for the Harvester application. The main
 * method creates a single Harvester object which drives the application.
 *
 * @author    costa
 *
 */
34
public class Harvester {
35
36
  /*
37
   * Class fields
38
   */
39
40
41
  /*
42
   * Class methods
43
   */
44
45 2031 costa
46 2022 costa
  /**
47
   * Constructor. Creates a new instance of Harvester.
48
   */
49
  public Harvester() {
50
  }
51
52 2031 costa
53 2022 costa
  /**
54
   * Determines whether Harvester is running on a Win32 platform. Used
55 2031 costa
   * during development.
56 2022 costa
   *
57
   * @return    true if this in Win32, false otherwise
58
   */
59
  public static boolean isWin32 () {
60
    boolean isWin32;
61
    String osName = System.getProperty("os.name");
62
63
    isWin32 = (osName.startsWith("Windows"));
64
    return isWin32;
65
  }
66
67
68
  /**
69
    * Harvester main method.
70
    *
71
    * @param args        the command line arguments
72
    * @throws SAXException
73
    * @throws IOException
74
    * @throws ParserConfigurationException
75
    */
76
  public static void main(String[] args) {
77
    Harvester harvester = new Harvester();
78
79
    harvester.startup();                  // Start up Harvester
80
    harvester.readHarvestSiteSchedule();  // Read the database table
81
    harvester.harvest();                  // Harvest the documents
82
    harvester.shutdown();                 // Shut down Harvester
83
  }
84
85
86
  /*
87
   * Object fields
88
   */
89
90 2031 costa
  /** Database connection */
91 2022 costa
  Connection conn;
92 2031 costa
93
  /** Used during development to determine whether to connect to metacat */
94
  private boolean connectToMetacat;
95
96
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
97
  private int detailLogID;
98
99
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
100
  private int harvestLogID;
101
102
  /** End time of this harvest session */
103
  private Date harvestEndTime;
104
105
  /** List of HarvestLog objects. Stores log entries for report generation. */
106
  private ArrayList harvestLogList = new ArrayList();
107
108
  /** List of HarvestSiteSchedule objects */
109
  private ArrayList harvestSiteScheduleList = new ArrayList();
110
111
  /** Start time of this harvest session */
112
  private Date harvestStartTime;
113
114
  /** Number of days to save log records. Any that are older are purged. */
115
  int logPeriod;
116
117
  /** Metacat client object */
118 2022 costa
  Metacat metacat;
119 2031 costa
120
  /** Loads harvester properties from configuration file */
121 2022 costa
  Properties properties;
122
123
124
  /*
125
   * Object methods
126
   */
127 2031 costa
128
129
  /*
130
                    Harvester  harvester,
131
                    Date       harvestDate,
132
                    int        status,
133
                    String     message,
134
                    String     harvestOperationCode,
135
                    int        siteScheduleID,
136
                    HarvestDocument harvestDocument,
137
                    String     errorMessage
138
*/
139
  /**
140
   * Creates a new HarvestLog object and adds it to the harvestLogList.
141
   *
142
   * @param  status          the status of the harvest operation
143
   * @param  message         the message text of the harvest operation
144
   * @param  harvestOperationCode  the harvest operation code
145
   * @param  siteScheduleID  the siteScheduleID for which this operation was
146
   *                         performed. 0 indicates that the operation did not
147
   *                         involve a particular harvest site.
148
   * @param  harvestDocument the associated HarvestDocument object. May be null.
149
   * @param  errorMessage    additional error message pertaining to document
150
   *                         error.
151
   */
152
  void addLogEntry(int    status,
153
                   String message,
154
                   String harvestOperationCode,
155
                   int    siteScheduleID,
156
                   HarvestDocument harvestDocument,
157
                   String errorMessage
158
                  ) {
159
    HarvestLog harvestLog;
160 2022 costa
161 2031 costa
    /* If there is no associated harvest document, call the basic constructor;
162
     * else call the extended constructor.
163
     */
164
    if (harvestDocument == null) {
165
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
166
                                  harvestOperationCode, siteScheduleID);
167
    }
168
    else {
169
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
170
                                  harvestOperationCode, siteScheduleID,
171
                                  harvestDocument, errorMessage);
172
    }
173
174
    harvestLogList.add(harvestLog);
175
  }
176
177
178 2022 costa
  /**
179
   * Determines whether Harvester should attempt to connect to Metacat.
180
   * Used during development and testing.
181
   *
182
   * @return     true if Harvester should connect, otherwise false
183
   */
184
  boolean connectToMetacat () {
185
    return connectToMetacat;
186
  }
187
188
189
  /**
190 2031 costa
   * Gets the current value of the detailLogID for storage as a primary key in
191
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
192
   *
193
   * @return  the current value of the detailLogID
194
   */
195
  int getDetailLogID() {
196
    int currentValue = detailLogID;
197
198
    detailLogID++;
199
    return currentValue;
200
  }
201
202
203
  /**
204
   * Gets the current value of the harvestLogID for storage as a primary key in
205
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
206
   *
207
   * @return  the current value of the detailLogID
208
   */
209
  int getHarvestLogID() {
210
    int currentValue = harvestLogID;
211
212
    harvestLogID++;
213
    return currentValue;
214
  }
215
216
217
  /**
218
   * Gets the maximum value of an integer field from a table.
219
   *
220
   * @param tableName  the database table name
221
   * @param fieldName  the field name of the integer field in the table
222
   * @return  the maximum integer stored in the fieldName field of tableName
223
   */
224
  private int getMaxValue(String tableName, String fieldName) {
225
    int maxValue = 100;
226
    int fieldValue;
227
		String query = "SELECT " + fieldName + " FROM " + tableName;
228
		Statement stmt;
229
230
		try {
231
			stmt = conn.createStatement();
232
			ResultSet rs = stmt.executeQuery(query);
233
234
			while (rs.next()) {
235
				fieldValue = rs.getInt(fieldName);
236
        maxValue = Math.max(maxValue, fieldValue);
237
			}
238
239
			stmt.close();
240
		}
241
    catch(SQLException ex) {
242
			System.out.println("SQLException: " + ex.getMessage());
243
		}
244
245
    return maxValue;
246
  }
247
248
249
  /**
250
   * Gets the minimum value of an integer field from a table.
251
   *
252
   * @param tableName  the database table name
253
   * @param fieldName  the field name of the integer field in the table
254
   * @return  the minimum integer stored in the fieldName field of tableName
255
   */
256
  private int getMinValue(String tableName, String fieldName) {
257
    int minValue = 0;
258
    int fieldValue;
259
		String query = "SELECT " + fieldName + " FROM " + tableName;
260
		Statement stmt;
261
262
		try {
263
			stmt = conn.createStatement();
264
			ResultSet rs = stmt.executeQuery(query);
265
266
			while (rs.next()) {
267
				fieldValue = rs.getInt(fieldName);
268
269
        if (minValue == 0) {
270
          minValue = fieldValue;
271
        }
272
        else {
273
          minValue = Math.min(minValue, fieldValue);
274
        }
275
			}
276
277
			stmt.close();
278
		}
279
    catch(SQLException ex) {
280
			System.out.println("SQLException: " + ex.getMessage());
281
		}
282
283
    return minValue;
284
  }
285
286
287
  /**
288 2022 costa
   * For every Harvest site schedule in the database, harvest the
289
   * documents for that site if they are due to be harvested.
290
   *
291
   * @throws SAXException
292
   * @throws IOException
293
   * @throws ParserConfigurationException
294
   */
295
  private void harvest() {
296
    HarvestSiteSchedule harvestSiteSchedule;
297
298 2031 costa
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
299
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
300
      harvestSiteSchedule.harvestDocumentList();
301 2022 costa
    }
302
  }
303
304
305
  /**
306 2031 costa
   * Initializes the detailLogID and harvestLogID values to their current
307
   * maximums + 1.
308 2022 costa
   */
309 2031 costa
  private void initLogIDs() {
310
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
311
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
312
  }
313
314
315
  /**
316
   * Loads Harvester properties from a configuration file.
317
   */
318 2022 costa
  private void loadProperties() {
319
    String homedir = System.getProperty("user.home");
320
    File configfile = new File(homedir, "harvester.properties");
321
322
    properties = new Properties();
323
324
    try {
325
      properties.load(new FileInputStream(configfile));
326
      properties.list(System.out);
327
    }
328
    catch (IOException e) {
329 2031 costa
      System.out.println("IOException: " + e.getMessage());
330 2022 costa
      System.exit(1);
331
    }
332
  }
333 2031 costa
334
335
  void printHarvestSiteSchedule(int siteScheduleID) {
336
     HarvestSiteSchedule harvestSiteSchedule;
337
338
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
339
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
340
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
341
        harvestSiteSchedule.printOutput();
342
      }
343
    }
344
  }
345
346
347
  /**
348
   * Prunes old records from the HARVEST_DETAIL_LOG table. Records are
349
   * removed if the HARVEST_LOG_ID foreign key is less than the lowest
350
   * HARVEST_LOG_ID primary key in the HARVEST_LOG table.
351
   */
352
  private void pruneHarvestDetailLog() {
353
		String deleteString;
354
    int minHarvestLogID;
355
    int recordsDeleted;
356
		Statement stmt;
357 2022 costa
358 2031 costa
    minHarvestLogID = getMinValue("HARVEST_LOG", "HARVEST_LOG_ID");
359
    deleteString = "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID < " +
360
                   minHarvestLogID;
361 2022 costa
362 2031 costa
		try {
363
			System.out.print("Pruning log entries from HARVEST_DETAIL_LOG: ");
364
			stmt = conn.createStatement();
365
			recordsDeleted = stmt.executeUpdate(deleteString);
366
			System.out.println(recordsDeleted + " records deleted");
367
			stmt.close();
368
		}
369
    catch(SQLException e) {
370
			System.out.println("SQLException: " + e.getMessage());
371
		}
372
  }
373
374
375 2022 costa
  /**
376 2031 costa
   * Prunes old records from the HARVEST_LOG table. Records are removed if
377
   * their HARVEST_DATE is older than a given number of days, as stored in the
378
   * logPeriod object field.
379
   */
380
  private void pruneHarvestLog() {
381
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
382
    Date dateLastLog;                    // Prune everything prior to this date
383
		String deleteString;
384
    long delta;
385
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
386
    int recordsDeleted;
387
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
388
    String dateString;
389
		Statement stmt;
390
    long timeLastLog = 0;
391
392
    delta = logPeriod * millisecondsPerDay;
393
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
394
    timeLastLog = currentTime - delta;
395
    dateLastLog = new Date(timeLastLog);
396
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
397
    deleteString += dateString;
398
399
		try {
400
			System.out.print("Pruning log entries from HARVEST_LOG: ");
401
			stmt = conn.createStatement();
402
			recordsDeleted = stmt.executeUpdate(deleteString);
403
			System.out.println(recordsDeleted + " records deleted");
404
			stmt.close();
405
		}
406
    catch (SQLException e) {
407
			System.out.println("SQLException: " + e.getMessage());
408
		}
409
  }
410
411
412
  /**
413 2022 costa
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
414
   * a HarvestSiteSchedule object for each row in the table.
415
   */
416
  private void readHarvestSiteSchedule() {
417
    HarvestSiteSchedule harvestSiteSchedule;
418
    ResultSet rs;
419
    SQLWarning warn;
420
    Statement stmt;
421
422
    String contactEmail;
423
    String dateLastHarvest;
424
    String dateNextHarvest;
425
    String documentListURL;
426
    String ldapDN;
427 2031 costa
    String ldapPwd;
428 2022 costa
    int siteScheduleID;
429
    String unit;
430
    int updateFrequency;
431
432
    try {
433
      // Read the HARVEST_SITE_SCHEDULE table
434
      stmt = conn.createStatement();
435
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
436
      warn = rs.getWarnings();
437
438
      if (warn != null) {
439
        System.out.println("\n---Warning---\n");
440
441
        while (warn != null) {
442
          System.out.println("Message: " + warn.getMessage());
443
          System.out.println("SQLState: " + warn.getSQLState());
444
          System.out.print("Vendor error code: ");
445
          System.out.println(warn.getErrorCode());
446
          System.out.println("");
447
          warn = warn.getNextWarning();
448
        }
449
      }
450
451
      while (rs.next()) {
452
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
453
        documentListURL = rs.getString("DOCUMENTLISTURL");
454
        ldapDN = rs.getString("LDAPDN");
455 2031 costa
        ldapPwd = rs.getString("LDAPPWD");
456 2022 costa
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
457
        dateLastHarvest = rs.getString("DATELASTHARVEST");
458
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
459
        unit = rs.getString("UNIT");
460
        contactEmail = rs.getString("CONTACT_EMAIL");
461
462
        warn = rs.getWarnings();
463
464
        if (warn != null) {
465
          System.out.println("\n---Warning---\n");
466
467
          while (warn != null) {
468
            System.out.println("Message: " + warn.getMessage());
469
            System.out.println("SQLState: " + warn.getSQLState());
470
            System.out.print("Vendor error code: ");
471
            System.out.println(warn.getErrorCode());
472
            System.out.println("");
473
            warn = warn.getNextWarning();
474
          }
475
        }
476
477 2031 costa
        harvestSiteSchedule = new HarvestSiteSchedule(this,
478 2022 costa
                                                      siteScheduleID,
479
                                                      documentListURL,
480
                                                      ldapDN,
481 2031 costa
                                                      ldapPwd,
482 2022 costa
                                                      dateNextHarvest,
483
                                                      dateLastHarvest,
484
                                                      updateFrequency,
485
                                                      unit,
486
                                                      contactEmail
487
                                                     );
488 2031 costa
        harvestSiteScheduleList.add(harvestSiteSchedule);
489 2022 costa
      }
490 2031 costa
491
      rs.close();
492
      stmt.close();
493
    }
494 2022 costa
    catch (SQLException e) {
495
      System.out.println("Database access failed " + e);
496
      System.exit(1);
497
    }
498
499
  }
500
501
502
  /**
503
   * Sends a report to the Harvester administrator.
504
   */
505
  void reportToAdministrator() {
506
    System.out.println("\nSending report to administrator.");
507
  }
508
509
510
  /**
511
   * Shuts down Harvester. Performs cleanup operations such as logging out
512
   * of Metacat and disconnecting from the database.
513
   */
514
  private void shutdown() {
515
    // Log shutdown operation
516
    System.out.println("Shutting Down Harvester");
517 2031 costa
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
518
    pruneHarvestLog();
519
    pruneHarvestDetailLog();
520 2022 costa
521
    try {
522
      // Close the database connection
523
      System.out.println("Closing the database connection");
524
      conn.close();
525
    }
526
    catch (SQLException e) {
527
      System.out.println("Database access failed " + e);
528
    }
529 2031 costa
530
    writeHarvestLog();
531
    reportToAdministrator();
532 2022 costa
  }
533
534
535
  /**
536
   * Initializes Harvester at startup. Connects to the database and to Metacat.
537
   */
538
  private void startup() {
539 2031 costa
    Boolean ctm;
540 2022 costa
    String dbDriver;
541 2031 costa
    Integer lp;
542 2022 costa
    String metacatURL;
543
    String osName = Harvester.isWin32() ? "Windows" : "Unix";
544
    String password;
545 2031 costa
    //String response;
546 2022 costa
    String sessionId;
547
		String url;
548
    String user;
549
    String userName = System.getProperty("user.name");
550
    SQLWarning warn;
551
552
    // Log startup operation
553
    System.out.println("*****************************************************");
554
    System.out.println("Starting Up Harvester");
555
    loadProperties();
556 2031 costa
    ctm = Boolean.valueOf(properties.getProperty("connectToMetacat", "true"));
557
    connectToMetacat = ctm.booleanValue();
558 2022 costa
    dbDriver = properties.getProperty("dbDriver");
559 2031 costa
    try {
560
      lp = Integer.valueOf(properties.getProperty("logPeriod", "90"));
561
      logPeriod = lp.intValue();
562
    }
563
    catch (NumberFormatException e) {
564
      System.err.println("NumberFormatException: Error parsing logPeriod " +
565
                         logPeriod + e.getMessage());
566
      System.err.println("Defaulting to logPeriod of 90 days");
567
      logPeriod = 90;
568
    }
569
    metacatURL = properties.getProperty("metacatURL");
570
    password = properties.getProperty("password");
571 2022 costa
    url = properties.getProperty("url");
572
    user = properties.getProperty("user");
573
574
    // Load the jdbc driver
575
    try {
576
      Class.forName(dbDriver);
577
    }
578
    catch (ClassNotFoundException e) {
579
      System.out.println("Can't load driver " + e);
580
      System.exit(1);
581
    }
582
583
    // Make the database connection
584
    try {
585
      System.out.println("Getting connection to Harvester tables");
586
      conn = DriverManager.getConnection(url, user, password);
587
588
      // If a SQLWarning object is available, print its warning(s).
589
      // There may be multiple warnings chained.
590
      warn = conn.getWarnings();
591
592
      if (warn != null) {
593
        while (warn != null) {
594
          System.out.println("SQLState: " + warn.getSQLState());
595
          System.out.println("Message:  " + warn.getMessage());
596
          System.out.println("Vendor: " + warn.getErrorCode());
597
          System.out.println("");
598
          warn = warn.getNextWarning();
599
        }
600
      }
601
    }
602
    catch (SQLException e) {
603
      System.out.println("Database access failed " + e);
604
      System.exit(1);
605
    }
606 2031 costa
607
    initLogIDs();
608
    harvestStartTime = new Date();
609
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
610 2022 costa
611
    if (connectToMetacat()) {
612
      try {
613
        System.out.println("Connecting to Metacat: " + metacatURL);
614
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
615
      }
616
      catch (MetacatInaccessibleException e) {
617
        System.out.println("Metacat connection failed." + e.getMessage());
618
      }
619
      catch (Exception e) {
620
        System.out.println("Metacat connection failed." + e.getMessage());
621
      }
622
    }
623
  }
624
625
626
  /**
627
   * Writes one or more log entries to the HARVEST_LOG table.
628
   */
629
  private void writeHarvestLog() {
630 2031 costa
    HarvestLog harvestLog;
631
632
    for (int i = 0; i < harvestLogList.size(); i++) {
633
      harvestLog = (HarvestLog) harvestLogList.get(i);
634
      harvestLog.printOutput();
635
    }
636 2022 costa
  }
637
638
}