Project

General

Profile

1 2022 costa
/*
2
 * Harvester.java
3
 *
4
 * Created on January 14, 2004, 4:44 PM
5
 */
6
7
package edu.ucsb.nceas.metacat.harvesterClient;
8
9 2031 costa
import java.io.File;
10
import java.io.FileInputStream;
11
import java.io.IOException;
12
import java.sql.Connection;
13
import java.sql.DriverManager;
14
import java.sql.ResultSet;
15
import java.sql.SQLException;
16
import java.sql.SQLWarning;
17
import java.sql.Statement;
18
import java.util.ArrayList;
19
import java.text.SimpleDateFormat;
20
import java.util.Date;
21
import java.util.Properties;
22 2022 costa
23 2031 costa
import edu.ucsb.nceas.metacat.client.Metacat;
24
import edu.ucsb.nceas.metacat.client.MetacatFactory;
25
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
26 2022 costa
27
/**
28
 * Harvester is the main class for the Harvester application. The main
29
 * method creates a single Harvester object which drives the application.
30
 *
31
 * @author    costa
32
 *
33
 */
34
public class Harvester {
35
36
  /*
37
   * Class fields
38
   */
39
40
41
  /*
42
   * Class methods
43
   */
44
45 2031 costa
46 2022 costa
  /**
47
   * Constructor. Creates a new instance of Harvester.
48
   */
49
  public Harvester() {
    // Nothing to do here: configuration is loaded and the database
    // connection is opened later, by startup().
  }
51
52 2031 costa
53 2022 costa
  /**
54
   * Determines whether Harvester is running on a Win32 platform. Used
55 2031 costa
   * during development.
56 2022 costa
   *
57
   * @return    true if this is Win32, false otherwise
58
   */
59
  public static boolean isWin32 () {
60
    boolean isWin32;
61
    String osName = System.getProperty("os.name");
62
63
    isWin32 = (osName.startsWith("Windows"));
64
    return isWin32;
65
  }
66
67
68
  /**
69
    * Harvester main method.
70
    *
71
    * @param args        the command line arguments
72
    * @throws SAXException
73
    * @throws IOException
74
    * @throws ParserConfigurationException
75
    */
76
  public static void main(String[] args) {
77
    Harvester harvester = new Harvester();
78
79
    harvester.startup();                  // Start up Harvester
80
    harvester.readHarvestSiteSchedule();  // Read the database table
81
    harvester.harvest();                  // Harvest the documents
82
    harvester.shutdown();                 // Shut down Harvester
83
  }
84
85
86
  /*
87
   * Object fields
88
   */
89
90 2031 costa
  /** Database connection */
91 2022 costa
  Connection conn;
92 2031 costa
93
  /** Used during development to determine whether to connect to metacat */
94
  private boolean connectToMetacat;
95
96
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
97
  private int detailLogID;
98
99 2061 costa
  /** Email address of the Harvester Administrator */
100
  private String harvesterAdministrator;
101
102 2031 costa
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
103
  private int harvestLogID;
104
105
  /** End time of this harvest session */
106
  private Date harvestEndTime;
107
108
  /** List of HarvestLog objects. Stores log entries for report generation. */
109
  private ArrayList harvestLogList = new ArrayList();
110
111
  /** List of HarvestSiteSchedule objects */
112
  private ArrayList harvestSiteScheduleList = new ArrayList();
113
114
  /** Start time of this harvest session */
115
  private Date harvestStartTime;
116
117
  /** Number of days to save log records. Any that are older are purged. */
118
  int logPeriod;
119
120
  /** Metacat client object */
121 2022 costa
  Metacat metacat;
122 2031 costa
123
  /** Loads harvester properties from configuration file */
124 2022 costa
  Properties properties;
125
126
127
  /*
128
   * Object methods
129
   */
130 2031 costa
131
132
  /*
133
                    Harvester  harvester,
134
                    Date       harvestDate,
135
                    int        status,
136
                    String     message,
137
                    String     harvestOperationCode,
138
                    int        siteScheduleID,
139
                    HarvestDocument harvestDocument,
140
                    String     errorMessage
141
*/
142
  /**
143
   * Creates a new HarvestLog object and adds it to the harvestLogList.
144
   *
145
   * @param  status          the status of the harvest operation
146
   * @param  message         the message text of the harvest operation
147
   * @param  harvestOperationCode  the harvest operation code
148
   * @param  siteScheduleID  the siteScheduleID for which this operation was
149
   *                         performed. 0 indicates that the operation did not
150
   *                         involve a particular harvest site.
151
   * @param  harvestDocument the associated HarvestDocument object. May be null.
152
   * @param  errorMessage    additional error message pertaining to document
153
   *                         error.
154
   */
155
  void addLogEntry(int    status,
156
                   String message,
157
                   String harvestOperationCode,
158
                   int    siteScheduleID,
159
                   HarvestDocument harvestDocument,
160
                   String errorMessage
161
                  ) {
162
    HarvestLog harvestLog;
163 2022 costa
164 2031 costa
    /* If there is no associated harvest document, call the basic constructor;
165
     * else call the extended constructor.
166
     */
167
    if (harvestDocument == null) {
168
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
169
                                  harvestOperationCode, siteScheduleID);
170
    }
171
    else {
172
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
173
                                  harvestOperationCode, siteScheduleID,
174
                                  harvestDocument, errorMessage);
175
    }
176
177
    harvestLogList.add(harvestLog);
178
  }
179
180
181 2022 costa
  /**
182
   * Determines whether Harvester should attempt to connect to Metacat.
183
   * Used during development and testing.
184
   *
185
   * @return     true if Harvester should connect, otherwise false
186
   */
187
  boolean connectToMetacat () {
188
    return connectToMetacat;
189
  }
190 2036 costa
191
192
  /**
193
   * Normalizes text prior to insertion into the HARVEST_LOG or
194
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
195
   * character with the double quote character. This prevents SQL errors
196
   * involving words that contain single quotes. Also removes \n and \r
197
   * characters from the text.
198
   *
199
   * @param text  the original string
200
   * @return      a string containing the normalized text
201
   */
202
  String dequoteText(String text) {
203
    char c;
204
    StringBuffer stringBuffer = new StringBuffer();
205 2022 costa
206 2036 costa
    for (int i = 0; i < text.length(); i++) {
207
      c = text.charAt(i);
208
      switch (c) {
209
        case '\'':
210
          stringBuffer.append('\"');
211
          break;
212
        case '\r':
213
        case '\n':
214
          break;
215
        default:
216
          stringBuffer.append(c);
217
          break;
218
      }
219
    }
220
221
    return stringBuffer.toString();
222
  }
223 2022 costa
224 2036 costa
225 2022 costa
  /**
226 2031 costa
   * Gets the current value of the detailLogID for storage as a primary key in
227
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
228
   *
229
   * @return  the current value of the detailLogID
230
   */
231
  int getDetailLogID() {
232
    int currentValue = detailLogID;
233
234
    detailLogID++;
235
    return currentValue;
236
  }
237
238
239
  /**
240
   * Gets the current value of the harvestLogID for storage as a primary key in
241
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
242
   *
243
   * @return  the current value of the harvestLogID
244
   */
245
  int getHarvestLogID() {
246
    int currentValue = harvestLogID;
247
248
    harvestLogID++;
249
    return currentValue;
250
  }
251
252
253
  /**
254
   * Gets the maximum value of an integer field from a table.
255
   *
256
   * @param tableName  the database table name
257
   * @param fieldName  the field name of the integer field in the table
258
   * @return  the maximum integer stored in the fieldName field of tableName
259
   */
260
  private int getMaxValue(String tableName, String fieldName) {
261
    int maxValue = 100;
262
    int fieldValue;
263
		String query = "SELECT " + fieldName + " FROM " + tableName;
264
		Statement stmt;
265
266
		try {
267
			stmt = conn.createStatement();
268
			ResultSet rs = stmt.executeQuery(query);
269
270
			while (rs.next()) {
271
				fieldValue = rs.getInt(fieldName);
272
        maxValue = Math.max(maxValue, fieldValue);
273
			}
274
275
			stmt.close();
276
		}
277
    catch(SQLException ex) {
278
			System.out.println("SQLException: " + ex.getMessage());
279
		}
280
281
    return maxValue;
282
  }
283
284
285
  /**
286
   * Gets the minimum value of an integer field from a table.
287
   *
288
   * @param tableName  the database table name
289
   * @param fieldName  the field name of the integer field in the table
290
   * @return  the minimum integer stored in the fieldName field of tableName
291
   */
292
  private int getMinValue(String tableName, String fieldName) {
293
    int minValue = 0;
294
    int fieldValue;
295
		String query = "SELECT " + fieldName + " FROM " + tableName;
296
		Statement stmt;
297
298
		try {
299
			stmt = conn.createStatement();
300
			ResultSet rs = stmt.executeQuery(query);
301
302
			while (rs.next()) {
303
				fieldValue = rs.getInt(fieldName);
304
305
        if (minValue == 0) {
306
          minValue = fieldValue;
307
        }
308
        else {
309
          minValue = Math.min(minValue, fieldValue);
310
        }
311
			}
312
313
			stmt.close();
314
		}
315
    catch(SQLException ex) {
316
			System.out.println("SQLException: " + ex.getMessage());
317
		}
318
319
    return minValue;
320
  }
321
322
323
  /**
324 2022 costa
   * For every Harvest site schedule in the database, harvest the
325
   * documents for that site if they are due to be harvested.
326
   *
327
   * @throws SAXException
328
   * @throws IOException
329
   * @throws ParserConfigurationException
330
   */
331
  private void harvest() {
332
    HarvestSiteSchedule harvestSiteSchedule;
333
334 2031 costa
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
335
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
336
      harvestSiteSchedule.harvestDocumentList();
337 2022 costa
    }
338
  }
339
340
341
  /**
342 2031 costa
   * Initializes the detailLogID and harvestLogID values to their current
343
   * maximums + 1.
344 2022 costa
   */
345 2031 costa
  private void initLogIDs() {
346
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
347
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
348
  }
349
350
351
  /**
352
   * Loads Harvester properties from a configuration file.
353
   */
354 2022 costa
  private void loadProperties() {
355 2061 costa
    File configfile = new File("../../lib/harvester", "harvester.properties");
356 2022 costa
357
    properties = new Properties();
358
359
    try {
360
      properties.load(new FileInputStream(configfile));
361
      properties.list(System.out);
362
    }
363
    catch (IOException e) {
364 2031 costa
      System.out.println("IOException: " + e.getMessage());
365 2022 costa
      System.exit(1);
366
    }
367
  }
368 2031 costa
369
370
  void printHarvestSiteSchedule(int siteScheduleID) {
371
     HarvestSiteSchedule harvestSiteSchedule;
372
373
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
374
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
375
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
376
        harvestSiteSchedule.printOutput();
377
      }
378
    }
379
  }
380
381
382
  /**
383
   * Prunes old records from the HARVEST_DETAIL_LOG table. Records are
384
   * removed if the HARVEST_LOG_ID foreign key is less than the lowest
385
   * HARVEST_LOG_ID primary key in the HARVEST_LOG table.
386
   */
387
  private void pruneHarvestDetailLog() {
388
		String deleteString;
389
    int minHarvestLogID;
390
    int recordsDeleted;
391
		Statement stmt;
392 2022 costa
393 2031 costa
    minHarvestLogID = getMinValue("HARVEST_LOG", "HARVEST_LOG_ID");
394
    deleteString = "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID < " +
395
                   minHarvestLogID;
396 2022 costa
397 2031 costa
		try {
398
			System.out.print("Pruning log entries from HARVEST_DETAIL_LOG: ");
399
			stmt = conn.createStatement();
400
			recordsDeleted = stmt.executeUpdate(deleteString);
401
			System.out.println(recordsDeleted + " records deleted");
402
			stmt.close();
403
		}
404
    catch(SQLException e) {
405
			System.out.println("SQLException: " + e.getMessage());
406
		}
407
  }
408
409
410 2022 costa
  /**
411 2031 costa
   * Prunes old records from the HARVEST_LOG table. Records are removed if
412
   * their HARVEST_DATE is older than a given number of days, as stored in the
413
   * logPeriod object field.
414
   */
415
  private void pruneHarvestLog() {
416
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
417
    Date dateLastLog;                    // Prune everything prior to this date
418
		String deleteString;
419
    long delta;
420
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
421
    int recordsDeleted;
422
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
423
    String dateString;
424
		Statement stmt;
425
    long timeLastLog = 0;
426
427
    delta = logPeriod * millisecondsPerDay;
428
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
429
    timeLastLog = currentTime - delta;
430
    dateLastLog = new Date(timeLastLog);
431
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
432
    deleteString += dateString;
433
434
		try {
435
			System.out.print("Pruning log entries from HARVEST_LOG: ");
436
			stmt = conn.createStatement();
437
			recordsDeleted = stmt.executeUpdate(deleteString);
438
			System.out.println(recordsDeleted + " records deleted");
439
			stmt.close();
440
		}
441
    catch (SQLException e) {
442
			System.out.println("SQLException: " + e.getMessage());
443
		}
444
  }
445
446
447
  /**
448 2022 costa
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
449
   * a HarvestSiteSchedule object for each row in the table.
450
   */
451
  private void readHarvestSiteSchedule() {
452
    HarvestSiteSchedule harvestSiteSchedule;
453
    ResultSet rs;
454
    SQLWarning warn;
455
    Statement stmt;
456
457
    String contactEmail;
458
    String dateLastHarvest;
459
    String dateNextHarvest;
460
    String documentListURL;
461
    String ldapDN;
462 2031 costa
    String ldapPwd;
463 2022 costa
    int siteScheduleID;
464
    String unit;
465
    int updateFrequency;
466
467
    try {
468
      // Read the HARVEST_SITE_SCHEDULE table
469
      stmt = conn.createStatement();
470
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
471
      warn = rs.getWarnings();
472
473
      if (warn != null) {
474
        System.out.println("\n---Warning---\n");
475
476
        while (warn != null) {
477
          System.out.println("Message: " + warn.getMessage());
478
          System.out.println("SQLState: " + warn.getSQLState());
479
          System.out.print("Vendor error code: ");
480
          System.out.println(warn.getErrorCode());
481
          System.out.println("");
482
          warn = warn.getNextWarning();
483
        }
484
      }
485
486
      while (rs.next()) {
487
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
488
        documentListURL = rs.getString("DOCUMENTLISTURL");
489
        ldapDN = rs.getString("LDAPDN");
490 2031 costa
        ldapPwd = rs.getString("LDAPPWD");
491 2022 costa
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
492
        dateLastHarvest = rs.getString("DATELASTHARVEST");
493
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
494
        unit = rs.getString("UNIT");
495
        contactEmail = rs.getString("CONTACT_EMAIL");
496
497
        warn = rs.getWarnings();
498
499
        if (warn != null) {
500
          System.out.println("\n---Warning---\n");
501
502
          while (warn != null) {
503
            System.out.println("Message: " + warn.getMessage());
504
            System.out.println("SQLState: " + warn.getSQLState());
505
            System.out.print("Vendor error code: ");
506
            System.out.println(warn.getErrorCode());
507
            System.out.println("");
508
            warn = warn.getNextWarning();
509
          }
510
        }
511
512 2031 costa
        harvestSiteSchedule = new HarvestSiteSchedule(this,
513 2022 costa
                                                      siteScheduleID,
514
                                                      documentListURL,
515
                                                      ldapDN,
516 2031 costa
                                                      ldapPwd,
517 2022 costa
                                                      dateNextHarvest,
518
                                                      dateLastHarvest,
519
                                                      updateFrequency,
520
                                                      unit,
521
                                                      contactEmail
522
                                                     );
523 2031 costa
        harvestSiteScheduleList.add(harvestSiteSchedule);
524 2022 costa
      }
525 2031 costa
526
      rs.close();
527
      stmt.close();
528
    }
529 2022 costa
    catch (SQLException e) {
530
      System.out.println("Database access failed " + e);
531
      System.exit(1);
532
    }
533
534
  }
535
536
537
  /**
538
   * Sends a report to the Harvester administrator.
539
   */
540
  void reportToAdministrator() {
541
    System.out.println("\nSending report to administrator.");
542
  }
543
544
545
  /**
546
   * Shuts down Harvester. Performs cleanup operations such as logging out
547
   * of Metacat and disconnecting from the database.
548
   */
549
  private void shutdown() {
550
    // Log shutdown operation
551
    System.out.println("Shutting Down Harvester");
552 2031 costa
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
553
    pruneHarvestLog();
554
    pruneHarvestDetailLog();
555 2022 costa
556
    try {
557
      // Close the database connection
558
      System.out.println("Closing the database connection");
559
      conn.close();
560
    }
561
    catch (SQLException e) {
562
      System.out.println("Database access failed " + e);
563
    }
564 2031 costa
565
    writeHarvestLog();
566
    reportToAdministrator();
567 2022 costa
  }
568
569
570
  /**
571
   * Initializes Harvester at startup. Connects to the database and to Metacat.
572
   */
573
  private void startup() {
574 2031 costa
    Boolean ctm;
575 2022 costa
    String dbDriver;
576 2031 costa
    Integer lp;
577 2022 costa
    String metacatURL;
578
    String osName = Harvester.isWin32() ? "Windows" : "Unix";
579
    String password;
580 2031 costa
    //String response;
581 2022 costa
    String sessionId;
582
		String url;
583
    String user;
584
    String userName = System.getProperty("user.name");
585
    SQLWarning warn;
586
587
    // Log startup operation
588
    System.out.println("*****************************************************");
589
    System.out.println("Starting Up Harvester");
590
    loadProperties();
591 2031 costa
    ctm = Boolean.valueOf(properties.getProperty("connectToMetacat", "true"));
592
    connectToMetacat = ctm.booleanValue();
593 2022 costa
    dbDriver = properties.getProperty("dbDriver");
594 2061 costa
    harvesterAdministrator = properties.getProperty("harvesterAdministrator");
595 2031 costa
    try {
596
      lp = Integer.valueOf(properties.getProperty("logPeriod", "90"));
597
      logPeriod = lp.intValue();
598
    }
599
    catch (NumberFormatException e) {
600
      System.err.println("NumberFormatException: Error parsing logPeriod " +
601
                         logPeriod + e.getMessage());
602
      System.err.println("Defaulting to logPeriod of 90 days");
603
      logPeriod = 90;
604
    }
605
    metacatURL = properties.getProperty("metacatURL");
606
    password = properties.getProperty("password");
607 2022 costa
    url = properties.getProperty("url");
608
    user = properties.getProperty("user");
609
610
    // Load the jdbc driver
611
    try {
612
      Class.forName(dbDriver);
613
    }
614
    catch (ClassNotFoundException e) {
615
      System.out.println("Can't load driver " + e);
616
      System.exit(1);
617
    }
618
619
    // Make the database connection
620
    try {
621
      System.out.println("Getting connection to Harvester tables");
622
      conn = DriverManager.getConnection(url, user, password);
623
624
      // If a SQLWarning object is available, print its warning(s).
625
      // There may be multiple warnings chained.
626
      warn = conn.getWarnings();
627
628
      if (warn != null) {
629
        while (warn != null) {
630
          System.out.println("SQLState: " + warn.getSQLState());
631
          System.out.println("Message:  " + warn.getMessage());
632
          System.out.println("Vendor: " + warn.getErrorCode());
633
          System.out.println("");
634
          warn = warn.getNextWarning();
635
        }
636
      }
637
    }
638
    catch (SQLException e) {
639
      System.out.println("Database access failed " + e);
640
      System.exit(1);
641
    }
642 2031 costa
643
    initLogIDs();
644
    harvestStartTime = new Date();
645
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
646 2022 costa
647
    if (connectToMetacat()) {
648
      try {
649
        System.out.println("Connecting to Metacat: " + metacatURL);
650
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
651
      }
652
      catch (MetacatInaccessibleException e) {
653
        System.out.println("Metacat connection failed." + e.getMessage());
654
      }
655
      catch (Exception e) {
656
        System.out.println("Metacat connection failed." + e.getMessage());
657
      }
658
    }
659
  }
660
661
662
  /**
663
   * Writes one or more log entries to the HARVEST_LOG table.
664
   */
665
  private void writeHarvestLog() {
666 2031 costa
    HarvestLog harvestLog;
667
668
    for (int i = 0; i < harvestLogList.size(); i++) {
669
      harvestLog = (HarvestLog) harvestLogList.get(i);
670
      harvestLog.printOutput();
671
    }
672 2022 costa
  }
673
674
}