Project

General

Profile

1 2094 jones
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the
4
 *                  Regents of the University of California
5 2022 costa
 *
6 2094 jones
 *   '$Author$'
7
 *     '$Date$'
8
 * '$Revision$'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 2022 costa
 */
24
25
package edu.ucsb.nceas.metacat.harvesterClient;
26
27 2086 costa
import com.oreilly.servlet.MailMessage;
28 2155 costa
import edu.ucsb.nceas.utilities.Options;
29 2031 costa
import java.io.File;
30
import java.io.FileInputStream;
31
import java.io.IOException;
32 2086 costa
import java.io.PrintStream;
33 2031 costa
import java.sql.Connection;
34
import java.sql.DriverManager;
35
import java.sql.ResultSet;
36
import java.sql.SQLException;
37
import java.sql.SQLWarning;
38
import java.sql.Statement;
39
import java.util.ArrayList;
40
import java.text.SimpleDateFormat;
41
import java.util.Date;
42 2022 costa
43 2031 costa
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46 2022 costa
47
/**
48
 * Harvester is the main class for the Harvester application. The main
49
 * method creates a single Harvester object which drives the application.
50
 *
51
 * @author    costa
52
 *
53
 */
54
public class Harvester {
55
56
  /*
57
   * Class fields
58
   */
59 2155 costa
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
60
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
61
  private static final String CONFIG_NAME = "metacat.properties";
62
  public static final String filler = "*";
63
  public static final String marker =
64 2062 costa
"*****************************************************************************";
65 2155 costa
  public static Options options = null;
66 2022 costa
67
68
  /*
69
   * Class methods
70
   */
71
72 2031 costa
73 2022 costa
  /**
74
   * Constructor. Creates a new instance of Harvester.
75
   */
76
  public Harvester() {
77
  }
78
79 2031 costa
80 2022 costa
  /**
81 2155 costa
   * Loads Harvester options from a configuration file.
82 2022 costa
   */
83 2155 costa
  public static void loadOptions(boolean test) {
84
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;
85
    File propertyFile = new File(configDir, CONFIG_NAME);
86 2062 costa
87
    try {
88 2155 costa
      options = Options.initialize(propertyFile);
89
    }
90 2062 costa
    catch (IOException e) {
91 2155 costa
      System.out.println("Error in loading options: " + e.getMessage());
92 2062 costa
    }
93 2022 costa
  }
94
95 2062 costa
96 2022 costa
  /**
97
    * Harvester main method.
98
    *
99
    * @param args        the command line arguments
100
    * @throws SAXException
101
    * @throws IOException
102
    * @throws ParserConfigurationException
103
    */
104
  public static void main(String[] args) {
105 2062 costa
    Integer delayDefault = new Integer(0); // Default number of hours delay
106
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
107
    Integer d;                            // Used for determining delay
108
    long delta;                           // endTime - startTime
109
    long endTime;                         // time that a harvest completes
110
    Harvester harvester;                  // object for a single harvest run
111
    Integer maxHarvestsDefault = new Integer(30);    // Default max harvests
112
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
113
    Integer mh;                              // used in determining max harvests
114
    int nHarvests = 0;                      // counts the number of harvest runs
115 2108 costa
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
116 2062 costa
    Integer periodDefault = new Integer(24); // Default hours between harvests
117
    int period = periodDefault.intValue();   // Hours between harvests
118
    Integer p;                               // Used in determining the period
119 2108 costa
    long startTime;                          // time that a harvest run starts
120 2155 costa
    boolean test = false;                    // Passed to loadOption()
121 2062 costa
122
    System.out.println(marker);
123
    System.out.println("Starting Harvester");
124 2155 costa
    Harvester.loadOptions(test);
125 2062 costa
126
    // Parse the delay property. Use default if necessary.
127
    try {
128 2155 costa
      d = Integer.valueOf(options.getOption("delay"));
129 2062 costa
      delay = d.intValue();
130
    }
131
    catch (NumberFormatException e) {
132
      System.out.println("NumberFormatException: Error parsing delay: " +
133
                         e.getMessage());
134
      System.out.println("Defaulting to delay=" + delayDefault);
135
      delay = delayDefault.intValue();
136
    }
137
138
    // Parse the maxHarvests property. Use default if necessary.
139
    try {
140 2155 costa
      mh = Integer.valueOf(options.getOption("maxHarvests"));
141 2062 costa
      maxHarvests = mh.intValue();
142
    }
143
    catch (NumberFormatException e) {
144
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
145
                         e.getMessage());
146
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
147
      maxHarvests = maxHarvestsDefault.intValue();
148
    }
149
150
    // Parse the period property. Use default if necessary.
151
    try {
152 2155 costa
      p = Integer.valueOf(options.getOption("period"));
153 2062 costa
      period = p.intValue();
154
    }
155
    catch (NumberFormatException e) {
156
      System.out.println("NumberFormatException: Error parsing period: " +
157
                         e.getMessage());
158
      System.out.println("Defaulting to period=" + periodDefault);
159
      period = periodDefault.intValue();
160
    }
161 2022 costa
162 2062 costa
    // Sleep for delay number of hours prior to starting first harvest
163
    if (delay > 0) {
164
      try {
165
        System.out.print("First harvest will begin in " + delay);
166
        if (delay == 1) {
167
          System.out.println(" hour.");
168
        }
169
        else {
170
          System.out.println(" hours.");
171
        }
172 2105 costa
        Thread.sleep(delay * oneHour);
173 2062 costa
      }
174
      catch (InterruptedException e) {
175
          System.err.println("InterruptedException: " + e.getMessage());
176
          System.exit(1);
177
      }
178
    }
179
180
    // Repeat a new harvest once every period number of hours, until we reach
181
    // the maximum number of harvests. Subtract delta from the time period so
182
    // that each harvest will start at a fixed interval.
183
    //
184
    while (nHarvests < maxHarvests) {
185
      nHarvests++;
186
      startTime = System.currentTimeMillis();
187
      harvester = new Harvester();                // New object for this harvest
188
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
189
      harvester.readHarvestSiteSchedule();        // Read the database table
190
      harvester.harvest();                        // Harvest the documents
191
      harvester.shutdown();                       // Shut down Harvester
192
      endTime = System.currentTimeMillis();
193
      delta = endTime - startTime;
194
195
      if (nHarvests < maxHarvests) {
196
        try {
197 2203 costa
          System.out.println("Next harvest will begin in " +
198
                             period + " hours.");
199 2108 costa
          Thread.sleep((period * oneHour) - delta);
200 2062 costa
        }
201
        catch (InterruptedException e) {
202
          System.err.println("InterruptedException: " + e.getMessage());
203
          System.exit(1);
204
        }
205
      }
206
    }
207 2022 costa
  }
208
209
210
  /*
211
   * Object fields
212
   */
213
214 2031 costa
  /** Database connection */
215 2139 costa
  private Connection conn = null;
216 2031 costa
217 2062 costa
  /** Used during development to determine whether to connect to metacat
218
   *  Sometimes it's useful to test parts of the code without actually
219
   *  connecting to Metacat.
220
   */
221 2031 costa
  private boolean connectToMetacat;
222
223
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
224
  private int detailLogID;
225
226 2061 costa
  /** Email address of the Harvester Administrator */
227 2105 costa
  String harvesterAdministrator;
228 2061 costa
229 2031 costa
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
230
  private int harvestLogID;
231
232
  /** End time of this harvest session */
233
  private Date harvestEndTime;
234
235
  /** List of HarvestLog objects. Stores log entries for report generation. */
236
  private ArrayList harvestLogList = new ArrayList();
237
238
  /** List of HarvestSiteSchedule objects */
239
  private ArrayList harvestSiteScheduleList = new ArrayList();
240
241
  /** Start time of this harvest session */
242
  private Date harvestStartTime;
243
244
  /** Number of days to save log records. Any that are older are purged. */
245
  int logPeriod;
246
247
  /** Metacat client object */
248 2022 costa
  Metacat metacat;
249 2031 costa
250 2086 costa
  /** SMTP server for sending mail messages */
251
  String smtpServer;
252
253 2108 costa
  /** The timestamp for this harvest run. Used for output only. */
254
  String timestamp;
255
256 2022 costa
257
  /*
258
   * Object methods
259
   */
260 2031 costa
261
  /**
262
   * Creates a new HarvestLog object and adds it to the harvestLogList.
263
   *
264
   * @param  status          the status of the harvest operation
265
   * @param  message         the message text of the harvest operation
266
   * @param  harvestOperationCode  the harvest operation code
267
   * @param  siteScheduleID  the siteScheduleID for which this operation was
268
   *                         performed. 0 indicates that the operation did not
269
   *                         involve a particular harvest site.
270
   * @param  harvestDocument the associated HarvestDocument object. May be null.
271
   * @param  errorMessage    additional error message pertaining to document
272
   *                         error.
273
   */
274
  void addLogEntry(int    status,
275
                   String message,
276
                   String harvestOperationCode,
277
                   int    siteScheduleID,
278
                   HarvestDocument harvestDocument,
279
                   String errorMessage
280
                  ) {
281
    HarvestLog harvestLog;
282 2139 costa
    int harvestLogID = getHarvestLogID();
283
    int detailLogID;
284 2022 costa
285 2031 costa
    /* If there is no associated harvest document, call the basic constructor;
286
     * else call the extended constructor.
287
     */
288
    if (harvestDocument == null) {
289 2139 costa
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime,
290
                                  status, message, harvestOperationCode,
291
                                  siteScheduleID);
292 2031 costa
    }
293
    else {
294 2139 costa
      detailLogID = getDetailLogID();
295
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID,
296
                                  harvestStartTime, status, message,
297 2031 costa
                                  harvestOperationCode, siteScheduleID,
298
                                  harvestDocument, errorMessage);
299
    }
300
301
    harvestLogList.add(harvestLog);
302
  }
303 2139 costa
304
305
  public void closeConnection() {
306
    try {
307
      // Close the database connection
308
      System.out.println("Closing the database connection.");
309
      conn.close();
310
    }
311
    catch (SQLException e) {
312
      System.out.println("Database access failed " + e);
313
    }
314
  }
315 2031 costa
316
317 2022 costa
  /**
318
   * Determines whether Harvester should attempt to connect to Metacat.
319
   * Used during development and testing.
320
   *
321
   * @return     true if Harvester should connect, otherwise false
322
   */
323
  boolean connectToMetacat () {
324
    return connectToMetacat;
325
  }
326 2036 costa
327
328
  /**
329
   * Normalizes text prior to insertion into the HARVEST_LOG or
330
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
331
   * character with the double quote character. This prevents SQL errors
332
   * involving words that contain single quotes. Also removes \n and \r
333
   * characters from the text.
334
   *
335
   * @param text  the original string
336
   * @return      a string containing the normalized text
337
   */
338 2139 costa
  public String dequoteText(String text) {
339 2036 costa
    char c;
340
    StringBuffer stringBuffer = new StringBuffer();
341 2022 costa
342 2036 costa
    for (int i = 0; i < text.length(); i++) {
343
      c = text.charAt(i);
344
      switch (c) {
345
        case '\'':
346
          stringBuffer.append('\"');
347
          break;
348
        case '\r':
349
        case '\n':
350
          break;
351
        default:
352
          stringBuffer.append(c);
353
          break;
354
      }
355
    }
356
357
    return stringBuffer.toString();
358
  }
359 2139 costa
360
  /**
361
   * Returns a connection to the database. Opens the connection if a connection
362
   * has not already been made previously.
363
   *
364
   * @return  conn  the database Connection object
365
   */
366
  public Connection getConnection() {
367
    String dbDriver = "";
368
		String defaultDB;
369
    String password;
370
    String user;
371
    SQLWarning warn;
372
373
    if (conn == null) {
374 2155 costa
      dbDriver = options.getOption("dbDriver");
375
      defaultDB = options.getOption("defaultDB");
376
      password = options.getOption("password");
377
      user = options.getOption("user");
378 2022 costa
379 2139 costa
      // Load the jdbc driver
380
      try {
381
        Class.forName(dbDriver);
382
      }
383
      catch (ClassNotFoundException e) {
384
        System.out.println("Can't load driver " + e);
385
        System.exit(1);
386
      }
387
388
      // Make the database connection
389
      try {
390
        System.out.println("Getting connection to Harvester tables");
391
        conn = DriverManager.getConnection(defaultDB, user, password);
392
393
        // If a SQLWarning object is available, print its warning(s).
394
        // There may be multiple warnings chained.
395
        warn = conn.getWarnings();
396
397
        if (warn != null) {
398
          while (warn != null) {
399
            System.out.println("SQLState: " + warn.getSQLState());
400
            System.out.println("Message:  " + warn.getMessage());
401
            System.out.println("Vendor: " + warn.getErrorCode());
402
            System.out.println("");
403
            warn = warn.getNextWarning();
404
          }
405
        }
406
      }
407
      catch (SQLException e) {
408
        System.out.println("Database access failed " + e);
409
        System.exit(1);
410
      }
411
    }
412
413
    return conn;
414
  }
415
416
417 2022 costa
  /**
418 2031 costa
   * Gets the current value of the detailLogID for storage as a primary key in
419
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
420
   *
421
   * @return  the current value of the detailLogID
422
   */
423 2139 costa
  public int getDetailLogID() {
424 2031 costa
    int currentValue = detailLogID;
425
426
    detailLogID++;
427
    return currentValue;
428
  }
429
430
431
  /**
432
   * Gets the current value of the harvestLogID for storage as a primary key in
433
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
434
   *
435
   * @return  the current value of the detailLogID
436
   */
437 2139 costa
  public int getHarvestLogID() {
438 2031 costa
    int currentValue = harvestLogID;
439
440
    harvestLogID++;
441
    return currentValue;
442
  }
443
444
445
  /**
446
   * Gets the maximum value of an integer field from a table.
447
   *
448
   * @param tableName  the database table name
449
   * @param fieldName  the field name of the integer field in the table
450
   * @return  the maximum integer stored in the fieldName field of tableName
451
   */
452
  private int getMaxValue(String tableName, String fieldName) {
453 2139 costa
    int maxValue = 0;
454 2031 costa
    int fieldValue;
455
		String query = "SELECT " + fieldName + " FROM " + tableName;
456
		Statement stmt;
457
458
		try {
459
			stmt = conn.createStatement();
460
			ResultSet rs = stmt.executeQuery(query);
461
462
			while (rs.next()) {
463
				fieldValue = rs.getInt(fieldName);
464
        maxValue = Math.max(maxValue, fieldValue);
465
			}
466
467
			stmt.close();
468
		}
469
    catch(SQLException ex) {
470
			System.out.println("SQLException: " + ex.getMessage());
471
		}
472
473
    return maxValue;
474
  }
475
476
477
  /**
478
   * Gets the minimum value of an integer field from a table.
479
   *
480
   * @param tableName  the database table name
481
   * @param fieldName  the field name of the integer field in the table
482
   * @return  the minimum integer stored in the fieldName field of tableName
483
   */
484
  private int getMinValue(String tableName, String fieldName) {
485
    int minValue = 0;
486
    int fieldValue;
487
		String query = "SELECT " + fieldName + " FROM " + tableName;
488
		Statement stmt;
489
490
		try {
491
			stmt = conn.createStatement();
492
			ResultSet rs = stmt.executeQuery(query);
493
494
			while (rs.next()) {
495
				fieldValue = rs.getInt(fieldName);
496
497
        if (minValue == 0) {
498
          minValue = fieldValue;
499
        }
500
        else {
501
          minValue = Math.min(minValue, fieldValue);
502
        }
503
			}
504
505
			stmt.close();
506
		}
507
    catch(SQLException ex) {
508
			System.out.println("SQLException: " + ex.getMessage());
509
		}
510
511
    return minValue;
512
  }
513
514
515
  /**
516 2022 costa
   * For every Harvest site schedule in the database, harvest the
517
   * documents for that site if they are due to be harvested.
518
   *
519
   * @throws SAXException
520
   * @throws IOException
521
   * @throws ParserConfigurationException
522
   */
523
  private void harvest() {
524
    HarvestSiteSchedule harvestSiteSchedule;
525
526 2031 costa
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
527
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
528
      harvestSiteSchedule.harvestDocumentList();
529 2022 costa
    }
530
  }
531
532
533
  /**
534 2031 costa
   * Initializes the detailLogID and harvestLogID values to their current
535
   * maximums + 1.
536 2022 costa
   */
537 2139 costa
  public void initLogIDs() {
538 2031 costa
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
539
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
540
  }
541
542 2062 costa
543 2031 costa
  /**
544 2155 costa
   * Prints the header of the harvest report.
545 2086 costa
   *
546 2155 costa
   * @param out            the PrintStream object to print to
547
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
548
   *                       have a value of 0 if no particular site is involved,
549
   *                       which indicates that the report is being prepared
550
   *                       for the Harvester Administrator rather than for a
551
   *                       particular Site Contact.
552 2086 costa
   */
553 2155 costa
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
554 2086 costa
    HarvestLog harvestLog;
555 2105 costa
    int logSiteScheduleID;
556
    int nErrors = 0;
557
    String phrase;
558 2086 costa
559
    for (int i = 0; i < harvestLogList.size(); i++) {
560
      harvestLog = (HarvestLog) harvestLogList.get(i);
561 2105 costa
      logSiteScheduleID = harvestLog.getSiteScheduleID();
562
563
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
564
        if (harvestLog.isErrorEntry()) {
565
          nErrors++;
566
        }
567
      }
568 2086 costa
    }
569 2105 costa
570
    out.println(marker);
571 2155 costa
    out.println(filler);
572 2108 costa
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
573 2155 costa
    out.println(filler);
574 2105 costa
575
    if (nErrors > 0) {
576
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
577
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
578
      out.println("* Please see the log entries below for additonal details.");
579
    }
580
    else {
581
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
582
    }
583
584 2155 costa
    out.println(filler);
585 2105 costa
    out.println(marker);
586 2155 costa
  }
587
588 2105 costa
589 2155 costa
  /**
590
   * Prints harvest log entries for this harvest run. Entries may be filtered
591
   * for a particular site, or all entries may be printed.
592
   *
593
   * @param out            the PrintStream object to write to
594
   * @param maxCodeLevel   the maximum code level that should be printed,
595
   *                       e.g. "warning". Any log entries higher than this
596
   *                       level will not be printed.
597
   * @param siteScheduleID if greater than 0, indicates that the log
598
   *                       entry should only be printed for a particular site
599
   *                       as identified by its siteScheduleID. if 0, then
600
   *                       print output for all sites.
601
   */
602
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
603
                      ) {
604
    HarvestLog harvestLog;
605
    int logSiteScheduleID;
606
    int nErrors = 0;
607
    String phrase;
608
609
    out.println("");
610
    out.println(marker);
611
    out.println(filler);
612
    out.println("*                       LOG ENTRIES");
613
    out.println(filler);
614
    out.println(marker);
615
616 2105 costa
    for (int i = 0; i < harvestLogList.size(); i++) {
617
      harvestLog = (HarvestLog) harvestLogList.get(i);
618
      logSiteScheduleID = harvestLog.getSiteScheduleID();
619
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
620
        harvestLog.printOutput(out, maxCodeLevel);
621
      }
622
    }
623 2086 costa
  }
624
625
626
  /**
627 2062 costa
   * Prints the site schedule data for a given site.
628
   *
629 2086 costa
   * @param out              the PrintStream to write to
630 2062 costa
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
631 2031 costa
   */
632 2086 costa
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
633 2031 costa
     HarvestSiteSchedule harvestSiteSchedule;
634
635
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
636
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
637
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
638 2086 costa
        harvestSiteSchedule.printOutput(out);
639 2031 costa
      }
640
    }
641
  }
642
643
644
  /**
645
   * Prunes old records from the HARVEST_DETAIL_LOG table. Records are
646
   * removed if the HARVEST_LOG_ID foreign key is less than the lowest
647
   * HARVEST_LOG_ID primary key in the HARVEST_LOG table.
648
   */
649
  private void pruneHarvestDetailLog() {
650
		String deleteString;
651
    int minHarvestLogID;
652
    int recordsDeleted;
653
		Statement stmt;
654 2022 costa
655 2031 costa
    minHarvestLogID = getMinValue("HARVEST_LOG", "HARVEST_LOG_ID");
656
    deleteString = "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID < " +
657
                   minHarvestLogID;
658 2022 costa
659 2031 costa
		try {
660
			System.out.print("Pruning log entries from HARVEST_DETAIL_LOG: ");
661 2203 costa
      System.out.println(deleteString);
662 2031 costa
			stmt = conn.createStatement();
663
			recordsDeleted = stmt.executeUpdate(deleteString);
664
			System.out.println(recordsDeleted + " records deleted");
665
			stmt.close();
666
		}
667
    catch(SQLException e) {
668
			System.out.println("SQLException: " + e.getMessage());
669
		}
670
  }
671
672
673 2022 costa
  /**
674 2031 costa
   * Prunes old records from the HARVEST_LOG table. Records are removed if
675
   * their HARVEST_DATE is older than a given number of days, as stored in the
676
   * logPeriod object field.
677
   */
678
  private void pruneHarvestLog() {
679
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
680
    Date dateLastLog;                    // Prune everything prior to this date
681
		String deleteString;
682
    long delta;
683
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
684
    int recordsDeleted;
685
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
686
    String dateString;
687
		Statement stmt;
688
    long timeLastLog = 0;
689
690
    delta = logPeriod * millisecondsPerDay;
691
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
692
    timeLastLog = currentTime - delta;
693
    dateLastLog = new Date(timeLastLog);
694
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
695
    deleteString += dateString;
696
697
		try {
698
			System.out.print("Pruning log entries from HARVEST_LOG: ");
699 2203 costa
      System.out.println(deleteString);
700 2031 costa
			stmt = conn.createStatement();
701
			recordsDeleted = stmt.executeUpdate(deleteString);
702
			System.out.println(recordsDeleted + " records deleted");
703
			stmt.close();
704
		}
705
    catch (SQLException e) {
706
			System.out.println("SQLException: " + e.getMessage());
707
		}
708
  }
709
710
711
  /**
712 2022 costa
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
713
   * a HarvestSiteSchedule object for each row in the table.
714
   */
715
  private void readHarvestSiteSchedule() {
716
    HarvestSiteSchedule harvestSiteSchedule;
717
    ResultSet rs;
718
    SQLWarning warn;
719
    Statement stmt;
720
721
    String contactEmail;
722
    String dateLastHarvest;
723
    String dateNextHarvest;
724
    String documentListURL;
725
    String ldapDN;
726 2031 costa
    String ldapPwd;
727 2022 costa
    int siteScheduleID;
728
    String unit;
729
    int updateFrequency;
730
731
    try {
732
      // Read the HARVEST_SITE_SCHEDULE table
733
      stmt = conn.createStatement();
734
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
735
      warn = rs.getWarnings();
736
737
      if (warn != null) {
738
        System.out.println("\n---Warning---\n");
739
740
        while (warn != null) {
741
          System.out.println("Message: " + warn.getMessage());
742
          System.out.println("SQLState: " + warn.getSQLState());
743
          System.out.print("Vendor error code: ");
744
          System.out.println(warn.getErrorCode());
745
          System.out.println("");
746
          warn = warn.getNextWarning();
747
        }
748
      }
749
750
      while (rs.next()) {
751
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
752
        documentListURL = rs.getString("DOCUMENTLISTURL");
753
        ldapDN = rs.getString("LDAPDN");
754 2031 costa
        ldapPwd = rs.getString("LDAPPWD");
755 2022 costa
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
756
        dateLastHarvest = rs.getString("DATELASTHARVEST");
757
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
758
        unit = rs.getString("UNIT");
759
        contactEmail = rs.getString("CONTACT_EMAIL");
760
761
        warn = rs.getWarnings();
762
763
        if (warn != null) {
764
          System.out.println("\n---Warning---\n");
765
766
          while (warn != null) {
767
            System.out.println("Message: " + warn.getMessage());
768
            System.out.println("SQLState: " + warn.getSQLState());
769
            System.out.print("Vendor error code: ");
770
            System.out.println(warn.getErrorCode());
771
            System.out.println("");
772
            warn = warn.getNextWarning();
773
          }
774
        }
775
776 2031 costa
        harvestSiteSchedule = new HarvestSiteSchedule(this,
777 2022 costa
                                                      siteScheduleID,
778
                                                      documentListURL,
779
                                                      ldapDN,
780 2031 costa
                                                      ldapPwd,
781 2022 costa
                                                      dateNextHarvest,
782
                                                      dateLastHarvest,
783
                                                      updateFrequency,
784
                                                      unit,
785
                                                      contactEmail
786
                                                     );
787 2031 costa
        harvestSiteScheduleList.add(harvestSiteSchedule);
788 2022 costa
      }
789 2031 costa
790
      rs.close();
791
      stmt.close();
792
    }
793 2022 costa
    catch (SQLException e) {
794
      System.out.println("Database access failed " + e);
795
      System.exit(1);
796
    }
797
798
  }
799
800
801
  /**
802 2155 costa
   * Sends a report to the Harvester Administrator. The report prints each log
803 2086 costa
   * entry pertaining to this harvest run.
804 2105 costa
   *
805
   * @param maxCodeLevel  the maximum code level that should be printed,
806
   *                      e.g. "warning". Any log entries higher than this
807
   *                      level will not be printed.
808 2022 costa
   */
809 2105 costa
  void reportToAdministrator(String maxCodeLevel) {
810 2086 costa
    PrintStream body;
811
    String from = harvesterAdministrator;
812 2330 costa
    String[] fromArray;
813 2086 costa
    MailMessage msg;
814 2105 costa
    int siteScheduleID = 0;
815 2108 costa
    String subject = "Report from Metacat Harvester: " + timestamp;
816 2086 costa
    String to = harvesterAdministrator;
817
818
    if (!to.equals("")) {
819
      System.out.println("Sending report to Harvester Administrator at address "
820
                         + harvesterAdministrator);
821
822
      try {
823
        msg = new MailMessage(smtpServer);
824 2330 costa
825
        if (from.indexOf(',') > 0) {
826
          fromArray = from.split(",");
827
828
          for (int i = 0; i < fromArray.length; i++) {
829
            if (i == 0) {
830
              msg.from(fromArray[i]);
831
            }
832
833
            msg.to(fromArray[i]);
834
          }
835
        }
836
        else if (from.indexOf(';') > 0) {
837
          fromArray = from.split(";");
838
839
          for (int i = 0; i < fromArray.length; i++) {
840
            if (i == 0) {
841
              msg.from(fromArray[i]);
842
            }
843
844
            msg.to(fromArray[i]);
845
          }
846
        }
847
        else {
848
          msg.from(from);
849
          msg.to(to);
850
        }
851
852 2086 costa
        msg.setSubject(subject);
853
        body = msg.getPrintStream();
854 2155 costa
        printHarvestHeader(body, siteScheduleID);
855 2105 costa
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
856 2086 costa
        msg.sendAndClose();
857
      }
858
      catch (IOException e) {
859
        System.out.println("There was a problem sending email to " + to);
860
        System.out.println("IOException: " + e.getMessage());
861
      }
862 2139 costa
    }
863 2022 costa
  }
864 2139 costa
865
866
  /**
867
   * Sets the harvest start time for this harvest run.
868
   *
869
   * @param date
870
   */
871
  public void setHarvestStartTime(Date date) {
872
    harvestStartTime = date;
873
  }
874 2022 costa
875
876
  /**
877
   * Shuts down Harvester. Performs cleanup operations such as logging out
878
   * of Metacat and disconnecting from the database.
879
   */
880
  private void shutdown() {
881 2105 costa
    String maxCodeLevel = "debug";  // Print all log entries from level 1
882
                                    // ("error") to level 5 ("debug")
883
    int siteScheduleID = 0;
884
885 2022 costa
    // Log shutdown operation
886
    System.out.println("Shutting Down Harvester");
887 2031 costa
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
888
    pruneHarvestLog();
889
    pruneHarvestDetailLog();
890 2139 costa
    closeConnection();
891 2105 costa
    // Print log to standard output and then email the Harvester administrator
892
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
893
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
894 2022 costa
  }
895
896
897
  /**
898
   * Initializes Harvester at startup. Connects to the database and to Metacat.
899 2062 costa
   *
900
   * @param nHarvests        the nth harvest
901
   * @param maxHarvests      the maximum number of harvests that this process
902
   *                         can run
903 2022 costa
   */
904 2062 costa
  private void startup(int nHarvests, int maxHarvests) {
905 2031 costa
    Boolean ctm;
906 2155 costa
    String httpserver;
907 2031 costa
    Integer lp;
908 2022 costa
    String metacatURL;
909 2139 costa
    Date now = new Date();
910 2305 costa
    String servletPath;
911 2022 costa
912 2139 costa
    timestamp = now.toString();
913 2062 costa
    System.out.println(Harvester.marker);
914 2108 costa
    System.out.println(timestamp + ": Starting Next Harvest (" +
915 2062 costa
                       nHarvests + "/" + maxHarvests + ")");
916 2155 costa
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
917 2031 costa
    connectToMetacat = ctm.booleanValue();
918 2155 costa
    harvesterAdministrator = options.getOption("harvesterAdministrator");
919
    smtpServer = options.getOption("smtpServer");
920 2062 costa
921 2031 costa
    try {
922 2155 costa
      lp = Integer.valueOf(options.getOption("logPeriod"));
923 2031 costa
      logPeriod = lp.intValue();
924
    }
925
    catch (NumberFormatException e) {
926
      System.err.println("NumberFormatException: Error parsing logPeriod " +
927
                         logPeriod + e.getMessage());
928
      System.err.println("Defaulting to logPeriod of 90 days");
929
      logPeriod = 90;
930
    }
931 2062 costa
932 2139 costa
    conn = getConnection();
933 2031 costa
    initLogIDs();
934 2139 costa
    setHarvestStartTime(now);
935
    // Log startup operation
936 2031 costa
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
937 2022 costa
938
    if (connectToMetacat()) {
939
      try {
940 2155 costa
        httpserver = options.getOption("httpserver");
941 2305 costa
        servletPath = options.getOption("servletpath");
942
        metacatURL = httpserver + servletPath;
943 2022 costa
        System.out.println("Connecting to Metacat: " + metacatURL);
944
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
945
      }
946
      catch (MetacatInaccessibleException e) {
947
        System.out.println("Metacat connection failed." + e.getMessage());
948
      }
949
      catch (Exception e) {
950
        System.out.println("Metacat connection failed." + e.getMessage());
951
      }
952
    }
953
  }
954
955
}