Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2006-05-01 11:20:15 -0700 (Mon, 01 May 2006) $'
8
 * '$Revision: 2995 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import edu.ucsb.nceas.utilities.Options;
29
import java.io.File;
30
import java.io.IOException;
31
import java.io.PrintStream;
32
import java.sql.Connection;
33
import java.sql.DriverManager;
34
import java.sql.ResultSet;
35
import java.sql.SQLException;
36
import java.sql.SQLWarning;
37
import java.sql.Statement;
38
import java.util.ArrayList;
39
import java.text.SimpleDateFormat;
40
import java.util.Date;
41

    
42
import edu.ucsb.nceas.metacat.client.Metacat;
43
import edu.ucsb.nceas.metacat.client.MetacatFactory;
44
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
45

    
46
/**
47
 * Harvester is the main class for the Harvester application. The main
48
 * method creates a single Harvester object which drives the application.
49
 * 
50
 * @author    costa
51
 * 
52
 */
53
public class Harvester {
54

    
55
  /*
56
   * Class fields
57
   */
58
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
59
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
60
  private static final String CONFIG_NAME = "metacat.properties";
61
  public static final String filler = "*";
62
  private static boolean keepRunning = true;
63
  public static final String marker =
64
"*****************************************************************************";
65
  public static Options options = null;
66
  private static String schemaLocation = null;
67
   
68

    
69
  /* 
70
   * Class methods
71
   */
72
   
73

    
74
  /**
   * Creates a new Harvester instance. No initialization is performed here;
   * the object fields keep their declared initial values.
   */
  public Harvester() {
    super();
  }
79
    
80

    
81
  /**
82
   * Loads Harvester options from a configuration file.
83
   */
84
  public static void loadOptions(boolean test) {
85
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
86
    File propertyFile = new File(configDir, CONFIG_NAME);
87

    
88
    try {
89
      options = Options.initialize(propertyFile);
90
    } 
91
    catch (IOException e) {
92
      System.out.println("Error in loading options: " + e.getMessage());
93
    }
94
  }
95
  
96
  
97
  /**
98
    * Harvester main method.
99
    * 
100
    * @param args        the command line arguments
101
    * @throws SAXException
102
    * @throws IOException
103
    * @throws ParserConfigurationException
104
    */
105
  public static void main(String[] args) {
106
    Integer delayDefault = new Integer(0); // Default number of hours delay
107
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
108
    Integer d;                            // Used for determining delay
109
    long delta;                           // endTime - startTime
110
    long endTime;                         // time that a harvest completes
111
    Harvester harvester;                  // object for a single harvest run
112
    Integer maxHarvestsDefault = new Integer(0);     // Default max harvests
113
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
114
    Integer mh;                              // used in determining max harvests
115
    int nHarvests = 0;                      // counts the number of harvest runs
116
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
117
    Integer periodDefault = new Integer(24); // Default hours between harvests
118
    int period = periodDefault.intValue();   // Hours between harvests
119
    Integer p;                               // Used in determining the period
120
    long startTime;                          // time that a harvest run starts
121
    boolean test = false;                    // Passed to loadOption()
122
    
123
    if (args.length > 0) {
124
      schemaLocation = args[0];
125
      System.err.println("schemaLocation: " + schemaLocation);
126
      
127
      try {
128
        Thread.sleep(10000);
129
      } 
130
      catch (InterruptedException e) {
131
        e.printStackTrace();
132
      }
133
    }
134

    
135
    System.out.println(marker);
136
    System.out.println("Starting Harvester");
137
    Harvester.loadOptions(test);
138

    
139
    // Parse the delay property. Use default if necessary.    
140
    try {
141
      d = Integer.valueOf(options.getOption("delay"));
142
      delay = d.intValue();
143
    }
144
    catch (NumberFormatException e) {
145
      System.out.println("NumberFormatException: Error parsing delay: " +
146
                         e.getMessage());
147
      System.out.println("Defaulting to delay=" + delayDefault);
148
      delay = delayDefault.intValue();
149
    }
150

    
151
    // Parse the maxHarvests property. Use default if necessary.    
152
    try {
153
      mh = Integer.valueOf(options.getOption("maxHarvests"));
154
      maxHarvests = mh.intValue();
155
    }
156
    catch (NumberFormatException e) {
157
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
158
                         e.getMessage());
159
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
160
      maxHarvests = maxHarvestsDefault.intValue();
161
    }
162

    
163
    // Parse the period property. Use default if necessary.    
164
    try {
165
      p = Integer.valueOf(options.getOption("period"));
166
      period = p.intValue();
167
    }
168
    catch (NumberFormatException e) {
169
      System.out.println("NumberFormatException: Error parsing period: " +
170
                         e.getMessage());
171
      System.out.println("Defaulting to period=" + periodDefault);
172
      period = periodDefault.intValue();
173
    }
174
    
175
    // Sleep for delay number of hours prior to starting first harvest
176
    if (delay > 0) {
177
      try {
178
        System.out.print("First harvest will begin in " + delay);
179
        if (delay == 1) {
180
          System.out.println(" hour.");
181
        }
182
        else {
183
          System.out.println(" hours.");
184
        }
185
        Thread.sleep(delay * oneHour);
186
      }
187
      catch (InterruptedException e) {
188
        System.err.println("InterruptedException: " + e.getMessage());
189
        System.exit(1);
190
      }
191
    }
192

    
193
    // Repeat a new harvest once every period number of hours, until we reach
194
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
195
    // Subtract delta from the time period so 
196
    // that each harvest will start at a fixed interval.
197
    //
198
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
199
      nHarvests++;
200
      startTime = System.currentTimeMillis();
201
      harvester = new Harvester();                // New object for this harvest
202
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
203
      harvester.readHarvestSiteSchedule();        // Read the database table
204
      harvester.harvest();                        // Harvest the documents
205
      harvester.shutdown();                       // Shut down Harvester
206
      endTime = System.currentTimeMillis();
207
      delta = endTime - startTime;
208

    
209
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
210
        try {
211
          System.out.println("Next harvest will begin in " + 
212
                             period + " hours.");
213
          Thread.sleep((period * oneHour) - delta);
214
        }
215
        catch (InterruptedException e) {
216
          System.err.println("InterruptedException: " + e.getMessage());
217
          System.exit(1);
218
        }
219
      }
220
    }
221
  }
222
  
223
  
224
  /**
225
   * Set the keepRunning flag. If set to false, the main program will end
226
   * the while loop that keeps harvester running every period number of hours.
227
   * The static method is intended to be called from the HarvesterServlet class
228
   * which creates a thread to run Harvester. When the thread is destroyed, the
229
   * thread's destroy() method calls Harvester.setKeepRunning(false).
230
   * 
231
   * @param keepRunning
232
   */
233
  static void setKeepRunning(boolean keepRunning) {
234
    Harvester.keepRunning = keepRunning;
235
  }
236

    
237
  
238
  /*
239
   * Object fields
240
   */
241

    
242
  /** Database connection */
243
  private Connection conn = null;
244
  
245
  /** Used during development to determine whether to connect to metacat 
246
   *  Sometimes it's useful to test parts of the code without actually
247
   *  connecting to Metacat.
248
   */
249
  private boolean connectToMetacat;
250

    
251
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
252
  private int detailLogID;
253
  
254
  /** Email address of the Harvester Administrator */
255
  String harvesterAdministrator;
256
  
257
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
258
  private int harvestLogID;
259
  
260
  /** End time of this harvest session */
261
  private Date harvestEndTime;
262
  
263
  /** List of HarvestLog objects. Stores log entries for report generation. */
264
  private ArrayList harvestLogList = new ArrayList();
265
  
266
  /** List of HarvestSiteSchedule objects */
267
  private ArrayList harvestSiteScheduleList = new ArrayList();
268
  
269
  /** Start time of this harvest session */
270
  private Date harvestStartTime;
271
  
272
  /** Number of days to save log records. Any that are older are purged. */
273
  int logPeriod;
274
  
275
  /** Metacat client object */
276
  Metacat metacat;
277
  
278
  /** SMTP server for sending mail messages */
279
  String smtpServer;
280
  
281
  /** The timestamp for this harvest run. Used for output only. */
282
  String timestamp;
283
  
284

    
285
  /*
286
   * Object methods
287
   */
288
   
289
  /**
290
   * Creates a new HarvestLog object and adds it to the harvestLogList.
291
   * 
292
   * @param  status          the status of the harvest operation
293
   * @param  message         the message text of the harvest operation
294
   * @param  harvestOperationCode  the harvest operation code
295
   * @param  siteScheduleID  the siteScheduleID for which this operation was
296
   *                         performed. 0 indicates that the operation did not
297
   *                         involve a particular harvest site.
298
   * @param  harvestDocument the associated HarvestDocument object. May be null.
299
   * @param  errorMessage    additional error message pertaining to document
300
   *                         error.
301
   */
302
  void addLogEntry(int    status,
303
                   String message,
304
                   String harvestOperationCode,
305
                   int    siteScheduleID,
306
                   HarvestDocument harvestDocument,
307
                   String errorMessage
308
                  ) {
309
    HarvestLog harvestLog;
310
    int harvestLogID = getHarvestLogID();
311
    int detailLogID;
312

    
313
    /* If there is no associated harvest document, call the basic constructor;
314
     * else call the extended constructor.
315
     */
316
    if (harvestDocument == null) {    
317
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
318
                                  status, message, harvestOperationCode, 
319
                                  siteScheduleID);
320
    }
321
    else {
322
      detailLogID = getDetailLogID();
323
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
324
                                  harvestStartTime, status, message,
325
                                  harvestOperationCode, siteScheduleID,
326
                                  harvestDocument, errorMessage);
327
    }
328
    
329
    harvestLogList.add(harvestLog);
330
  }
331
  
332
  
333
  public void closeConnection() {
334
    try {
335
      // Close the database connection
336
      System.out.println("Closing the database connection.");
337
      conn.close();
338
    }
339
    catch (SQLException e) {
340
      System.out.println("Database access failed " + e);
341
    }    
342
  }
343

    
344

    
345
  /**
346
   * Determines whether Harvester should attempt to connect to Metacat.
347
   * Used during development and testing.
348
   * 
349
   * @return     true if Harvester should connect, otherwise false
350
   */
351
  boolean connectToMetacat () {
352
    return connectToMetacat;
353
  }
354
  
355

    
356
  /**
357
   * Normalizes text prior to insertion into the HARVEST_LOG or
358
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
359
   * character with the double quote character. This prevents SQL errors
360
   * involving words that contain single quotes. Also removes \n and \r
361
   * characters from the text.
362
   * 
363
   * @param text  the original string
364
   * @return      a string containing the normalized text
365
   */
366
  public String dequoteText(String text) {
367
    char c;
368
    StringBuffer stringBuffer = new StringBuffer();
369
    
370
    for (int i = 0; i < text.length(); i++) {
371
      c = text.charAt(i);
372
      switch (c) {
373
        case '\'':
374
          stringBuffer.append('\"');
375
          break;
376
        case '\r':
377
        case '\n':
378
          break;
379
        default:
380
          stringBuffer.append(c);
381
          break;
382
      }
383
    }
384
    
385
    return stringBuffer.toString();
386
  }
387
  
388
  /**
389
   * Returns a connection to the database. Opens the connection if a connection
390
   * has not already been made previously.
391
   * 
392
   * @return  conn  the database Connection object
393
   */
394
  public Connection getConnection() {
395
    String dbDriver = "";
396
    String defaultDB;
397
    String password;
398
    String user;
399
    SQLWarning warn;
400
    
401
    if (conn == null) {
402
      dbDriver = options.getOption("dbDriver");
403
      defaultDB = options.getOption("defaultDB");
404
      password = options.getOption("password");
405
      user = options.getOption("user");
406

    
407
      // Load the jdbc driver
408
      try {
409
        Class.forName(dbDriver);
410
      }
411
      catch (ClassNotFoundException e) {
412
        System.out.println("Can't load driver " + e);
413
        System.exit(1);
414
      } 
415

    
416
      // Make the database connection
417
      try {
418
        System.out.println("Getting connection to Harvester tables");
419
        conn = DriverManager.getConnection(defaultDB, user, password);
420

    
421
        // If a SQLWarning object is available, print its warning(s).
422
        // There may be multiple warnings chained.
423
        warn = conn.getWarnings();
424
      
425
        if (warn != null) {
426
          while (warn != null) {
427
            System.out.println("SQLState: " + warn.getSQLState());
428
            System.out.println("Message:  " + warn.getMessage());
429
            System.out.println("Vendor: " + warn.getErrorCode());
430
            System.out.println("");
431
            warn = warn.getNextWarning();
432
          }
433
        }
434
      }
435
      catch (SQLException e) {
436
        System.out.println("Database access failed " + e);
437
        System.exit(1);
438
      }
439
    }
440
    
441
    return conn;
442
  }
443

    
444

    
445
  /**
446
   * Gets the current value of the detailLogID for storage as a primary key in
447
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
448
   * 
449
   * @return  the current value of the detailLogID
450
   */
451
  public int getDetailLogID() {
452
    int currentValue = detailLogID;
453
    
454
    detailLogID++;
455
    return currentValue;
456
  }
457
  
458
  
459
  /**
460
   * Gets the current value of the harvestLogID for storage as a primary key in
461
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
462
   * 
463
   * @return  the current value of the detailLogID
464
   */
465
  public int getHarvestLogID() {
466
    int currentValue = harvestLogID;
467
    
468
    harvestLogID++;
469
    return currentValue;
470
  }
471
  
472

    
473
  /** 
474
   * Gets the maximum value of an integer field from a table.
475
   * 
476
   * @param tableName  the database table name
477
   * @param fieldName  the field name of the integer field in the table
478
   * @return  the maximum integer stored in the fieldName field of tableName
479
   */
480
  private int getMaxValue(String tableName, String fieldName) {
481
    int maxValue = 0;
482
    int fieldValue;
483
    String query = "SELECT " + fieldName + " FROM " + tableName;
484
    Statement stmt;
485
    
486
	try {
487
      stmt = conn.createStatement();
488
      ResultSet rs = stmt.executeQuery(query);
489
	
490
      while (rs.next()) {
491
        fieldValue = rs.getInt(fieldName);
492
        maxValue = Math.max(maxValue, fieldValue);
493
      }
494
      
495
      stmt.close();
496
    } 
497
    catch(SQLException ex) {
498
      System.out.println("SQLException: " + ex.getMessage());
499
    }
500
    
501
    return maxValue;
502
  }
503
  
504
  
505
  /** 
506
   * Gets the minimum value of an integer field from a table.
507
   * 
508
   * @param tableName  the database table name
509
   * @param fieldName  the field name of the integer field in the table
510
   * @return  the minimum integer stored in the fieldName field of tableName
511
   */
512
  private int getMinValue(String tableName, String fieldName) {
513
    int minValue = 0;
514
    int fieldValue;
515
    String query = "SELECT " + fieldName + " FROM " + tableName;
516
    Statement stmt;
517
    
518
    try {
519
      stmt = conn.createStatement();
520
      ResultSet rs = stmt.executeQuery(query);
521
	
522
      while (rs.next()) {
523
        fieldValue = rs.getInt(fieldName);
524

    
525
        if (minValue == 0) {
526
          minValue = fieldValue;
527
        }
528
        else {
529
          minValue = Math.min(minValue, fieldValue);
530
        }
531
      }
532
      
533
      stmt.close();
534
    } 
535
    catch(SQLException ex) {
536
      System.out.println("SQLException: " + ex.getMessage());
537
    }
538

    
539
    return minValue;
540
  }
541
  
542
  
543
  /**
544
   * For every Harvest site schedule in the database, harvest the
545
   * documents for that site if they are due to be harvested.
546
   * 
547
   * @throws SAXException
548
   * @throws IOException
549
   * @throws ParserConfigurationException
550
   */
551
  private void harvest() {
552
    HarvestSiteSchedule harvestSiteSchedule;
553

    
554
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
555
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
556
      
557
      if (Harvester.schemaLocation != null) {
558
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
559
      }
560
      
561
      harvestSiteSchedule.harvestDocumentList();
562
    }
563
  }
564
  
565
  
566
  /**
567
   * Initializes the detailLogID and harvestLogID values to their current
568
   * maximums + 1.
569
   */
570
  public void initLogIDs() {
571
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
572
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
573
  }
574
  
575

    
576
  /**
577
   * Prints the header of the harvest report.
578
   * 
579
   * @param out            the PrintStream object to print to
580
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
581
   *                       have a value of 0 if no particular site is involved,
582
   *                       which indicates that the report is being prepared
583
   *                       for the Harvester Administrator rather than for a
584
   *                       particular Site Contact.
585
   */
586
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
587
    HarvestLog harvestLog;
588
    int logSiteScheduleID;
589
    int nErrors = 0;
590
    String phrase;
591
    
592
    for (int i = 0; i < harvestLogList.size(); i++) {
593
      harvestLog = (HarvestLog) harvestLogList.get(i);
594
      logSiteScheduleID = harvestLog.getSiteScheduleID();
595
      
596
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
597
        if (harvestLog.isErrorEntry()) {
598
          nErrors++;
599
        }
600
      }      
601
    }
602

    
603
    out.println(marker);
604
    out.println(filler);
605
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
606
    out.println(filler);
607

    
608
    if (nErrors > 0) {
609
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
610
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
611
      out.println("* Please see the log entries below for additonal details.");
612
    }
613
    else {
614
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
615
    }
616
    
617
    out.println(filler);
618
    out.println(marker);
619
  }
620
    
621

    
622
  /**
623
   * Prints harvest log entries for this harvest run. Entries may be filtered
624
   * for a particular site, or all entries may be printed.
625
   * 
626
   * @param out            the PrintStream object to write to
627
   * @param maxCodeLevel   the maximum code level that should be printed,
628
   *                       e.g. "warning". Any log entries higher than this
629
   *                       level will not be printed.
630
   * @param siteScheduleID if greater than 0, indicates that the log
631
   *                       entry should only be printed for a particular site
632
   *                       as identified by its siteScheduleID. if 0, then
633
   *                       print output for all sites.
634
   */
635
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
636
                      ) {
637
    HarvestLog harvestLog;
638
    int logSiteScheduleID;
639
    int nErrors = 0;
640
    String phrase;
641
    
642
    out.println("");
643
    out.println(marker);
644
    out.println(filler);
645
    out.println("*                       LOG ENTRIES");
646
    out.println(filler);
647
    out.println(marker);
648

    
649
    for (int i = 0; i < harvestLogList.size(); i++) {
650
      harvestLog = (HarvestLog) harvestLogList.get(i);
651
      logSiteScheduleID = harvestLog.getSiteScheduleID();
652
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
653
        harvestLog.printOutput(out, maxCodeLevel);
654
      }
655
    }
656
  }
657
    
658

    
659
  /**
660
   * Prints the site schedule data for a given site.
661
   * 
662
   * @param out              the PrintStream to write to
663
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
664
   */
665
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
666
    HarvestSiteSchedule harvestSiteSchedule;
667

    
668
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
669
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
670
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
671
        harvestSiteSchedule.printOutput(out);
672
      }
673
    }
674
  }
675
  
676

    
677
  /**
678
   * Prunes old records from the HARVEST_LOG table. Records are removed if
679
   * their HARVEST_DATE is older than a given number of days, as stored in the
680
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
681
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
682
   */
683
  private void pruneHarvestLog() {
684
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
685
    Date dateLastLog;                    // Prune everything prior to this date
686
    String deleteString;
687
    String deleteStringDetailLog;
688
    long delta;
689
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
690
    int recordsDeleted;
691
    int recordsDeletedDetail = 0;
692
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
693
    String dateString;
694
    ResultSet rs;
695
    String selectString;
696
    Statement stmt;
697
    long timeLastLog = 0;
698
    SQLWarning warn;
699
     
700
    delta = logPeriod * millisecondsPerDay;
701
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
702
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
703
    deleteStringDetailLog = 
704
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
705
    timeLastLog = currentTime - delta;
706
    dateLastLog = new Date(timeLastLog);
707
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
708
    deleteString += dateString;
709
    selectString += dateString;
710

    
711
    try {
712
      System.out.println(
713
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
714

    
715
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
716
       * table.
717
       */
718
      stmt = conn.createStatement();                            
719
      rs = stmt.executeQuery(selectString);
720
      warn = rs.getWarnings();
721

    
722
      if (warn != null) {
723
        System.out.println("\n---Warning---\n");
724

    
725
        while (warn != null) {
726
          System.out.println("Message: " + warn.getMessage());
727
          System.out.println("SQLState: " + warn.getSQLState());
728
          System.out.print("Vendor error code: ");
729
          System.out.println(warn.getErrorCode());
730
          System.out.println("");
731
          warn = warn.getNextWarning();
732
        }
733
      } 
734

    
735
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
736
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
737
       * be pruned first because its records have a child relationship to those
738
       * in HARVEST_LOG.
739
       */
740
      while (rs.next()) {
741
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
742
        stmt = conn.createStatement();                            
743
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
744
                                            harvestLogID);
745
        recordsDeletedDetail += recordsDeleted;
746
        stmt.close();
747
      }
748
 
749
      /* Now prune entries from the HARVEST_LOG table using a single update.
750
       */
751
      stmt = conn.createStatement();                            
752
      recordsDeleted = stmt.executeUpdate(deleteString);
753
      stmt.close();
754

    
755
      System.out.println("  " + recordsDeletedDetail + 
756
                         " records deleted from HARVEST_DETAIL_LOG");
757
      System.out.println("  " + recordsDeleted + 
758
                         " records deleted from HARVEST_LOG");
759
    }
760
    catch (SQLException e) {
761
      System.out.println("SQLException: " + e.getMessage());
762
    }
763
  }
764
    
765

    
766
  /**
767
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
768
   * a HarvestSiteSchedule object for each row in the table.
769
   */
770
  private void readHarvestSiteSchedule() {
771
    HarvestSiteSchedule harvestSiteSchedule;
772
    ResultSet rs;
773
    SQLWarning warn;
774
    Statement stmt;
775

    
776
    String contactEmail;
777
    String dateLastHarvest;
778
    String dateNextHarvest;
779
    String documentListURL;
780
    String ldapDN;
781
    String ldapPwd;
782
    int siteScheduleID;
783
    String unit;
784
    int updateFrequency;
785
        
786
    try {
787
      // Read the HARVEST_SITE_SCHEDULE table
788
      stmt = conn.createStatement();
789
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
790
      warn = rs.getWarnings();
791

    
792
      if (warn != null) {
793
        System.out.println("\n---Warning---\n");
794

    
795
        while (warn != null) {
796
          System.out.println("Message: " + warn.getMessage());
797
          System.out.println("SQLState: " + warn.getSQLState());
798
          System.out.print("Vendor error code: ");
799
          System.out.println(warn.getErrorCode());
800
          System.out.println("");
801
          warn = warn.getNextWarning();
802
        }
803
      }
804
     
805
      while (rs.next()) {
806
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
807
        documentListURL = rs.getString("DOCUMENTLISTURL");
808
        ldapDN = rs.getString("LDAPDN");
809
        ldapPwd = rs.getString("LDAPPWD");
810
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
811
        dateLastHarvest = rs.getString("DATELASTHARVEST");
812
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
813
        unit = rs.getString("UNIT");
814
        contactEmail = rs.getString("CONTACT_EMAIL");
815
        
816
        warn = rs.getWarnings();
817

    
818
        if (warn != null) {
819
          System.out.println("\n---Warning---\n");
820
      
821
          while (warn != null) {
822
            System.out.println("Message: " + warn.getMessage());
823
            System.out.println("SQLState: " + warn.getSQLState());
824
            System.out.print("Vendor error code: ");
825
            System.out.println(warn.getErrorCode());
826
            System.out.println("");
827
            warn = warn.getNextWarning();
828
          }
829
        }
830
      
831
        harvestSiteSchedule = new HarvestSiteSchedule(this,
832
                                                      siteScheduleID,
833
                                                      documentListURL,
834
                                                      ldapDN,
835
                                                      ldapPwd,
836
                                                      dateNextHarvest,
837
                                                      dateLastHarvest,
838
                                                      updateFrequency,
839
                                                      unit,
840
                                                      contactEmail
841
                                                     );
842
        harvestSiteScheduleList.add(harvestSiteSchedule);
843
      }
844
      
845
      rs.close();
846
      stmt.close();
847
    }
848
    catch (SQLException e) {
849
      System.out.println("Database access failed " + e);
850
      System.exit(1);
851
    }
852
    
853
  }
854
    
855

    
856
  /**
857
   * Sends a report to the Harvester Administrator. The report prints each log
858
   * entry pertaining to this harvest run.
859
   *
860
   * @param maxCodeLevel  the maximum code level that should be printed,
861
   *                      e.g. "warning". Any log entries higher than this
862
   *                      level will not be printed.
863
   */
864
  void reportToAdministrator(String maxCodeLevel) {
865
    PrintStream body;
866
    String from = harvesterAdministrator;
867
    String[] fromArray;
868
    MailMessage msg;
869
    int siteScheduleID = 0;
870
    String subject = "Report from Metacat Harvester: " + timestamp;
871
    String to = harvesterAdministrator;
872
    
873
    if (!to.equals("")) {
874
      System.out.println("Sending report to Harvester Administrator at address "
875
                         + harvesterAdministrator);
876
      
877
      try {
878
        msg = new MailMessage(smtpServer);
879

    
880
        if (from.indexOf(',') > 0) {
881
          fromArray = from.split(",");
882
          
883
          for (int i = 0; i < fromArray.length; i++) {
884
            if (i == 0) {
885
              msg.from(fromArray[i]);
886
            }
887
            
888
            msg.to(fromArray[i]);            
889
          }
890
        }
891
        else if (from.indexOf(';') > 0) {
892
          fromArray = from.split(";");
893

    
894
          for (int i = 0; i < fromArray.length; i++) {
895
            if (i == 0) {
896
              msg.from(fromArray[i]);
897
            }
898
            
899
            msg.to(fromArray[i]);            
900
          }
901
        }
902
        else {
903
          msg.from(from);
904
          msg.to(to);
905
        }
906
        
907
        msg.setSubject(subject);
908
        body = msg.getPrintStream();
909
        printHarvestHeader(body, siteScheduleID);
910
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
911
        msg.sendAndClose();
912
      }
913
      catch (IOException e) {
914
        System.out.println("There was a problem sending email to " + to);
915
        System.out.println("IOException: " + e.getMessage());
916
      }
917
    }
918
  }
919
  
920

    
921
  /**
922
   * Sets the harvest start time for this harvest run.
923
   * 
924
   * @param date
925
   */
926
  public void setHarvestStartTime(Date date) {
927
    harvestStartTime = date;
928
  }
929
    
930

    
931
  /**
932
   * Shuts down Harvester. Performs cleanup operations such as logging out
933
   * of Metacat and disconnecting from the database.
934
   */
935
  private void shutdown() {
936
    String maxCodeLevel = "debug";  // Print all log entries from level 1
937
                                    // ("error") to level 5 ("debug")
938
    int siteScheduleID = 0;
939

    
940
    // Log shutdown operation
941
    System.out.println("Shutting Down Harvester");
942
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
943
    pruneHarvestLog();
944
    closeConnection();
945
    // Print log to standard output and then email the Harvester administrator
946
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
947
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
948
  }
949
    
950

    
951
  /**
952
   * Initializes Harvester at startup. Connects to the database and to Metacat.
953
   * 
954
   * @param nHarvests        the nth harvest
955
   * @param maxHarvests      the maximum number of harvests that this process
956
   *                         can run
957
   */
958
  private void startup(int nHarvests, int maxHarvests) {
959
    Boolean ctm;
960
    String httpserver;
961
    Integer lp;
962
    String metacatURL;
963
    Date now = new Date();
964
    String servletPath;
965
    
966
    timestamp = now.toString();
967
    System.out.println(Harvester.marker);
968
    System.out.print(timestamp + ": Starting Next Harvest");
969
    if (maxHarvests > 0) {
970
      System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
971
    }
972
    System.out.print("\n");
973
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
974
    connectToMetacat = ctm.booleanValue();
975
    harvesterAdministrator = options.getOption("harvesterAdministrator");
976
    smtpServer = options.getOption("smtpServer");
977

    
978
    try {
979
      lp = Integer.valueOf(options.getOption("logPeriod"));
980
      logPeriod = lp.intValue();
981
    }
982
    catch (NumberFormatException e) {
983
      System.err.println("NumberFormatException: Error parsing logPeriod " +
984
                         logPeriod + e.getMessage());
985
      System.err.println("Defaulting to logPeriod of 90 days");
986
      logPeriod = 90;
987
    }
988

    
989
    conn = getConnection();
990
    initLogIDs();
991
    setHarvestStartTime(now);
992
    // Log startup operation
993
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
994
      
995
    if (connectToMetacat()) {      
996
      try {
997
        httpserver = options.getOption("httpserver");
998
        servletPath = options.getOption("servletpath");
999
        metacatURL = httpserver + servletPath;
1000
        System.out.println("Connecting to Metacat: " + metacatURL);
1001
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
1002
      } 
1003
      catch (MetacatInaccessibleException e) {
1004
        System.out.println("Metacat connection failed." + e.getMessage());
1005
      } 
1006
      catch (Exception e) {
1007
        System.out.println("Metacat connection failed." + e.getMessage());
1008
      }
1009
    }
1010
  }
1011

    
1012
}
(6-6/11)