Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2005-01-20 13:42:07 -0800 (Thu, 20 Jan 2005) $'
8
 * '$Revision: 2381 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import edu.ucsb.nceas.utilities.Options;
29
import java.io.File;
30
import java.io.FileInputStream;
31
import java.io.IOException;
32
import java.io.PrintStream;
33
import java.sql.Connection;
34
import java.sql.DriverManager;
35
import java.sql.ResultSet;
36
import java.sql.SQLException;
37
import java.sql.SQLWarning;
38
import java.sql.Statement;
39
import java.util.ArrayList;
40
import java.text.SimpleDateFormat;
41
import java.util.Date;
42

    
43
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46

    
47
/**
48
 * Harvester is the main class for the Harvester application. The main
49
 * method creates a single Harvester object which drives the application.
50
 * 
51
 * @author    costa
52
 * 
53
 */
54
public class Harvester {
55

    
56
  /*
57
   * Class fields
58
   */
59
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
60
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
61
  private static final String CONFIG_NAME = "metacat.properties";
62
  public static final String filler = "*";
63
  public static final String marker =
64
"*****************************************************************************";
65
  public static Options options = null;
66
   
67

    
68
  /* 
69
   * Class methods
70
   */
71
   
72

    
73
  /**
   * Creates a new Harvester instance. The constructor itself does no work;
   * main() calls startup() on each freshly created object.
   */
  public Harvester() {
    super();
  }
78
    
79

    
80
  /**
81
   * Loads Harvester options from a configuration file.
82
   */
83
  public static void loadOptions(boolean test) {
84
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
85
    File propertyFile = new File(configDir, CONFIG_NAME);
86

    
87
    try {
88
      options = Options.initialize(propertyFile);
89
    } 
90
    catch (IOException e) {
91
      System.out.println("Error in loading options: " + e.getMessage());
92
    }
93
  }
94
  
95
  
96
  /**
97
    * Harvester main method.
98
    * 
99
    * @param args        the command line arguments
100
    * @throws SAXException
101
    * @throws IOException
102
    * @throws ParserConfigurationException
103
    */
104
  public static void main(String[] args) {
105
    Integer delayDefault = new Integer(0); // Default number of hours delay
106
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
107
    Integer d;                            // Used for determining delay
108
    long delta;                           // endTime - startTime
109
    long endTime;                         // time that a harvest completes
110
    Harvester harvester;                  // object for a single harvest run
111
    Integer maxHarvestsDefault = new Integer(30);    // Default max harvests
112
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
113
    Integer mh;                              // used in determining max harvests
114
    int nHarvests = 0;                      // counts the number of harvest runs
115
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
116
    Integer periodDefault = new Integer(24); // Default hours between harvests
117
    int period = periodDefault.intValue();   // Hours between harvests
118
    Integer p;                               // Used in determining the period
119
    long startTime;                          // time that a harvest run starts
120
    boolean test = false;                    // Passed to loadOption()
121

    
122
    System.out.println(marker);
123
    System.out.println("Starting Harvester");
124
    Harvester.loadOptions(test);
125

    
126
    // Parse the delay property. Use default if necessary.    
127
    try {
128
      d = Integer.valueOf(options.getOption("delay"));
129
      delay = d.intValue();
130
    }
131
    catch (NumberFormatException e) {
132
      System.out.println("NumberFormatException: Error parsing delay: " +
133
                         e.getMessage());
134
      System.out.println("Defaulting to delay=" + delayDefault);
135
      delay = delayDefault.intValue();
136
    }
137

    
138
    // Parse the maxHarvests property. Use default if necessary.    
139
    try {
140
      mh = Integer.valueOf(options.getOption("maxHarvests"));
141
      maxHarvests = mh.intValue();
142
    }
143
    catch (NumberFormatException e) {
144
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
145
                         e.getMessage());
146
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
147
      maxHarvests = maxHarvestsDefault.intValue();
148
    }
149

    
150
    // Parse the period property. Use default if necessary.    
151
    try {
152
      p = Integer.valueOf(options.getOption("period"));
153
      period = p.intValue();
154
    }
155
    catch (NumberFormatException e) {
156
      System.out.println("NumberFormatException: Error parsing period: " +
157
                         e.getMessage());
158
      System.out.println("Defaulting to period=" + periodDefault);
159
      period = periodDefault.intValue();
160
    }
161
    
162
    // Sleep for delay number of hours prior to starting first harvest
163
    if (delay > 0) {
164
      try {
165
        System.out.print("First harvest will begin in " + delay);
166
        if (delay == 1) {
167
          System.out.println(" hour.");
168
        }
169
        else {
170
          System.out.println(" hours.");
171
        }
172
        Thread.sleep(delay * oneHour);
173
      }
174
      catch (InterruptedException e) {
175
        System.err.println("InterruptedException: " + e.getMessage());
176
        System.exit(1);
177
      }
178
    }
179

    
180
    // Repeat a new harvest once every period number of hours, until we reach
181
    // the maximum number of harvests. Subtract delta from the time period so 
182
    // that each harvest will start at a fixed interval.
183
    //
184
    while (nHarvests < maxHarvests) {
185
      nHarvests++;
186
      startTime = System.currentTimeMillis();
187
      harvester = new Harvester();                // New object for this harvest
188
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
189
      harvester.readHarvestSiteSchedule();        // Read the database table
190
      harvester.harvest();                        // Harvest the documents
191
      harvester.shutdown();                       // Shut down Harvester
192
      endTime = System.currentTimeMillis();
193
      delta = endTime - startTime;
194

    
195
      if (nHarvests < maxHarvests) {
196
        try {
197
          System.out.println("Next harvest will begin in " + 
198
                             period + " hours.");
199
          Thread.sleep((period * oneHour) - delta);
200
        }
201
        catch (InterruptedException e) {
202
          System.err.println("InterruptedException: " + e.getMessage());
203
          System.exit(1);
204
        }
205
      }
206
    }
207
  }
208

    
209

    
210
  /*
211
   * Object fields
212
   */
213

    
214
  /** Database connection */
215
  private Connection conn = null;
216
  
217
  /** Used during development to determine whether to connect to metacat 
218
   *  Sometimes it's useful to test parts of the code without actually
219
   *  connecting to Metacat.
220
   */
221
  private boolean connectToMetacat;
222

    
223
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
224
  private int detailLogID;
225
  
226
  /** Email address of the Harvester Administrator */
227
  String harvesterAdministrator;
228
  
229
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
230
  private int harvestLogID;
231
  
232
  /** End time of this harvest session */
233
  private Date harvestEndTime;
234
  
235
  /** List of HarvestLog objects. Stores log entries for report generation. */
236
  private ArrayList harvestLogList = new ArrayList();
237
  
238
  /** List of HarvestSiteSchedule objects */
239
  private ArrayList harvestSiteScheduleList = new ArrayList();
240
  
241
  /** Start time of this harvest session */
242
  private Date harvestStartTime;
243
  
244
  /** Number of days to save log records. Any that are older are purged. */
245
  int logPeriod;
246
  
247
  /** Metacat client object */
248
  Metacat metacat;
249
  
250
  /** SMTP server for sending mail messages */
251
  String smtpServer;
252
  
253
  /** The timestamp for this harvest run. Used for output only. */
254
  String timestamp;
255
  
256

    
257
  /*
258
   * Object methods
259
   */
260
   
261
  /**
262
   * Creates a new HarvestLog object and adds it to the harvestLogList.
263
   * 
264
   * @param  status          the status of the harvest operation
265
   * @param  message         the message text of the harvest operation
266
   * @param  harvestOperationCode  the harvest operation code
267
   * @param  siteScheduleID  the siteScheduleID for which this operation was
268
   *                         performed. 0 indicates that the operation did not
269
   *                         involve a particular harvest site.
270
   * @param  harvestDocument the associated HarvestDocument object. May be null.
271
   * @param  errorMessage    additional error message pertaining to document
272
   *                         error.
273
   */
274
  void addLogEntry(int    status,
275
                   String message,
276
                   String harvestOperationCode,
277
                   int    siteScheduleID,
278
                   HarvestDocument harvestDocument,
279
                   String errorMessage
280
                  ) {
281
    HarvestLog harvestLog;
282
    int harvestLogID = getHarvestLogID();
283
    int detailLogID;
284

    
285
    /* If there is no associated harvest document, call the basic constructor;
286
     * else call the extended constructor.
287
     */
288
    if (harvestDocument == null) {    
289
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
290
                                  status, message, harvestOperationCode, 
291
                                  siteScheduleID);
292
    }
293
    else {
294
      detailLogID = getDetailLogID();
295
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
296
                                  harvestStartTime, status, message,
297
                                  harvestOperationCode, siteScheduleID,
298
                                  harvestDocument, errorMessage);
299
    }
300
    
301
    harvestLogList.add(harvestLog);
302
  }
303
  
304
  
305
  public void closeConnection() {
306
    try {
307
      // Close the database connection
308
      System.out.println("Closing the database connection.");
309
      conn.close();
310
    }
311
    catch (SQLException e) {
312
      System.out.println("Database access failed " + e);
313
    }    
314
  }
315

    
316

    
317
  /**
318
   * Determines whether Harvester should attempt to connect to Metacat.
319
   * Used during development and testing.
320
   * 
321
   * @return     true if Harvester should connect, otherwise false
322
   */
323
  boolean connectToMetacat () {
324
    return connectToMetacat;
325
  }
326
  
327

    
328
  /**
329
   * Normalizes text prior to insertion into the HARVEST_LOG or
330
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
331
   * character with the double quote character. This prevents SQL errors
332
   * involving words that contain single quotes. Also removes \n and \r
333
   * characters from the text.
334
   * 
335
   * @param text  the original string
336
   * @return      a string containing the normalized text
337
   */
338
  public String dequoteText(String text) {
339
    char c;
340
    StringBuffer stringBuffer = new StringBuffer();
341
    
342
    for (int i = 0; i < text.length(); i++) {
343
      c = text.charAt(i);
344
      switch (c) {
345
        case '\'':
346
          stringBuffer.append('\"');
347
          break;
348
        case '\r':
349
        case '\n':
350
          break;
351
        default:
352
          stringBuffer.append(c);
353
          break;
354
      }
355
    }
356
    
357
    return stringBuffer.toString();
358
  }
359
  
360
  /**
361
   * Returns a connection to the database. Opens the connection if a connection
362
   * has not already been made previously.
363
   * 
364
   * @return  conn  the database Connection object
365
   */
366
  public Connection getConnection() {
367
    String dbDriver = "";
368
    String defaultDB;
369
    String password;
370
    String user;
371
    SQLWarning warn;
372
    
373
    if (conn == null) {
374
      dbDriver = options.getOption("dbDriver");
375
      defaultDB = options.getOption("defaultDB");
376
      password = options.getOption("password");
377
      user = options.getOption("user");
378

    
379
      // Load the jdbc driver
380
      try {
381
        Class.forName(dbDriver);
382
      }
383
      catch (ClassNotFoundException e) {
384
        System.out.println("Can't load driver " + e);
385
        System.exit(1);
386
      } 
387

    
388
      // Make the database connection
389
      try {
390
        System.out.println("Getting connection to Harvester tables");
391
        conn = DriverManager.getConnection(defaultDB, user, password);
392

    
393
        // If a SQLWarning object is available, print its warning(s).
394
        // There may be multiple warnings chained.
395
        warn = conn.getWarnings();
396
      
397
        if (warn != null) {
398
          while (warn != null) {
399
            System.out.println("SQLState: " + warn.getSQLState());
400
            System.out.println("Message:  " + warn.getMessage());
401
            System.out.println("Vendor: " + warn.getErrorCode());
402
            System.out.println("");
403
            warn = warn.getNextWarning();
404
          }
405
        }
406
      }
407
      catch (SQLException e) {
408
        System.out.println("Database access failed " + e);
409
        System.exit(1);
410
      }
411
    }
412
    
413
    return conn;
414
  }
415

    
416

    
417
  /**
418
   * Gets the current value of the detailLogID for storage as a primary key in
419
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
420
   * 
421
   * @return  the current value of the detailLogID
422
   */
423
  public int getDetailLogID() {
424
    int currentValue = detailLogID;
425
    
426
    detailLogID++;
427
    return currentValue;
428
  }
429
  
430
  
431
  /**
432
   * Gets the current value of the harvestLogID for storage as a primary key in
433
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
434
   * 
435
   * @return  the current value of the detailLogID
436
   */
437
  public int getHarvestLogID() {
438
    int currentValue = harvestLogID;
439
    
440
    harvestLogID++;
441
    return currentValue;
442
  }
443
  
444

    
445
  /** 
446
   * Gets the maximum value of an integer field from a table.
447
   * 
448
   * @param tableName  the database table name
449
   * @param fieldName  the field name of the integer field in the table
450
   * @return  the maximum integer stored in the fieldName field of tableName
451
   */
452
  private int getMaxValue(String tableName, String fieldName) {
453
    int maxValue = 0;
454
    int fieldValue;
455
    String query = "SELECT " + fieldName + " FROM " + tableName;
456
    Statement stmt;
457
    
458
	try {
459
      stmt = conn.createStatement();
460
      ResultSet rs = stmt.executeQuery(query);
461
	
462
      while (rs.next()) {
463
        fieldValue = rs.getInt(fieldName);
464
        maxValue = Math.max(maxValue, fieldValue);
465
      }
466
      
467
      stmt.close();
468
    } 
469
    catch(SQLException ex) {
470
      System.out.println("SQLException: " + ex.getMessage());
471
    }
472
    
473
    return maxValue;
474
  }
475
  
476
  
477
  /** 
478
   * Gets the minimum value of an integer field from a table.
479
   * 
480
   * @param tableName  the database table name
481
   * @param fieldName  the field name of the integer field in the table
482
   * @return  the minimum integer stored in the fieldName field of tableName
483
   */
484
  private int getMinValue(String tableName, String fieldName) {
485
    int minValue = 0;
486
    int fieldValue;
487
    String query = "SELECT " + fieldName + " FROM " + tableName;
488
    Statement stmt;
489
    
490
    try {
491
      stmt = conn.createStatement();
492
      ResultSet rs = stmt.executeQuery(query);
493
	
494
      while (rs.next()) {
495
        fieldValue = rs.getInt(fieldName);
496

    
497
        if (minValue == 0) {
498
          minValue = fieldValue;
499
        }
500
        else {
501
          minValue = Math.min(minValue, fieldValue);
502
        }
503
      }
504
      
505
      stmt.close();
506
    } 
507
    catch(SQLException ex) {
508
      System.out.println("SQLException: " + ex.getMessage());
509
    }
510

    
511
    return minValue;
512
  }
513
  
514
  
515
  /**
516
   * For every Harvest site schedule in the database, harvest the
517
   * documents for that site if they are due to be harvested.
518
   * 
519
   * @throws SAXException
520
   * @throws IOException
521
   * @throws ParserConfigurationException
522
   */
523
  private void harvest() {
524
    HarvestSiteSchedule harvestSiteSchedule;
525

    
526
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
527
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
528
      harvestSiteSchedule.harvestDocumentList();
529
    }
530
  }
531
  
532
  
533
  /**
534
   * Initializes the detailLogID and harvestLogID values to their current
535
   * maximums + 1.
536
   */
537
  public void initLogIDs() {
538
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
539
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
540
  }
541
  
542

    
543
  /**
544
   * Prints the header of the harvest report.
545
   * 
546
   * @param out            the PrintStream object to print to
547
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
548
   *                       have a value of 0 if no particular site is involved,
549
   *                       which indicates that the report is being prepared
550
   *                       for the Harvester Administrator rather than for a
551
   *                       particular Site Contact.
552
   */
553
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
554
    HarvestLog harvestLog;
555
    int logSiteScheduleID;
556
    int nErrors = 0;
557
    String phrase;
558
    
559
    for (int i = 0; i < harvestLogList.size(); i++) {
560
      harvestLog = (HarvestLog) harvestLogList.get(i);
561
      logSiteScheduleID = harvestLog.getSiteScheduleID();
562
      
563
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
564
        if (harvestLog.isErrorEntry()) {
565
          nErrors++;
566
        }
567
      }      
568
    }
569

    
570
    out.println(marker);
571
    out.println(filler);
572
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
573
    out.println(filler);
574

    
575
    if (nErrors > 0) {
576
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
577
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
578
      out.println("* Please see the log entries below for additonal details.");
579
    }
580
    else {
581
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
582
    }
583
    
584
    out.println(filler);
585
    out.println(marker);
586
  }
587
    
588

    
589
  /**
590
   * Prints harvest log entries for this harvest run. Entries may be filtered
591
   * for a particular site, or all entries may be printed.
592
   * 
593
   * @param out            the PrintStream object to write to
594
   * @param maxCodeLevel   the maximum code level that should be printed,
595
   *                       e.g. "warning". Any log entries higher than this
596
   *                       level will not be printed.
597
   * @param siteScheduleID if greater than 0, indicates that the log
598
   *                       entry should only be printed for a particular site
599
   *                       as identified by its siteScheduleID. if 0, then
600
   *                       print output for all sites.
601
   */
602
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
603
                      ) {
604
    HarvestLog harvestLog;
605
    int logSiteScheduleID;
606
    int nErrors = 0;
607
    String phrase;
608
    
609
    out.println("");
610
    out.println(marker);
611
    out.println(filler);
612
    out.println("*                       LOG ENTRIES");
613
    out.println(filler);
614
    out.println(marker);
615

    
616
    for (int i = 0; i < harvestLogList.size(); i++) {
617
      harvestLog = (HarvestLog) harvestLogList.get(i);
618
      logSiteScheduleID = harvestLog.getSiteScheduleID();
619
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
620
        harvestLog.printOutput(out, maxCodeLevel);
621
      }
622
    }
623
  }
624
    
625

    
626
  /**
627
   * Prints the site schedule data for a given site.
628
   * 
629
   * @param out              the PrintStream to write to
630
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
631
   */
632
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
633
    HarvestSiteSchedule harvestSiteSchedule;
634

    
635
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
636
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
637
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
638
        harvestSiteSchedule.printOutput(out);
639
      }
640
    }
641
  }
642
  
643

    
644
  /**
645
   * Prunes old records from the HARVEST_LOG table. Records are removed if
646
   * their HARVEST_DATE is older than a given number of days, as stored in the
647
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
648
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
649
   */
650
  private void pruneHarvestLog() {
651
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
652
    Date dateLastLog;                    // Prune everything prior to this date
653
    String deleteString;
654
    String deleteStringDetailLog;
655
    long delta;
656
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
657
    int recordsDeleted;
658
    int recordsDeletedDetail = 0;
659
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
660
    String dateString;
661
    ResultSet rs;
662
    String selectString;
663
    Statement stmt;
664
    long timeLastLog = 0;
665
    SQLWarning warn;
666
     
667
    delta = logPeriod * millisecondsPerDay;
668
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
669
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
670
    deleteStringDetailLog = 
671
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
672
    timeLastLog = currentTime - delta;
673
    dateLastLog = new Date(timeLastLog);
674
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
675
    deleteString += dateString;
676
    selectString += dateString;
677

    
678
    try {
679
      System.out.println(
680
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
681

    
682
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
683
       * table.
684
       */
685
      stmt = conn.createStatement();                            
686
      rs = stmt.executeQuery(selectString);
687
      warn = rs.getWarnings();
688

    
689
      if (warn != null) {
690
        System.out.println("\n---Warning---\n");
691

    
692
        while (warn != null) {
693
          System.out.println("Message: " + warn.getMessage());
694
          System.out.println("SQLState: " + warn.getSQLState());
695
          System.out.print("Vendor error code: ");
696
          System.out.println(warn.getErrorCode());
697
          System.out.println("");
698
          warn = warn.getNextWarning();
699
        }
700
      } 
701

    
702
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
703
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
704
       * be pruned first because its records have a child relationship to those
705
       * in HARVEST_LOG.
706
       */
707
      while (rs.next()) {
708
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
709
        stmt = conn.createStatement();                            
710
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
711
                                            harvestLogID);
712
        recordsDeletedDetail += recordsDeleted;
713
        stmt.close();
714
      }
715
 
716
      /* Now prune entries from the HARVEST_LOG table using a single update.
717
       */
718
      stmt = conn.createStatement();                            
719
      recordsDeleted = stmt.executeUpdate(deleteString);
720
      stmt.close();
721

    
722
      System.out.println("  " + recordsDeletedDetail + 
723
                         " records deleted from HARVEST_DETAIL_LOG");
724
      System.out.println("  " + recordsDeleted + 
725
                         " records deleted from HARVEST_LOG");
726
    }
727
    catch (SQLException e) {
728
      System.out.println("SQLException: " + e.getMessage());
729
    }
730
  }
731
    
732

    
733
  /**
734
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
735
   * a HarvestSiteSchedule object for each row in the table.
736
   */
737
  private void readHarvestSiteSchedule() {
738
    HarvestSiteSchedule harvestSiteSchedule;
739
    ResultSet rs;
740
    SQLWarning warn;
741
    Statement stmt;
742

    
743
    String contactEmail;
744
    String dateLastHarvest;
745
    String dateNextHarvest;
746
    String documentListURL;
747
    String ldapDN;
748
    String ldapPwd;
749
    int siteScheduleID;
750
    String unit;
751
    int updateFrequency;
752
        
753
    try {
754
      // Read the HARVEST_SITE_SCHEDULE table
755
      stmt = conn.createStatement();
756
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
757
      warn = rs.getWarnings();
758

    
759
      if (warn != null) {
760
        System.out.println("\n---Warning---\n");
761

    
762
        while (warn != null) {
763
          System.out.println("Message: " + warn.getMessage());
764
          System.out.println("SQLState: " + warn.getSQLState());
765
          System.out.print("Vendor error code: ");
766
          System.out.println(warn.getErrorCode());
767
          System.out.println("");
768
          warn = warn.getNextWarning();
769
        }
770
      }
771
     
772
      while (rs.next()) {
773
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
774
        documentListURL = rs.getString("DOCUMENTLISTURL");
775
        ldapDN = rs.getString("LDAPDN");
776
        ldapPwd = rs.getString("LDAPPWD");
777
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
778
        dateLastHarvest = rs.getString("DATELASTHARVEST");
779
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
780
        unit = rs.getString("UNIT");
781
        contactEmail = rs.getString("CONTACT_EMAIL");
782
        
783
        warn = rs.getWarnings();
784

    
785
        if (warn != null) {
786
          System.out.println("\n---Warning---\n");
787
      
788
          while (warn != null) {
789
            System.out.println("Message: " + warn.getMessage());
790
            System.out.println("SQLState: " + warn.getSQLState());
791
            System.out.print("Vendor error code: ");
792
            System.out.println(warn.getErrorCode());
793
            System.out.println("");
794
            warn = warn.getNextWarning();
795
          }
796
        }
797
      
798
        harvestSiteSchedule = new HarvestSiteSchedule(this,
799
                                                      siteScheduleID,
800
                                                      documentListURL,
801
                                                      ldapDN,
802
                                                      ldapPwd,
803
                                                      dateNextHarvest,
804
                                                      dateLastHarvest,
805
                                                      updateFrequency,
806
                                                      unit,
807
                                                      contactEmail
808
                                                     );
809
        harvestSiteScheduleList.add(harvestSiteSchedule);
810
      }
811
      
812
      rs.close();
813
      stmt.close();
814
    }
815
    catch (SQLException e) {
816
      System.out.println("Database access failed " + e);
817
      System.exit(1);
818
    }
819
    
820
  }
821
    
822

    
823
  /**
824
   * Sends a report to the Harvester Administrator. The report prints each log
825
   * entry pertaining to this harvest run.
826
   *
827
   * @param maxCodeLevel  the maximum code level that should be printed,
828
   *                      e.g. "warning". Any log entries higher than this
829
   *                      level will not be printed.
830
   */
831
  void reportToAdministrator(String maxCodeLevel) {
832
    PrintStream body;
833
    String from = harvesterAdministrator;
834
    String[] fromArray;
835
    MailMessage msg;
836
    int siteScheduleID = 0;
837
    String subject = "Report from Metacat Harvester: " + timestamp;
838
    String to = harvesterAdministrator;
839
    
840
    if (!to.equals("")) {
841
      System.out.println("Sending report to Harvester Administrator at address "
842
                         + harvesterAdministrator);
843
      
844
      try {
845
        msg = new MailMessage(smtpServer);
846

    
847
        if (from.indexOf(',') > 0) {
848
          fromArray = from.split(",");
849
          
850
          for (int i = 0; i < fromArray.length; i++) {
851
            if (i == 0) {
852
              msg.from(fromArray[i]);
853
            }
854
            
855
            msg.to(fromArray[i]);            
856
          }
857
        }
858
        else if (from.indexOf(';') > 0) {
859
          fromArray = from.split(";");
860

    
861
          for (int i = 0; i < fromArray.length; i++) {
862
            if (i == 0) {
863
              msg.from(fromArray[i]);
864
            }
865
            
866
            msg.to(fromArray[i]);            
867
          }
868
        }
869
        else {
870
          msg.from(from);
871
          msg.to(to);
872
        }
873
        
874
        msg.setSubject(subject);
875
        body = msg.getPrintStream();
876
        printHarvestHeader(body, siteScheduleID);
877
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
878
        msg.sendAndClose();
879
      }
880
      catch (IOException e) {
881
        System.out.println("There was a problem sending email to " + to);
882
        System.out.println("IOException: " + e.getMessage());
883
      }
884
    }
885
  }
886
  
887

    
888
  /**
889
   * Sets the harvest start time for this harvest run.
890
   * 
891
   * @param date
892
   */
893
  public void setHarvestStartTime(Date date) {
894
    harvestStartTime = date;
895
  }
896
    
897

    
898
  /**
899
   * Shuts down Harvester. Performs cleanup operations such as logging out
900
   * of Metacat and disconnecting from the database.
901
   */
902
  private void shutdown() {
903
    String maxCodeLevel = "debug";  // Print all log entries from level 1
904
                                    // ("error") to level 5 ("debug")
905
    int siteScheduleID = 0;
906

    
907
    // Log shutdown operation
908
    System.out.println("Shutting Down Harvester");
909
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
910
    pruneHarvestLog();
911
    closeConnection();
912
    // Print log to standard output and then email the Harvester administrator
913
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
914
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
915
  }
916
    
917

    
918
  /**
919
   * Initializes Harvester at startup. Connects to the database and to Metacat.
920
   * 
921
   * @param nHarvests        the nth harvest
922
   * @param maxHarvests      the maximum number of harvests that this process
923
   *                         can run
924
   */
925
  private void startup(int nHarvests, int maxHarvests) {
926
    Boolean ctm;
927
    String httpserver;
928
    Integer lp;
929
    String metacatURL;
930
    Date now = new Date();
931
    String servletPath;
932
    
933
    timestamp = now.toString();
934
    System.out.println(Harvester.marker);
935
    System.out.println(timestamp + ": Starting Next Harvest (" +
936
                       nHarvests + "/" + maxHarvests + ")");
937
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
938
    connectToMetacat = ctm.booleanValue();
939
    harvesterAdministrator = options.getOption("harvesterAdministrator");
940
    smtpServer = options.getOption("smtpServer");
941

    
942
    try {
943
      lp = Integer.valueOf(options.getOption("logPeriod"));
944
      logPeriod = lp.intValue();
945
    }
946
    catch (NumberFormatException e) {
947
      System.err.println("NumberFormatException: Error parsing logPeriod " +
948
                         logPeriod + e.getMessage());
949
      System.err.println("Defaulting to logPeriod of 90 days");
950
      logPeriod = 90;
951
    }
952

    
953
    conn = getConnection();
954
    initLogIDs();
955
    setHarvestStartTime(now);
956
    // Log startup operation
957
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
958
      
959
    if (connectToMetacat()) {      
960
      try {
961
        httpserver = options.getOption("httpserver");
962
        servletPath = options.getOption("servletpath");
963
        metacatURL = httpserver + servletPath;
964
        System.out.println("Connecting to Metacat: " + metacatURL);
965
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
966
      } 
967
      catch (MetacatInaccessibleException e) {
968
        System.out.println("Metacat connection failed." + e.getMessage());
969
      } 
970
      catch (Exception e) {
971
        System.out.println("Metacat connection failed." + e.getMessage());
972
      }
973
    }
974
  }
975

    
976
}
(6-6/10)