Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2005-01-25 13:57:02 -0800 (Tue, 25 Jan 2005) $'
8
 * '$Revision: 2384 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import edu.ucsb.nceas.utilities.Options;
29
import java.io.File;
30
import java.io.IOException;
31
import java.io.PrintStream;
32
import java.sql.Connection;
33
import java.sql.DriverManager;
34
import java.sql.ResultSet;
35
import java.sql.SQLException;
36
import java.sql.SQLWarning;
37
import java.sql.Statement;
38
import java.util.ArrayList;
39
import java.text.SimpleDateFormat;
40
import java.util.Date;
41

    
42
import edu.ucsb.nceas.metacat.client.Metacat;
43
import edu.ucsb.nceas.metacat.client.MetacatFactory;
44
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
45

    
46
/**
47
 * Harvester is the main class for the Harvester application. The main
48
 * method creates a single Harvester object which drives the application.
49
 * 
50
 * @author    costa
51
 * 
52
 */
53
public class Harvester {
54

    
55
  /*
56
   * Class fields
57
   */
58
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
59
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
60
  private static final String CONFIG_NAME = "metacat.properties";
61
  public static final String filler = "*";
62
  public static final String marker =
63
"*****************************************************************************";
64
  public static Options options = null;
65
  private static String schemaLocation = null;
66
   
67

    
68
  /* 
69
   * Class methods
70
   */
71
   
72

    
73
  /**
74
   * Constructor. Creates a new instance of Harvester.
75
   */
76
  public Harvester() {
77
  }
78
    
79

    
80
  /**
81
   * Loads Harvester options from a configuration file.
82
   */
83
  public static void loadOptions(boolean test) {
84
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
85
    File propertyFile = new File(configDir, CONFIG_NAME);
86

    
87
    try {
88
      options = Options.initialize(propertyFile);
89
    } 
90
    catch (IOException e) {
91
      System.out.println("Error in loading options: " + e.getMessage());
92
    }
93
  }
94
  
95
  
96
  /**
97
    * Harvester main method.
98
    * 
99
    * @param args        the command line arguments
100
    * @throws SAXException
101
    * @throws IOException
102
    * @throws ParserConfigurationException
103
    */
104
  public static void main(String[] args) {
105
    Integer delayDefault = new Integer(0); // Default number of hours delay
106
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
107
    Integer d;                            // Used for determining delay
108
    long delta;                           // endTime - startTime
109
    long endTime;                         // time that a harvest completes
110
    Harvester harvester;                  // object for a single harvest run
111
    Integer maxHarvestsDefault = new Integer(30);    // Default max harvests
112
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
113
    Integer mh;                              // used in determining max harvests
114
    int nHarvests = 0;                      // counts the number of harvest runs
115
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
116
    Integer periodDefault = new Integer(24); // Default hours between harvests
117
    int period = periodDefault.intValue();   // Hours between harvests
118
    Integer p;                               // Used in determining the period
119
    long startTime;                          // time that a harvest run starts
120
    boolean test = false;                    // Passed to loadOption()
121
    
122
    if (args[0] != null) {
123
      schemaLocation = args[0];
124
    }
125

    
126
    System.out.println(marker);
127
    System.out.println("Starting Harvester");
128
    Harvester.loadOptions(test);
129

    
130
    // Parse the delay property. Use default if necessary.    
131
    try {
132
      d = Integer.valueOf(options.getOption("delay"));
133
      delay = d.intValue();
134
    }
135
    catch (NumberFormatException e) {
136
      System.out.println("NumberFormatException: Error parsing delay: " +
137
                         e.getMessage());
138
      System.out.println("Defaulting to delay=" + delayDefault);
139
      delay = delayDefault.intValue();
140
    }
141

    
142
    // Parse the maxHarvests property. Use default if necessary.    
143
    try {
144
      mh = Integer.valueOf(options.getOption("maxHarvests"));
145
      maxHarvests = mh.intValue();
146
    }
147
    catch (NumberFormatException e) {
148
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
149
                         e.getMessage());
150
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
151
      maxHarvests = maxHarvestsDefault.intValue();
152
    }
153

    
154
    // Parse the period property. Use default if necessary.    
155
    try {
156
      p = Integer.valueOf(options.getOption("period"));
157
      period = p.intValue();
158
    }
159
    catch (NumberFormatException e) {
160
      System.out.println("NumberFormatException: Error parsing period: " +
161
                         e.getMessage());
162
      System.out.println("Defaulting to period=" + periodDefault);
163
      period = periodDefault.intValue();
164
    }
165
    
166
    // Sleep for delay number of hours prior to starting first harvest
167
    if (delay > 0) {
168
      try {
169
        System.out.print("First harvest will begin in " + delay);
170
        if (delay == 1) {
171
          System.out.println(" hour.");
172
        }
173
        else {
174
          System.out.println(" hours.");
175
        }
176
        Thread.sleep(delay * oneHour);
177
      }
178
      catch (InterruptedException e) {
179
        System.err.println("InterruptedException: " + e.getMessage());
180
        System.exit(1);
181
      }
182
    }
183

    
184
    // Repeat a new harvest once every period number of hours, until we reach
185
    // the maximum number of harvests. Subtract delta from the time period so 
186
    // that each harvest will start at a fixed interval.
187
    //
188
    while (nHarvests < maxHarvests) {
189
      nHarvests++;
190
      startTime = System.currentTimeMillis();
191
      harvester = new Harvester();                // New object for this harvest
192
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
193
      harvester.readHarvestSiteSchedule();        // Read the database table
194
      harvester.harvest();                        // Harvest the documents
195
      harvester.shutdown();                       // Shut down Harvester
196
      endTime = System.currentTimeMillis();
197
      delta = endTime - startTime;
198

    
199
      if (nHarvests < maxHarvests) {
200
        try {
201
          System.out.println("Next harvest will begin in " + 
202
                             period + " hours.");
203
          Thread.sleep((period * oneHour) - delta);
204
        }
205
        catch (InterruptedException e) {
206
          System.err.println("InterruptedException: " + e.getMessage());
207
          System.exit(1);
208
        }
209
      }
210
    }
211
  }
212

    
213

    
214
  /*
215
   * Object fields
216
   */
217

    
218
  /** Database connection */
219
  private Connection conn = null;
220
  
221
  /** Used during development to determine whether to connect to metacat 
222
   *  Sometimes it's useful to test parts of the code without actually
223
   *  connecting to Metacat.
224
   */
225
  private boolean connectToMetacat;
226

    
227
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
228
  private int detailLogID;
229
  
230
  /** Email address of the Harvester Administrator */
231
  String harvesterAdministrator;
232
  
233
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
234
  private int harvestLogID;
235
  
236
  /** End time of this harvest session */
237
  private Date harvestEndTime;
238
  
239
  /** List of HarvestLog objects. Stores log entries for report generation. */
240
  private ArrayList harvestLogList = new ArrayList();
241
  
242
  /** List of HarvestSiteSchedule objects */
243
  private ArrayList harvestSiteScheduleList = new ArrayList();
244
  
245
  /** Start time of this harvest session */
246
  private Date harvestStartTime;
247
  
248
  /** Number of days to save log records. Any that are older are purged. */
249
  int logPeriod;
250
  
251
  /** Metacat client object */
252
  Metacat metacat;
253
  
254
  /** SMTP server for sending mail messages */
255
  String smtpServer;
256
  
257
  /** The timestamp for this harvest run. Used for output only. */
258
  String timestamp;
259
  
260

    
261
  /*
262
   * Object methods
263
   */
264
   
265
  /**
266
   * Creates a new HarvestLog object and adds it to the harvestLogList.
267
   * 
268
   * @param  status          the status of the harvest operation
269
   * @param  message         the message text of the harvest operation
270
   * @param  harvestOperationCode  the harvest operation code
271
   * @param  siteScheduleID  the siteScheduleID for which this operation was
272
   *                         performed. 0 indicates that the operation did not
273
   *                         involve a particular harvest site.
274
   * @param  harvestDocument the associated HarvestDocument object. May be null.
275
   * @param  errorMessage    additional error message pertaining to document
276
   *                         error.
277
   */
278
  void addLogEntry(int    status,
279
                   String message,
280
                   String harvestOperationCode,
281
                   int    siteScheduleID,
282
                   HarvestDocument harvestDocument,
283
                   String errorMessage
284
                  ) {
285
    HarvestLog harvestLog;
286
    int harvestLogID = getHarvestLogID();
287
    int detailLogID;
288

    
289
    /* If there is no associated harvest document, call the basic constructor;
290
     * else call the extended constructor.
291
     */
292
    if (harvestDocument == null) {    
293
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
294
                                  status, message, harvestOperationCode, 
295
                                  siteScheduleID);
296
    }
297
    else {
298
      detailLogID = getDetailLogID();
299
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
300
                                  harvestStartTime, status, message,
301
                                  harvestOperationCode, siteScheduleID,
302
                                  harvestDocument, errorMessage);
303
    }
304
    
305
    harvestLogList.add(harvestLog);
306
  }
307
  
308
  
309
  public void closeConnection() {
310
    try {
311
      // Close the database connection
312
      System.out.println("Closing the database connection.");
313
      conn.close();
314
    }
315
    catch (SQLException e) {
316
      System.out.println("Database access failed " + e);
317
    }    
318
  }
319

    
320

    
321
  /**
322
   * Determines whether Harvester should attempt to connect to Metacat.
323
   * Used during development and testing.
324
   * 
325
   * @return     true if Harvester should connect, otherwise false
326
   */
327
  boolean connectToMetacat () {
328
    return connectToMetacat;
329
  }
330
  
331

    
332
  /**
333
   * Normalizes text prior to insertion into the HARVEST_LOG or
334
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
335
   * character with the double quote character. This prevents SQL errors
336
   * involving words that contain single quotes. Also removes \n and \r
337
   * characters from the text.
338
   * 
339
   * @param text  the original string
340
   * @return      a string containing the normalized text
341
   */
342
  public String dequoteText(String text) {
343
    char c;
344
    StringBuffer stringBuffer = new StringBuffer();
345
    
346
    for (int i = 0; i < text.length(); i++) {
347
      c = text.charAt(i);
348
      switch (c) {
349
        case '\'':
350
          stringBuffer.append('\"');
351
          break;
352
        case '\r':
353
        case '\n':
354
          break;
355
        default:
356
          stringBuffer.append(c);
357
          break;
358
      }
359
    }
360
    
361
    return stringBuffer.toString();
362
  }
363
  
364
  /**
365
   * Returns a connection to the database. Opens the connection if a connection
366
   * has not already been made previously.
367
   * 
368
   * @return  conn  the database Connection object
369
   */
370
  public Connection getConnection() {
371
    String dbDriver = "";
372
    String defaultDB;
373
    String password;
374
    String user;
375
    SQLWarning warn;
376
    
377
    if (conn == null) {
378
      dbDriver = options.getOption("dbDriver");
379
      defaultDB = options.getOption("defaultDB");
380
      password = options.getOption("password");
381
      user = options.getOption("user");
382

    
383
      // Load the jdbc driver
384
      try {
385
        Class.forName(dbDriver);
386
      }
387
      catch (ClassNotFoundException e) {
388
        System.out.println("Can't load driver " + e);
389
        System.exit(1);
390
      } 
391

    
392
      // Make the database connection
393
      try {
394
        System.out.println("Getting connection to Harvester tables");
395
        conn = DriverManager.getConnection(defaultDB, user, password);
396

    
397
        // If a SQLWarning object is available, print its warning(s).
398
        // There may be multiple warnings chained.
399
        warn = conn.getWarnings();
400
      
401
        if (warn != null) {
402
          while (warn != null) {
403
            System.out.println("SQLState: " + warn.getSQLState());
404
            System.out.println("Message:  " + warn.getMessage());
405
            System.out.println("Vendor: " + warn.getErrorCode());
406
            System.out.println("");
407
            warn = warn.getNextWarning();
408
          }
409
        }
410
      }
411
      catch (SQLException e) {
412
        System.out.println("Database access failed " + e);
413
        System.exit(1);
414
      }
415
    }
416
    
417
    return conn;
418
  }
419

    
420

    
421
  /**
422
   * Gets the current value of the detailLogID for storage as a primary key in
423
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
424
   * 
425
   * @return  the current value of the detailLogID
426
   */
427
  public int getDetailLogID() {
428
    int currentValue = detailLogID;
429
    
430
    detailLogID++;
431
    return currentValue;
432
  }
433
  
434
  
435
  /**
436
   * Gets the current value of the harvestLogID for storage as a primary key in
437
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
438
   * 
439
   * @return  the current value of the detailLogID
440
   */
441
  public int getHarvestLogID() {
442
    int currentValue = harvestLogID;
443
    
444
    harvestLogID++;
445
    return currentValue;
446
  }
447
  
448

    
449
  /** 
450
   * Gets the maximum value of an integer field from a table.
451
   * 
452
   * @param tableName  the database table name
453
   * @param fieldName  the field name of the integer field in the table
454
   * @return  the maximum integer stored in the fieldName field of tableName
455
   */
456
  private int getMaxValue(String tableName, String fieldName) {
457
    int maxValue = 0;
458
    int fieldValue;
459
    String query = "SELECT " + fieldName + " FROM " + tableName;
460
    Statement stmt;
461
    
462
	try {
463
      stmt = conn.createStatement();
464
      ResultSet rs = stmt.executeQuery(query);
465
	
466
      while (rs.next()) {
467
        fieldValue = rs.getInt(fieldName);
468
        maxValue = Math.max(maxValue, fieldValue);
469
      }
470
      
471
      stmt.close();
472
    } 
473
    catch(SQLException ex) {
474
      System.out.println("SQLException: " + ex.getMessage());
475
    }
476
    
477
    return maxValue;
478
  }
479
  
480
  
481
  /** 
482
   * Gets the minimum value of an integer field from a table.
483
   * 
484
   * @param tableName  the database table name
485
   * @param fieldName  the field name of the integer field in the table
486
   * @return  the minimum integer stored in the fieldName field of tableName
487
   */
488
  private int getMinValue(String tableName, String fieldName) {
489
    int minValue = 0;
490
    int fieldValue;
491
    String query = "SELECT " + fieldName + " FROM " + tableName;
492
    Statement stmt;
493
    
494
    try {
495
      stmt = conn.createStatement();
496
      ResultSet rs = stmt.executeQuery(query);
497
	
498
      while (rs.next()) {
499
        fieldValue = rs.getInt(fieldName);
500

    
501
        if (minValue == 0) {
502
          minValue = fieldValue;
503
        }
504
        else {
505
          minValue = Math.min(minValue, fieldValue);
506
        }
507
      }
508
      
509
      stmt.close();
510
    } 
511
    catch(SQLException ex) {
512
      System.out.println("SQLException: " + ex.getMessage());
513
    }
514

    
515
    return minValue;
516
  }
517
  
518
  
519
  /**
520
   * For every Harvest site schedule in the database, harvest the
521
   * documents for that site if they are due to be harvested.
522
   * 
523
   * @throws SAXException
524
   * @throws IOException
525
   * @throws ParserConfigurationException
526
   */
527
  private void harvest() {
528
    HarvestSiteSchedule harvestSiteSchedule;
529

    
530
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
531
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
532
      
533
      if (Harvester.schemaLocation != null) {
534
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
535
      }
536
      
537
      harvestSiteSchedule.harvestDocumentList();
538
    }
539
  }
540
  
541
  
542
  /**
543
   * Initializes the detailLogID and harvestLogID values to their current
544
   * maximums + 1.
545
   */
546
  public void initLogIDs() {
547
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
548
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
549
  }
550
  
551

    
552
  /**
553
   * Prints the header of the harvest report.
554
   * 
555
   * @param out            the PrintStream object to print to
556
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
557
   *                       have a value of 0 if no particular site is involved,
558
   *                       which indicates that the report is being prepared
559
   *                       for the Harvester Administrator rather than for a
560
   *                       particular Site Contact.
561
   */
562
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
563
    HarvestLog harvestLog;
564
    int logSiteScheduleID;
565
    int nErrors = 0;
566
    String phrase;
567
    
568
    for (int i = 0; i < harvestLogList.size(); i++) {
569
      harvestLog = (HarvestLog) harvestLogList.get(i);
570
      logSiteScheduleID = harvestLog.getSiteScheduleID();
571
      
572
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
573
        if (harvestLog.isErrorEntry()) {
574
          nErrors++;
575
        }
576
      }      
577
    }
578

    
579
    out.println(marker);
580
    out.println(filler);
581
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
582
    out.println(filler);
583

    
584
    if (nErrors > 0) {
585
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
586
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
587
      out.println("* Please see the log entries below for additonal details.");
588
    }
589
    else {
590
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
591
    }
592
    
593
    out.println(filler);
594
    out.println(marker);
595
  }
596
    
597

    
598
  /**
599
   * Prints harvest log entries for this harvest run. Entries may be filtered
600
   * for a particular site, or all entries may be printed.
601
   * 
602
   * @param out            the PrintStream object to write to
603
   * @param maxCodeLevel   the maximum code level that should be printed,
604
   *                       e.g. "warning". Any log entries higher than this
605
   *                       level will not be printed.
606
   * @param siteScheduleID if greater than 0, indicates that the log
607
   *                       entry should only be printed for a particular site
608
   *                       as identified by its siteScheduleID. if 0, then
609
   *                       print output for all sites.
610
   */
611
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
612
                      ) {
613
    HarvestLog harvestLog;
614
    int logSiteScheduleID;
615
    int nErrors = 0;
616
    String phrase;
617
    
618
    out.println("");
619
    out.println(marker);
620
    out.println(filler);
621
    out.println("*                       LOG ENTRIES");
622
    out.println(filler);
623
    out.println(marker);
624

    
625
    for (int i = 0; i < harvestLogList.size(); i++) {
626
      harvestLog = (HarvestLog) harvestLogList.get(i);
627
      logSiteScheduleID = harvestLog.getSiteScheduleID();
628
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
629
        harvestLog.printOutput(out, maxCodeLevel);
630
      }
631
    }
632
  }
633
    
634

    
635
  /**
636
   * Prints the site schedule data for a given site.
637
   * 
638
   * @param out              the PrintStream to write to
639
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
640
   */
641
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
642
    HarvestSiteSchedule harvestSiteSchedule;
643

    
644
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
645
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
646
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
647
        harvestSiteSchedule.printOutput(out);
648
      }
649
    }
650
  }
651
  
652

    
653
  /**
654
   * Prunes old records from the HARVEST_LOG table. Records are removed if
655
   * their HARVEST_DATE is older than a given number of days, as stored in the
656
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
657
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
658
   */
659
  private void pruneHarvestLog() {
660
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
661
    Date dateLastLog;                    // Prune everything prior to this date
662
    String deleteString;
663
    String deleteStringDetailLog;
664
    long delta;
665
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
666
    int recordsDeleted;
667
    int recordsDeletedDetail = 0;
668
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
669
    String dateString;
670
    ResultSet rs;
671
    String selectString;
672
    Statement stmt;
673
    long timeLastLog = 0;
674
    SQLWarning warn;
675
     
676
    delta = logPeriod * millisecondsPerDay;
677
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
678
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
679
    deleteStringDetailLog = 
680
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
681
    timeLastLog = currentTime - delta;
682
    dateLastLog = new Date(timeLastLog);
683
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
684
    deleteString += dateString;
685
    selectString += dateString;
686

    
687
    try {
688
      System.out.println(
689
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
690

    
691
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
692
       * table.
693
       */
694
      stmt = conn.createStatement();                            
695
      rs = stmt.executeQuery(selectString);
696
      warn = rs.getWarnings();
697

    
698
      if (warn != null) {
699
        System.out.println("\n---Warning---\n");
700

    
701
        while (warn != null) {
702
          System.out.println("Message: " + warn.getMessage());
703
          System.out.println("SQLState: " + warn.getSQLState());
704
          System.out.print("Vendor error code: ");
705
          System.out.println(warn.getErrorCode());
706
          System.out.println("");
707
          warn = warn.getNextWarning();
708
        }
709
      } 
710

    
711
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
712
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
713
       * be pruned first because its records have a child relationship to those
714
       * in HARVEST_LOG.
715
       */
716
      while (rs.next()) {
717
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
718
        stmt = conn.createStatement();                            
719
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
720
                                            harvestLogID);
721
        recordsDeletedDetail += recordsDeleted;
722
        stmt.close();
723
      }
724
 
725
      /* Now prune entries from the HARVEST_LOG table using a single update.
726
       */
727
      stmt = conn.createStatement();                            
728
      recordsDeleted = stmt.executeUpdate(deleteString);
729
      stmt.close();
730

    
731
      System.out.println("  " + recordsDeletedDetail + 
732
                         " records deleted from HARVEST_DETAIL_LOG");
733
      System.out.println("  " + recordsDeleted + 
734
                         " records deleted from HARVEST_LOG");
735
    }
736
    catch (SQLException e) {
737
      System.out.println("SQLException: " + e.getMessage());
738
    }
739
  }
740
    
741

    
742
  /**
743
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
744
   * a HarvestSiteSchedule object for each row in the table.
745
   */
746
  private void readHarvestSiteSchedule() {
747
    HarvestSiteSchedule harvestSiteSchedule;
748
    ResultSet rs;
749
    SQLWarning warn;
750
    Statement stmt;
751

    
752
    String contactEmail;
753
    String dateLastHarvest;
754
    String dateNextHarvest;
755
    String documentListURL;
756
    String ldapDN;
757
    String ldapPwd;
758
    int siteScheduleID;
759
    String unit;
760
    int updateFrequency;
761
        
762
    try {
763
      // Read the HARVEST_SITE_SCHEDULE table
764
      stmt = conn.createStatement();
765
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
766
      warn = rs.getWarnings();
767

    
768
      if (warn != null) {
769
        System.out.println("\n---Warning---\n");
770

    
771
        while (warn != null) {
772
          System.out.println("Message: " + warn.getMessage());
773
          System.out.println("SQLState: " + warn.getSQLState());
774
          System.out.print("Vendor error code: ");
775
          System.out.println(warn.getErrorCode());
776
          System.out.println("");
777
          warn = warn.getNextWarning();
778
        }
779
      }
780
     
781
      while (rs.next()) {
782
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
783
        documentListURL = rs.getString("DOCUMENTLISTURL");
784
        ldapDN = rs.getString("LDAPDN");
785
        ldapPwd = rs.getString("LDAPPWD");
786
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
787
        dateLastHarvest = rs.getString("DATELASTHARVEST");
788
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
789
        unit = rs.getString("UNIT");
790
        contactEmail = rs.getString("CONTACT_EMAIL");
791
        
792
        warn = rs.getWarnings();
793

    
794
        if (warn != null) {
795
          System.out.println("\n---Warning---\n");
796
      
797
          while (warn != null) {
798
            System.out.println("Message: " + warn.getMessage());
799
            System.out.println("SQLState: " + warn.getSQLState());
800
            System.out.print("Vendor error code: ");
801
            System.out.println(warn.getErrorCode());
802
            System.out.println("");
803
            warn = warn.getNextWarning();
804
          }
805
        }
806
      
807
        harvestSiteSchedule = new HarvestSiteSchedule(this,
808
                                                      siteScheduleID,
809
                                                      documentListURL,
810
                                                      ldapDN,
811
                                                      ldapPwd,
812
                                                      dateNextHarvest,
813
                                                      dateLastHarvest,
814
                                                      updateFrequency,
815
                                                      unit,
816
                                                      contactEmail
817
                                                     );
818
        harvestSiteScheduleList.add(harvestSiteSchedule);
819
      }
820
      
821
      rs.close();
822
      stmt.close();
823
    }
824
    catch (SQLException e) {
825
      System.out.println("Database access failed " + e);
826
      System.exit(1);
827
    }
828
    
829
  }
830
    
831

    
832
  /**
833
   * Sends a report to the Harvester Administrator. The report prints each log
834
   * entry pertaining to this harvest run.
835
   *
836
   * @param maxCodeLevel  the maximum code level that should be printed,
837
   *                      e.g. "warning". Any log entries higher than this
838
   *                      level will not be printed.
839
   */
840
  void reportToAdministrator(String maxCodeLevel) {
841
    PrintStream body;
842
    String from = harvesterAdministrator;
843
    String[] fromArray;
844
    MailMessage msg;
845
    int siteScheduleID = 0;
846
    String subject = "Report from Metacat Harvester: " + timestamp;
847
    String to = harvesterAdministrator;
848
    
849
    if (!to.equals("")) {
850
      System.out.println("Sending report to Harvester Administrator at address "
851
                         + harvesterAdministrator);
852
      
853
      try {
854
        msg = new MailMessage(smtpServer);
855

    
856
        if (from.indexOf(',') > 0) {
857
          fromArray = from.split(",");
858
          
859
          for (int i = 0; i < fromArray.length; i++) {
860
            if (i == 0) {
861
              msg.from(fromArray[i]);
862
            }
863
            
864
            msg.to(fromArray[i]);            
865
          }
866
        }
867
        else if (from.indexOf(';') > 0) {
868
          fromArray = from.split(";");
869

    
870
          for (int i = 0; i < fromArray.length; i++) {
871
            if (i == 0) {
872
              msg.from(fromArray[i]);
873
            }
874
            
875
            msg.to(fromArray[i]);            
876
          }
877
        }
878
        else {
879
          msg.from(from);
880
          msg.to(to);
881
        }
882
        
883
        msg.setSubject(subject);
884
        body = msg.getPrintStream();
885
        printHarvestHeader(body, siteScheduleID);
886
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
887
        msg.sendAndClose();
888
      }
889
      catch (IOException e) {
890
        System.out.println("There was a problem sending email to " + to);
891
        System.out.println("IOException: " + e.getMessage());
892
      }
893
    }
894
  }
895
  
896

    
897
  /**
898
   * Sets the harvest start time for this harvest run.
899
   * 
900
   * @param date
901
   */
902
  public void setHarvestStartTime(Date date) {
903
    harvestStartTime = date;
904
  }
905
    
906

    
907
  /**
908
   * Shuts down Harvester. Performs cleanup operations such as logging out
909
   * of Metacat and disconnecting from the database.
910
   */
911
  private void shutdown() {
912
    String maxCodeLevel = "debug";  // Print all log entries from level 1
913
                                    // ("error") to level 5 ("debug")
914
    int siteScheduleID = 0;
915

    
916
    // Log shutdown operation
917
    System.out.println("Shutting Down Harvester");
918
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
919
    pruneHarvestLog();
920
    closeConnection();
921
    // Print log to standard output and then email the Harvester administrator
922
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
923
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
924
  }
925
    
926

    
927
  /**
928
   * Initializes Harvester at startup. Connects to the database and to Metacat.
929
   * 
930
   * @param nHarvests        the nth harvest
931
   * @param maxHarvests      the maximum number of harvests that this process
932
   *                         can run
933
   */
934
  private void startup(int nHarvests, int maxHarvests) {
935
    Boolean ctm;
936
    String httpserver;
937
    Integer lp;
938
    String metacatURL;
939
    Date now = new Date();
940
    String servletPath;
941
    
942
    timestamp = now.toString();
943
    System.out.println(Harvester.marker);
944
    System.out.println(timestamp + ": Starting Next Harvest (" +
945
                       nHarvests + "/" + maxHarvests + ")");
946
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
947
    connectToMetacat = ctm.booleanValue();
948
    harvesterAdministrator = options.getOption("harvesterAdministrator");
949
    smtpServer = options.getOption("smtpServer");
950

    
951
    try {
952
      lp = Integer.valueOf(options.getOption("logPeriod"));
953
      logPeriod = lp.intValue();
954
    }
955
    catch (NumberFormatException e) {
956
      System.err.println("NumberFormatException: Error parsing logPeriod " +
957
                         logPeriod + e.getMessage());
958
      System.err.println("Defaulting to logPeriod of 90 days");
959
      logPeriod = 90;
960
    }
961

    
962
    conn = getConnection();
963
    initLogIDs();
964
    setHarvestStartTime(now);
965
    // Log startup operation
966
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
967
      
968
    if (connectToMetacat()) {      
969
      try {
970
        httpserver = options.getOption("httpserver");
971
        servletPath = options.getOption("servletpath");
972
        metacatURL = httpserver + servletPath;
973
        System.out.println("Connecting to Metacat: " + metacatURL);
974
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
975
      } 
976
      catch (MetacatInaccessibleException e) {
977
        System.out.println("Metacat connection failed." + e.getMessage());
978
      } 
979
      catch (Exception e) {
980
        System.out.println("Metacat connection failed." + e.getMessage());
981
      }
982
    }
983
  }
984

    
985
}
(6-6/11)