Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: daigle $'
7
 *     '$Date: 2008-07-06 21:25:34 -0700 (Sun, 06 Jul 2008) $'
8
 * '$Revision: 4080 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import edu.ucsb.nceas.utilities.Options;
29
import java.io.File;
30
import java.io.IOException;
31
import java.io.PrintStream;
32
import java.sql.Connection;
33
import java.sql.DriverManager;
34
import java.sql.ResultSet;
35
import java.sql.SQLException;
36
import java.sql.SQLWarning;
37
import java.sql.Statement;
38
import java.util.ArrayList;
39
import java.text.SimpleDateFormat;
40
import java.util.Date;
41

    
42

    
43
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46
import edu.ucsb.nceas.metacat.util.SystemUtil;
47

    
48
/**
49
 * Harvester is the main class for the Harvester application. The main
50
 * method creates a single Harvester object which drives the application.
51
 * 
52
 * @author    costa
53
 * 
54
 */
55
public class Harvester {
56

    
57
  /*
58
   * Class fields
59
   */
60
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
61
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
62
  private static final String CONFIG_NAME = "metacat.properties";
63
  public static final String filler = "*";
64
  private static boolean keepRunning = true;
65
  public static final String marker =
66
"*****************************************************************************";
67
  public static Options options = null;
68
  private static String schemaLocation = null;
69
   
70

    
71
  /* 
72
   * Class methods
73
   */
74
   
75

    
76
  /**
77
   * Constructor. Creates a new instance of Harvester.
78
   */
79
  public Harvester() {
80
  }
81
    
82

    
83
  /**
84
   * Loads Harvester options from a configuration file.
85
   */
86
  public static void loadOptions(boolean test) {
87
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
88
    File propertyFile = new File(configDir, CONFIG_NAME);
89

    
90
    try {
91
      options = Options.initialize(propertyFile);
92
    } 
93
    catch (IOException e) {
94
      System.out.println("Error in loading options: " + e.getMessage());
95
    }
96
  }
97
  
98
  
99
  /**
100
    * Harvester main method.
101
    * 
102
    * @param args        the command line arguments
103
    * @throws SAXException
104
    * @throws IOException
105
    * @throws ParserConfigurationException
106
    */
107
  public static void main(String[] args) {
108
    Integer delayDefault = new Integer(0); // Default number of hours delay
109
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
110
    Integer d;                            // Used for determining delay
111
    long delta;                           // endTime - startTime
112
    long endTime;                         // time that a harvest completes
113
    Harvester harvester;                  // object for a single harvest run
114
    Integer maxHarvestsDefault = new Integer(0);     // Default max harvests
115
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
116
    Integer mh;                              // used in determining max harvests
117
    int nHarvests = 0;                      // counts the number of harvest runs
118
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
119
    Integer periodDefault = new Integer(24); // Default hours between harvests
120
    int period = periodDefault.intValue();   // Hours between harvests
121
    Integer p;                               // Used in determining the period
122
    long startTime;                          // time that a harvest run starts
123
    boolean test = false;                    // Passed to loadOption()
124
    
125
    if (args.length > 0) {
126
      schemaLocation = args[0];
127
      System.err.println("schemaLocation: " + schemaLocation);
128
      
129
      try {
130
        Thread.sleep(10000);
131
      } 
132
      catch (InterruptedException e) {
133
        e.printStackTrace();
134
      }
135
    }
136

    
137
    System.out.println(marker);
138
    System.out.println("Starting Harvester");
139
    Harvester.loadOptions(test);
140

    
141
    // Parse the delay property. Use default if necessary.    
142
    try {
143
      d = Integer.valueOf(options.getOption("delay"));
144
      delay = d.intValue();
145
    }
146
    catch (NumberFormatException e) {
147
      System.out.println("NumberFormatException: Error parsing delay: " +
148
                         e.getMessage());
149
      System.out.println("Defaulting to delay=" + delayDefault);
150
      delay = delayDefault.intValue();
151
    }
152

    
153
    // Parse the maxHarvests property. Use default if necessary.    
154
    try {
155
      mh = Integer.valueOf(options.getOption("maxHarvests"));
156
      maxHarvests = mh.intValue();
157
    }
158
    catch (NumberFormatException e) {
159
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
160
                         e.getMessage());
161
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
162
      maxHarvests = maxHarvestsDefault.intValue();
163
    }
164

    
165
    // Parse the period property. Use default if necessary.    
166
    try {
167
      p = Integer.valueOf(options.getOption("period"));
168
      period = p.intValue();
169
    }
170
    catch (NumberFormatException e) {
171
      System.out.println("NumberFormatException: Error parsing period: " +
172
                         e.getMessage());
173
      System.out.println("Defaulting to period=" + periodDefault);
174
      period = periodDefault.intValue();
175
    }
176
    
177
    // Sleep for delay number of hours prior to starting first harvest
178
    if (delay > 0) {
179
      try {
180
        System.out.print("First harvest will begin in " + delay);
181
        if (delay == 1) {
182
          System.out.println(" hour.");
183
        }
184
        else {
185
          System.out.println(" hours.");
186
        }
187
        Thread.sleep(delay * oneHour);
188
      }
189
      catch (InterruptedException e) {
190
        System.err.println("InterruptedException: " + e.getMessage());
191
        System.exit(1);
192
      }
193
    }
194

    
195
    // Repeat a new harvest once every period number of hours, until we reach
196
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
197
    // Subtract delta from the time period so 
198
    // that each harvest will start at a fixed interval.
199
    //
200
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
201
      nHarvests++;
202
      startTime = System.currentTimeMillis();
203
      harvester = new Harvester();                // New object for this harvest
204
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
205
      harvester.readHarvestSiteSchedule();        // Read the database table
206
      harvester.harvest();                        // Harvest the documents
207
      harvester.shutdown();                       // Shut down Harvester
208
      endTime = System.currentTimeMillis();
209
      delta = endTime - startTime;
210

    
211
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
212
        try {
213
          System.out.println("Next harvest will begin in " + 
214
                             period + " hours.");
215
          Thread.sleep((period * oneHour) - delta);
216
        }
217
        catch (InterruptedException e) {
218
          System.err.println("InterruptedException: " + e.getMessage());
219
          System.exit(1);
220
        }
221
      }
222
    }
223
  }
224
  
225
  
226
  /**
227
   * Set the keepRunning flag. If set to false, the main program will end
228
   * the while loop that keeps harvester running every period number of hours.
229
   * The static method is intended to be called from the HarvesterServlet class
230
   * which creates a thread to run Harvester. When the thread is destroyed, the
231
   * thread's destroy() method calls Harvester.setKeepRunning(false).
232
   * 
233
   * @param keepRunning
234
   */
235
  static void setKeepRunning(boolean keepRunning) {
236
    Harvester.keepRunning = keepRunning;
237
  }
238

    
239
  
240
  /*
241
   * Object fields
242
   */
243

    
244
  /** Database connection */
245
  private Connection conn = null;
246
  
247
  /** Used during development to determine whether to connect to metacat 
248
   *  Sometimes it's useful to test parts of the code without actually
249
   *  connecting to Metacat.
250
   */
251
  private boolean connectToMetacat;
252

    
253
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
254
  private int detailLogID;
255
  
256
  /** Email address of the Harvester Administrator */
257
  String harvesterAdministrator;
258
  
259
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
260
  private int harvestLogID;
261
  
262
  /** End time of this harvest session */
263
  private Date harvestEndTime;
264
  
265
  /** List of HarvestLog objects. Stores log entries for report generation. */
266
  private ArrayList harvestLogList = new ArrayList();
267
  
268
  /** List of HarvestSiteSchedule objects */
269
  private ArrayList harvestSiteScheduleList = new ArrayList();
270
  
271
  /** Start time of this harvest session */
272
  private Date harvestStartTime;
273
  
274
  /** Number of days to save log records. Any that are older are purged. */
275
  int logPeriod;
276
  
277
  /** Metacat client object */
278
  Metacat metacat;
279
  
280
  /** SMTP server for sending mail messages */
281
  String smtpServer;
282
  
283
  /** The timestamp for this harvest run. Used for output only. */
284
  String timestamp;
285
  
286

    
287
  /*
288
   * Object methods
289
   */
290
   
291
  /**
292
   * Creates a new HarvestLog object and adds it to the harvestLogList.
293
   * 
294
   * @param  status          the status of the harvest operation
295
   * @param  message         the message text of the harvest operation
296
   * @param  harvestOperationCode  the harvest operation code
297
   * @param  siteScheduleID  the siteScheduleID for which this operation was
298
   *                         performed. 0 indicates that the operation did not
299
   *                         involve a particular harvest site.
300
   * @param  harvestDocument the associated HarvestDocument object. May be null.
301
   * @param  errorMessage    additional error message pertaining to document
302
   *                         error.
303
   */
304
  void addLogEntry(int    status,
305
                   String message,
306
                   String harvestOperationCode,
307
                   int    siteScheduleID,
308
                   HarvestDocument harvestDocument,
309
                   String errorMessage
310
                  ) {
311
    HarvestLog harvestLog;
312
    int harvestLogID = getHarvestLogID();
313
    int detailLogID;
314

    
315
    /* If there is no associated harvest document, call the basic constructor;
316
     * else call the extended constructor.
317
     */
318
    if (harvestDocument == null) {    
319
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
320
                                  status, message, harvestOperationCode, 
321
                                  siteScheduleID);
322
    }
323
    else {
324
      detailLogID = getDetailLogID();
325
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
326
                                  harvestStartTime, status, message,
327
                                  harvestOperationCode, siteScheduleID,
328
                                  harvestDocument, errorMessage);
329
    }
330
    
331
    harvestLogList.add(harvestLog);
332
  }
333
  
334
  
335
  public void closeConnection() {
336
    try {
337
      // Close the database connection
338
      System.out.println("Closing the database connection.");
339
      conn.close();
340
    }
341
    catch (SQLException e) {
342
      System.out.println("Database access failed " + e);
343
    }    
344
  }
345

    
346

    
347
  /**
348
   * Determines whether Harvester should attempt to connect to Metacat.
349
   * Used during development and testing.
350
   * 
351
   * @return     true if Harvester should connect, otherwise false
352
   */
353
  boolean connectToMetacat () {
354
    return connectToMetacat;
355
  }
356
  
357

    
358
  /**
359
   * Normalizes text prior to insertion into the HARVEST_LOG or
360
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
361
   * character with the double quote character. This prevents SQL errors
362
   * involving words that contain single quotes. Also removes \n and \r
363
   * characters from the text.
364
   * 
365
   * @param text  the original string
366
   * @return      a string containing the normalized text
367
   */
368
  public String dequoteText(String text) {
369
    char c;
370
    StringBuffer stringBuffer = new StringBuffer();
371
    
372
    for (int i = 0; i < text.length(); i++) {
373
      c = text.charAt(i);
374
      switch (c) {
375
        case '\'':
376
          stringBuffer.append('\"');
377
          break;
378
        case '\r':
379
        case '\n':
380
          break;
381
        default:
382
          stringBuffer.append(c);
383
          break;
384
      }
385
    }
386
    
387
    return stringBuffer.toString();
388
  }
389
  
390
  /**
391
   * Returns a connection to the database. Opens the connection if a connection
392
   * has not already been made previously.
393
   * 
394
   * @return  conn  the database Connection object
395
   */
396
  public Connection getConnection() {
397
    String dbDriver = "";
398
    String defaultDB;
399
    String password;
400
    String user;
401
    SQLWarning warn;
402
    
403
    if (conn == null) {
404
      dbDriver = options.getOption("database.driver");
405
      defaultDB = options.getOption("database.connectionURI");
406
      password = options.getOption("database.password");
407
      user = options.getOption("database.user");
408

    
409
      // Load the jdbc driver
410
      try {
411
        Class.forName(dbDriver);
412
      }
413
      catch (ClassNotFoundException e) {
414
        System.out.println("Can't load driver " + e);
415
        System.exit(1);
416
      } 
417

    
418
      // Make the database connection
419
      try {
420
        System.out.println("Getting connection to Harvester tables");
421
        conn = DriverManager.getConnection(defaultDB, user, password);
422

    
423
        // If a SQLWarning object is available, print its warning(s).
424
        // There may be multiple warnings chained.
425
        warn = conn.getWarnings();
426
      
427
        if (warn != null) {
428
          while (warn != null) {
429
            System.out.println("SQLState: " + warn.getSQLState());
430
            System.out.println("Message:  " + warn.getMessage());
431
            System.out.println("Vendor: " + warn.getErrorCode());
432
            System.out.println("");
433
            warn = warn.getNextWarning();
434
          }
435
        }
436
      }
437
      catch (SQLException e) {
438
        System.out.println("Database access failed " + e);
439
        System.exit(1);
440
      }
441
    }
442
    
443
    return conn;
444
  }
445

    
446

    
447
  /**
448
   * Gets the current value of the detailLogID for storage as a primary key in
449
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
450
   * 
451
   * @return  the current value of the detailLogID
452
   */
453
  public int getDetailLogID() {
454
    int currentValue = detailLogID;
455
    
456
    detailLogID++;
457
    return currentValue;
458
  }
459
  
460
  
461
  /**
462
   * Gets the current value of the harvestLogID for storage as a primary key in
463
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
464
   * 
465
   * @return  the current value of the detailLogID
466
   */
467
  public int getHarvestLogID() {
468
    int currentValue = harvestLogID;
469
    
470
    harvestLogID++;
471
    return currentValue;
472
  }
473
  
474

    
475
  /** 
476
   * Gets the maximum value of an integer field from a table.
477
   * 
478
   * @param tableName  the database table name
479
   * @param fieldName  the field name of the integer field in the table
480
   * @return  the maximum integer stored in the fieldName field of tableName
481
   */
482
  private int getMaxValue(String tableName, String fieldName) {
483
    int maxValue = 0;
484
    int fieldValue;
485
    String query = "SELECT " + fieldName + " FROM " + tableName;
486
    Statement stmt;
487
    
488
	try {
489
      stmt = conn.createStatement();
490
      ResultSet rs = stmt.executeQuery(query);
491
	
492
      while (rs.next()) {
493
        fieldValue = rs.getInt(fieldName);
494
        maxValue = Math.max(maxValue, fieldValue);
495
      }
496
      
497
      stmt.close();
498
    } 
499
    catch(SQLException ex) {
500
      System.out.println("SQLException: " + ex.getMessage());
501
    }
502
    
503
    return maxValue;
504
  }
505
  
506
  
507
  /** 
508
   * Gets the minimum value of an integer field from a table.
509
   * 
510
   * @param tableName  the database table name
511
   * @param fieldName  the field name of the integer field in the table
512
   * @return  the minimum integer stored in the fieldName field of tableName
513
   */
514
  private int getMinValue(String tableName, String fieldName) {
515
    int minValue = 0;
516
    int fieldValue;
517
    String query = "SELECT " + fieldName + " FROM " + tableName;
518
    Statement stmt;
519
    
520
    try {
521
      stmt = conn.createStatement();
522
      ResultSet rs = stmt.executeQuery(query);
523
	
524
      while (rs.next()) {
525
        fieldValue = rs.getInt(fieldName);
526

    
527
        if (minValue == 0) {
528
          minValue = fieldValue;
529
        }
530
        else {
531
          minValue = Math.min(minValue, fieldValue);
532
        }
533
      }
534
      
535
      stmt.close();
536
    } 
537
    catch(SQLException ex) {
538
      System.out.println("SQLException: " + ex.getMessage());
539
    }
540

    
541
    return minValue;
542
  }
543
  
544
  
545
  /**
546
   * For every Harvest site schedule in the database, harvest the
547
   * documents for that site if they are due to be harvested.
548
   * 
549
   * @throws SAXException
550
   * @throws IOException
551
   * @throws ParserConfigurationException
552
   */
553
  private void harvest() {
554
    HarvestSiteSchedule harvestSiteSchedule;
555

    
556
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
557
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
558
      
559
      if (Harvester.schemaLocation != null) {
560
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
561
      }
562
      
563
      harvestSiteSchedule.harvestDocumentList();
564
    }
565
  }
566
  
567
  
568
  /**
569
   * Initializes the detailLogID and harvestLogID values to their current
570
   * maximums + 1.
571
   */
572
  public void initLogIDs() {
573
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
574
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
575
  }
576
  
577

    
578
  /**
579
   * Prints the header of the harvest report.
580
   * 
581
   * @param out            the PrintStream object to print to
582
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
583
   *                       have a value of 0 if no particular site is involved,
584
   *                       which indicates that the report is being prepared
585
   *                       for the Harvester Administrator rather than for a
586
   *                       particular Site Contact.
587
   */
588
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
589
    HarvestLog harvestLog;
590
    int logSiteScheduleID;
591
    int nErrors = 0;
592
    String phrase;
593
    
594
    for (int i = 0; i < harvestLogList.size(); i++) {
595
      harvestLog = (HarvestLog) harvestLogList.get(i);
596
      logSiteScheduleID = harvestLog.getSiteScheduleID();
597
      
598
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
599
        if (harvestLog.isErrorEntry()) {
600
          nErrors++;
601
        }
602
      }      
603
    }
604

    
605
    out.println(marker);
606
    out.println(filler);
607
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
608
    out.println(filler);
609

    
610
    if (nErrors > 0) {
611
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
612
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
613
      out.println("* Please see the log entries below for additonal details.");
614
    }
615
    else {
616
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
617
    }
618
    
619
    out.println(filler);
620
    out.println(marker);
621
  }
622
    
623

    
624
  /**
625
   * Prints harvest log entries for this harvest run. Entries may be filtered
626
   * for a particular site, or all entries may be printed.
627
   * 
628
   * @param out            the PrintStream object to write to
629
   * @param maxCodeLevel   the maximum code level that should be printed,
630
   *                       e.g. "warning". Any log entries higher than this
631
   *                       level will not be printed.
632
   * @param siteScheduleID if greater than 0, indicates that the log
633
   *                       entry should only be printed for a particular site
634
   *                       as identified by its siteScheduleID. if 0, then
635
   *                       print output for all sites.
636
   */
637
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
638
                      ) {
639
    HarvestLog harvestLog;
640
    int logSiteScheduleID;
641
    int nErrors = 0;
642
    String phrase;
643
    
644
    out.println("");
645
    out.println(marker);
646
    out.println(filler);
647
    out.println("*                       LOG ENTRIES");
648
    out.println(filler);
649
    out.println(marker);
650

    
651
    for (int i = 0; i < harvestLogList.size(); i++) {
652
      harvestLog = (HarvestLog) harvestLogList.get(i);
653
      logSiteScheduleID = harvestLog.getSiteScheduleID();
654
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
655
        harvestLog.printOutput(out, maxCodeLevel);
656
      }
657
    }
658
  }
659
    
660

    
661
  /**
662
   * Prints the site schedule data for a given site.
663
   * 
664
   * @param out              the PrintStream to write to
665
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
666
   */
667
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
668
    HarvestSiteSchedule harvestSiteSchedule;
669

    
670
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
671
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
672
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
673
        harvestSiteSchedule.printOutput(out);
674
      }
675
    }
676
  }
677
  
678

    
679
  /**
680
   * Prunes old records from the HARVEST_LOG table. Records are removed if
681
   * their HARVEST_DATE is older than a given number of days, as stored in the
682
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
683
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
684
   */
685
  private void pruneHarvestLog() {
686
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
687
    Date dateLastLog;                    // Prune everything prior to this date
688
    String deleteString;
689
    String deleteStringDetailLog;
690
    long delta;
691
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
692
    int recordsDeleted;
693
    int recordsDeletedDetail = 0;
694
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
695
    String dateString;
696
    ResultSet rs;
697
    String selectString;
698
    Statement stmt;
699
    long timeLastLog = 0;
700
    SQLWarning warn;
701
     
702
    delta = logPeriod * millisecondsPerDay;
703
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
704
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
705
    deleteStringDetailLog = 
706
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
707
    timeLastLog = currentTime - delta;
708
    dateLastLog = new Date(timeLastLog);
709
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
710
    deleteString += dateString;
711
    selectString += dateString;
712

    
713
    try {
714
      System.out.println(
715
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
716

    
717
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
718
       * table.
719
       */
720
      stmt = conn.createStatement();                            
721
      rs = stmt.executeQuery(selectString);
722
      warn = rs.getWarnings();
723

    
724
      if (warn != null) {
725
        System.out.println("\n---Warning---\n");
726

    
727
        while (warn != null) {
728
          System.out.println("Message: " + warn.getMessage());
729
          System.out.println("SQLState: " + warn.getSQLState());
730
          System.out.print("Vendor error code: ");
731
          System.out.println(warn.getErrorCode());
732
          System.out.println("");
733
          warn = warn.getNextWarning();
734
        }
735
      } 
736

    
737
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
738
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
739
       * be pruned first because its records have a child relationship to those
740
       * in HARVEST_LOG.
741
       */
742
      while (rs.next()) {
743
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
744
        stmt = conn.createStatement();                            
745
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
746
                                            harvestLogID);
747
        recordsDeletedDetail += recordsDeleted;
748
        stmt.close();
749
      }
750
 
751
      /* Now prune entries from the HARVEST_LOG table using a single update.
752
       */
753
      stmt = conn.createStatement();                            
754
      recordsDeleted = stmt.executeUpdate(deleteString);
755
      stmt.close();
756

    
757
      System.out.println("  " + recordsDeletedDetail + 
758
                         " records deleted from HARVEST_DETAIL_LOG");
759
      System.out.println("  " + recordsDeleted + 
760
                         " records deleted from HARVEST_LOG");
761
    }
762
    catch (SQLException e) {
763
      System.out.println("SQLException: " + e.getMessage());
764
    }
765
  }
766
    
767

    
768
  /**
769
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
770
   * a HarvestSiteSchedule object for each row in the table.
771
   */
772
  private void readHarvestSiteSchedule() {
773
    HarvestSiteSchedule harvestSiteSchedule;
774
    ResultSet rs;
775
    SQLWarning warn;
776
    Statement stmt;
777

    
778
    String contactEmail;
779
    String dateLastHarvest;
780
    String dateNextHarvest;
781
    String documentListURL;
782
    String ldapDN;
783
    String ldapPwd;
784
    int siteScheduleID;
785
    String unit;
786
    int updateFrequency;
787
        
788
    try {
789
      // Read the HARVEST_SITE_SCHEDULE table
790
      stmt = conn.createStatement();
791
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
792
      warn = rs.getWarnings();
793

    
794
      if (warn != null) {
795
        System.out.println("\n---Warning---\n");
796

    
797
        while (warn != null) {
798
          System.out.println("Message: " + warn.getMessage());
799
          System.out.println("SQLState: " + warn.getSQLState());
800
          System.out.print("Vendor error code: ");
801
          System.out.println(warn.getErrorCode());
802
          System.out.println("");
803
          warn = warn.getNextWarning();
804
        }
805
      }
806
     
807
      while (rs.next()) {
808
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
809
        documentListURL = rs.getString("DOCUMENTLISTURL");
810
        ldapDN = rs.getString("LDAPDN");
811
        ldapPwd = rs.getString("LDAPPWD");
812
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
813
        dateLastHarvest = rs.getString("DATELASTHARVEST");
814
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
815
        unit = rs.getString("UNIT");
816
        contactEmail = rs.getString("CONTACT_EMAIL");
817
        
818
        warn = rs.getWarnings();
819

    
820
        if (warn != null) {
821
          System.out.println("\n---Warning---\n");
822
      
823
          while (warn != null) {
824
            System.out.println("Message: " + warn.getMessage());
825
            System.out.println("SQLState: " + warn.getSQLState());
826
            System.out.print("Vendor error code: ");
827
            System.out.println(warn.getErrorCode());
828
            System.out.println("");
829
            warn = warn.getNextWarning();
830
          }
831
        }
832
      
833
        harvestSiteSchedule = new HarvestSiteSchedule(this,
834
                                                      siteScheduleID,
835
                                                      documentListURL,
836
                                                      ldapDN,
837
                                                      ldapPwd,
838
                                                      dateNextHarvest,
839
                                                      dateLastHarvest,
840
                                                      updateFrequency,
841
                                                      unit,
842
                                                      contactEmail
843
                                                     );
844
        harvestSiteScheduleList.add(harvestSiteSchedule);
845
      }
846
      
847
      rs.close();
848
      stmt.close();
849
    }
850
    catch (SQLException e) {
851
      System.out.println("Database access failed " + e);
852
      System.exit(1);
853
    }
854
    
855
  }
856
    
857

    
858
  /**
859
   * Sends a report to the Harvester Administrator. The report prints each log
860
   * entry pertaining to this harvest run.
861
   *
862
   * @param maxCodeLevel  the maximum code level that should be printed,
863
   *                      e.g. "warning". Any log entries higher than this
864
   *                      level will not be printed.
865
   */
866
  void reportToAdministrator(String maxCodeLevel) {
867
    PrintStream body;
868
    String from = harvesterAdministrator;
869
    String[] fromArray;
870
    MailMessage msg;
871
    int siteScheduleID = 0;
872
    String subject = "Report from Metacat Harvester: " + timestamp;
873
    String to = harvesterAdministrator;
874
    
875
    if (!to.equals("")) {
876
      System.out.println("Sending report to Harvester Administrator at address "
877
                         + harvesterAdministrator);
878
      
879
      try {
880
        msg = new MailMessage(smtpServer);
881

    
882
        if (from.indexOf(',') > 0) {
883
          fromArray = from.split(",");
884
          
885
          for (int i = 0; i < fromArray.length; i++) {
886
            if (i == 0) {
887
              msg.from(fromArray[i]);
888
            }
889
            
890
            msg.to(fromArray[i]);            
891
          }
892
        }
893
        else if (from.indexOf(';') > 0) {
894
          fromArray = from.split(";");
895

    
896
          for (int i = 0; i < fromArray.length; i++) {
897
            if (i == 0) {
898
              msg.from(fromArray[i]);
899
            }
900
            
901
            msg.to(fromArray[i]);            
902
          }
903
        }
904
        else {
905
          msg.from(from);
906
          msg.to(to);
907
        }
908
        
909
        msg.setSubject(subject);
910
        body = msg.getPrintStream();
911
        printHarvestHeader(body, siteScheduleID);
912
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
913
        msg.sendAndClose();
914
      }
915
      catch (IOException e) {
916
        System.out.println("There was a problem sending email to " + to);
917
        System.out.println("IOException: " + e.getMessage());
918
      }
919
    }
920
  }
921
  
922

    
923
  /**
924
   * Sets the harvest start time for this harvest run.
925
   * 
926
   * @param date
927
   */
928
  public void setHarvestStartTime(Date date) {
929
    harvestStartTime = date;
930
  }
931
    
932

    
933
  /**
934
   * Shuts down Harvester. Performs cleanup operations such as logging out
935
   * of Metacat and disconnecting from the database.
936
   */
937
  private void shutdown() {
938
    String maxCodeLevel = "debug";  // Print all log entries from level 1
939
                                    // ("error") to level 5 ("debug")
940
    int siteScheduleID = 0;
941

    
942
    // Log shutdown operation
943
    System.out.println("Shutting Down Harvester");
944
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
945
    pruneHarvestLog();
946
    closeConnection();
947
    // Print log to standard output and then email the Harvester administrator
948
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
949
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
950
  }
951
    
952

    
953
  /**
954
   * Initializes Harvester at startup. Connects to the database and to Metacat.
955
   * 
956
   * @param nHarvests        the nth harvest
957
   * @param maxHarvests      the maximum number of harvests that this process
958
   *                         can run
959
   */
960
  private void startup(int nHarvests, int maxHarvests) {
961
    Boolean ctm;
962
    Integer lp;
963
    String metacatURL;
964
    Date now = new Date();
965
    
966
    timestamp = now.toString();
967
    System.out.println(Harvester.marker);
968
    System.out.print(timestamp + ": Starting Next Harvest");
969
    if (maxHarvests > 0) {
970
      System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
971
    }
972
    System.out.print("\n");
973
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
974
    connectToMetacat = ctm.booleanValue();
975
    harvesterAdministrator = options.getOption("harvesterAdministrator");
976
    smtpServer = options.getOption("smtpServer");
977

    
978
    try {
979
      lp = Integer.valueOf(options.getOption("logPeriod"));
980
      logPeriod = lp.intValue();
981
    }
982
    catch (NumberFormatException e) {
983
      System.err.println("NumberFormatException: Error parsing logPeriod " +
984
                         logPeriod + e.getMessage());
985
      System.err.println("Defaulting to logPeriod of 90 days");
986
      logPeriod = 90;
987
    }
988

    
989
    conn = getConnection();
990
    initLogIDs();
991
    setHarvestStartTime(now);
992
    // Log startup operation
993
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
994
      
995
    if (connectToMetacat()) {      
996
      try {
997
        metacatURL = SystemUtil.getServletURL();
998
        System.out.println("Connecting to Metacat: " + metacatURL);
999
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
1000
      } 
1001
      catch (MetacatInaccessibleException e) {
1002
        System.out.println("Metacat connection failed." + e.getMessage());
1003
      } 
1004
      catch (Exception e) {
1005
        System.out.println("Metacat connection failed." + e.getMessage());
1006
      }
1007
    }
1008
  }
1009

    
1010
}
(6-6/11)