Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2005-03-22 09:53:09 -0800 (Tue, 22 Mar 2005) $'
8
 * '$Revision: 2426 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import edu.ucsb.nceas.utilities.Options;
29
import java.io.File;
30
import java.io.IOException;
31
import java.io.PrintStream;
32
import java.sql.Connection;
33
import java.sql.DriverManager;
34
import java.sql.ResultSet;
35
import java.sql.SQLException;
36
import java.sql.SQLWarning;
37
import java.sql.Statement;
38
import java.util.ArrayList;
39
import java.text.SimpleDateFormat;
40
import java.util.Date;
41

    
42
import edu.ucsb.nceas.metacat.client.Metacat;
43
import edu.ucsb.nceas.metacat.client.MetacatFactory;
44
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
45

    
46
/**
47
 * Harvester is the main class for the Harvester application. The main
48
 * method creates a single Harvester object which drives the application.
49
 * 
50
 * @author    costa
51
 * 
52
 */
53
public class Harvester {
54

    
55
  /*
56
   * Class fields
57
   */
58
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
59
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
60
  private static final String CONFIG_NAME = "metacat.properties";
61
  public static final String filler = "*";
62
  public static final String marker =
63
"*****************************************************************************";
64
  public static Options options = null;
65
  private static String schemaLocation = null;
66
   
67

    
68
  /* 
69
   * Class methods
70
   */
71
   
72

    
73
  /**
   * Constructor. Creates a new instance of Harvester. Performs no work of
   * its own beyond the object field initializers.
   */
  public Harvester() {
    super();
  }
78
    
79

    
80
  /**
81
   * Loads Harvester options from a configuration file.
82
   */
83
  public static void loadOptions(boolean test) {
84
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
85
    File propertyFile = new File(configDir, CONFIG_NAME);
86

    
87
    try {
88
      options = Options.initialize(propertyFile);
89
    } 
90
    catch (IOException e) {
91
      System.out.println("Error in loading options: " + e.getMessage());
92
    }
93
  }
94
  
95
  
96
  /**
97
    * Harvester main method.
98
    * 
99
    * @param args        the command line arguments
100
    * @throws SAXException
101
    * @throws IOException
102
    * @throws ParserConfigurationException
103
    */
104
  public static void main(String[] args) {
105
    Integer delayDefault = new Integer(0); // Default number of hours delay
106
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
107
    Integer d;                            // Used for determining delay
108
    long delta;                           // endTime - startTime
109
    long endTime;                         // time that a harvest completes
110
    Harvester harvester;                  // object for a single harvest run
111
    Integer maxHarvestsDefault = new Integer(0);     // Default max harvests
112
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
113
    Integer mh;                              // used in determining max harvests
114
    int nHarvests = 0;                      // counts the number of harvest runs
115
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
116
    Integer periodDefault = new Integer(24); // Default hours between harvests
117
    int period = periodDefault.intValue();   // Hours between harvests
118
    Integer p;                               // Used in determining the period
119
    long startTime;                          // time that a harvest run starts
120
    boolean test = false;                    // Passed to loadOption()
121
    
122
    if (args.length > 0) {
123
      schemaLocation = args[0];
124
    }
125

    
126
    System.out.println(marker);
127
    System.out.println("Starting Harvester");
128
    Harvester.loadOptions(test);
129

    
130
    // Parse the delay property. Use default if necessary.    
131
    try {
132
      d = Integer.valueOf(options.getOption("delay"));
133
      delay = d.intValue();
134
    }
135
    catch (NumberFormatException e) {
136
      System.out.println("NumberFormatException: Error parsing delay: " +
137
                         e.getMessage());
138
      System.out.println("Defaulting to delay=" + delayDefault);
139
      delay = delayDefault.intValue();
140
    }
141

    
142
    // Parse the maxHarvests property. Use default if necessary.    
143
    try {
144
      mh = Integer.valueOf(options.getOption("maxHarvests"));
145
      maxHarvests = mh.intValue();
146
    }
147
    catch (NumberFormatException e) {
148
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
149
                         e.getMessage());
150
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
151
      maxHarvests = maxHarvestsDefault.intValue();
152
    }
153

    
154
    // Parse the period property. Use default if necessary.    
155
    try {
156
      p = Integer.valueOf(options.getOption("period"));
157
      period = p.intValue();
158
    }
159
    catch (NumberFormatException e) {
160
      System.out.println("NumberFormatException: Error parsing period: " +
161
                         e.getMessage());
162
      System.out.println("Defaulting to period=" + periodDefault);
163
      period = periodDefault.intValue();
164
    }
165
    
166
    // Sleep for delay number of hours prior to starting first harvest
167
    if (delay > 0) {
168
      try {
169
        System.out.print("First harvest will begin in " + delay);
170
        if (delay == 1) {
171
          System.out.println(" hour.");
172
        }
173
        else {
174
          System.out.println(" hours.");
175
        }
176
        Thread.sleep(delay * oneHour);
177
      }
178
      catch (InterruptedException e) {
179
        System.err.println("InterruptedException: " + e.getMessage());
180
        System.exit(1);
181
      }
182
    }
183

    
184
    // Repeat a new harvest once every period number of hours, until we reach
185
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
186
    // Subtract delta from the time period so 
187
    // that each harvest will start at a fixed interval.
188
    //
189
    while ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
190
      nHarvests++;
191
      startTime = System.currentTimeMillis();
192
      harvester = new Harvester();                // New object for this harvest
193
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
194
      harvester.readHarvestSiteSchedule();        // Read the database table
195
      harvester.harvest();                        // Harvest the documents
196
      harvester.shutdown();                       // Shut down Harvester
197
      endTime = System.currentTimeMillis();
198
      delta = endTime - startTime;
199

    
200
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
201
        try {
202
          System.out.println("Next harvest will begin in " + 
203
                             period + " hours.");
204
          Thread.sleep((period * oneHour) - delta);
205
        }
206
        catch (InterruptedException e) {
207
          System.err.println("InterruptedException: " + e.getMessage());
208
          System.exit(1);
209
        }
210
      }
211
    }
212
  }
213

    
214

    
215
  /*
216
   * Object fields
217
   */
218

    
219
  /** Database connection */
220
  private Connection conn = null;
221
  
222
  /** Used during development to determine whether to connect to metacat 
223
   *  Sometimes it's useful to test parts of the code without actually
224
   *  connecting to Metacat.
225
   */
226
  private boolean connectToMetacat;
227

    
228
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
229
  private int detailLogID;
230
  
231
  /** Email address of the Harvester Administrator */
232
  String harvesterAdministrator;
233
  
234
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
235
  private int harvestLogID;
236
  
237
  /** End time of this harvest session */
238
  private Date harvestEndTime;
239
  
240
  /** List of HarvestLog objects. Stores log entries for report generation. */
241
  private ArrayList harvestLogList = new ArrayList();
242
  
243
  /** List of HarvestSiteSchedule objects */
244
  private ArrayList harvestSiteScheduleList = new ArrayList();
245
  
246
  /** Start time of this harvest session */
247
  private Date harvestStartTime;
248
  
249
  /** Number of days to save log records. Any that are older are purged. */
250
  int logPeriod;
251
  
252
  /** Metacat client object */
253
  Metacat metacat;
254
  
255
  /** SMTP server for sending mail messages */
256
  String smtpServer;
257
  
258
  /** The timestamp for this harvest run. Used for output only. */
259
  String timestamp;
260
  
261

    
262
  /*
263
   * Object methods
264
   */
265
   
266
  /**
267
   * Creates a new HarvestLog object and adds it to the harvestLogList.
268
   * 
269
   * @param  status          the status of the harvest operation
270
   * @param  message         the message text of the harvest operation
271
   * @param  harvestOperationCode  the harvest operation code
272
   * @param  siteScheduleID  the siteScheduleID for which this operation was
273
   *                         performed. 0 indicates that the operation did not
274
   *                         involve a particular harvest site.
275
   * @param  harvestDocument the associated HarvestDocument object. May be null.
276
   * @param  errorMessage    additional error message pertaining to document
277
   *                         error.
278
   */
279
  void addLogEntry(int    status,
280
                   String message,
281
                   String harvestOperationCode,
282
                   int    siteScheduleID,
283
                   HarvestDocument harvestDocument,
284
                   String errorMessage
285
                  ) {
286
    HarvestLog harvestLog;
287
    int harvestLogID = getHarvestLogID();
288
    int detailLogID;
289

    
290
    /* If there is no associated harvest document, call the basic constructor;
291
     * else call the extended constructor.
292
     */
293
    if (harvestDocument == null) {    
294
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
295
                                  status, message, harvestOperationCode, 
296
                                  siteScheduleID);
297
    }
298
    else {
299
      detailLogID = getDetailLogID();
300
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
301
                                  harvestStartTime, status, message,
302
                                  harvestOperationCode, siteScheduleID,
303
                                  harvestDocument, errorMessage);
304
    }
305
    
306
    harvestLogList.add(harvestLog);
307
  }
308
  
309
  
310
  public void closeConnection() {
311
    try {
312
      // Close the database connection
313
      System.out.println("Closing the database connection.");
314
      conn.close();
315
    }
316
    catch (SQLException e) {
317
      System.out.println("Database access failed " + e);
318
    }    
319
  }
320

    
321

    
322
  /**
323
   * Determines whether Harvester should attempt to connect to Metacat.
324
   * Used during development and testing.
325
   * 
326
   * @return     true if Harvester should connect, otherwise false
327
   */
328
  boolean connectToMetacat () {
329
    return connectToMetacat;
330
  }
331
  
332

    
333
  /**
334
   * Normalizes text prior to insertion into the HARVEST_LOG or
335
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
336
   * character with the double quote character. This prevents SQL errors
337
   * involving words that contain single quotes. Also removes \n and \r
338
   * characters from the text.
339
   * 
340
   * @param text  the original string
341
   * @return      a string containing the normalized text
342
   */
343
  public String dequoteText(String text) {
344
    char c;
345
    StringBuffer stringBuffer = new StringBuffer();
346
    
347
    for (int i = 0; i < text.length(); i++) {
348
      c = text.charAt(i);
349
      switch (c) {
350
        case '\'':
351
          stringBuffer.append('\"');
352
          break;
353
        case '\r':
354
        case '\n':
355
          break;
356
        default:
357
          stringBuffer.append(c);
358
          break;
359
      }
360
    }
361
    
362
    return stringBuffer.toString();
363
  }
364
  
365
  /**
366
   * Returns a connection to the database. Opens the connection if a connection
367
   * has not already been made previously.
368
   * 
369
   * @return  conn  the database Connection object
370
   */
371
  public Connection getConnection() {
372
    String dbDriver = "";
373
    String defaultDB;
374
    String password;
375
    String user;
376
    SQLWarning warn;
377
    
378
    if (conn == null) {
379
      dbDriver = options.getOption("dbDriver");
380
      defaultDB = options.getOption("defaultDB");
381
      password = options.getOption("password");
382
      user = options.getOption("user");
383

    
384
      // Load the jdbc driver
385
      try {
386
        Class.forName(dbDriver);
387
      }
388
      catch (ClassNotFoundException e) {
389
        System.out.println("Can't load driver " + e);
390
        System.exit(1);
391
      } 
392

    
393
      // Make the database connection
394
      try {
395
        System.out.println("Getting connection to Harvester tables");
396
        conn = DriverManager.getConnection(defaultDB, user, password);
397

    
398
        // If a SQLWarning object is available, print its warning(s).
399
        // There may be multiple warnings chained.
400
        warn = conn.getWarnings();
401
      
402
        if (warn != null) {
403
          while (warn != null) {
404
            System.out.println("SQLState: " + warn.getSQLState());
405
            System.out.println("Message:  " + warn.getMessage());
406
            System.out.println("Vendor: " + warn.getErrorCode());
407
            System.out.println("");
408
            warn = warn.getNextWarning();
409
          }
410
        }
411
      }
412
      catch (SQLException e) {
413
        System.out.println("Database access failed " + e);
414
        System.exit(1);
415
      }
416
    }
417
    
418
    return conn;
419
  }
420

    
421

    
422
  /**
423
   * Gets the current value of the detailLogID for storage as a primary key in
424
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
425
   * 
426
   * @return  the current value of the detailLogID
427
   */
428
  public int getDetailLogID() {
429
    int currentValue = detailLogID;
430
    
431
    detailLogID++;
432
    return currentValue;
433
  }
434
  
435
  
436
  /**
437
   * Gets the current value of the harvestLogID for storage as a primary key in
438
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
439
   * 
440
   * @return  the current value of the detailLogID
441
   */
442
  public int getHarvestLogID() {
443
    int currentValue = harvestLogID;
444
    
445
    harvestLogID++;
446
    return currentValue;
447
  }
448
  
449

    
450
  /** 
451
   * Gets the maximum value of an integer field from a table.
452
   * 
453
   * @param tableName  the database table name
454
   * @param fieldName  the field name of the integer field in the table
455
   * @return  the maximum integer stored in the fieldName field of tableName
456
   */
457
  private int getMaxValue(String tableName, String fieldName) {
458
    int maxValue = 0;
459
    int fieldValue;
460
    String query = "SELECT " + fieldName + " FROM " + tableName;
461
    Statement stmt;
462
    
463
	try {
464
      stmt = conn.createStatement();
465
      ResultSet rs = stmt.executeQuery(query);
466
	
467
      while (rs.next()) {
468
        fieldValue = rs.getInt(fieldName);
469
        maxValue = Math.max(maxValue, fieldValue);
470
      }
471
      
472
      stmt.close();
473
    } 
474
    catch(SQLException ex) {
475
      System.out.println("SQLException: " + ex.getMessage());
476
    }
477
    
478
    return maxValue;
479
  }
480
  
481
  
482
  /** 
483
   * Gets the minimum value of an integer field from a table.
484
   * 
485
   * @param tableName  the database table name
486
   * @param fieldName  the field name of the integer field in the table
487
   * @return  the minimum integer stored in the fieldName field of tableName
488
   */
489
  private int getMinValue(String tableName, String fieldName) {
490
    int minValue = 0;
491
    int fieldValue;
492
    String query = "SELECT " + fieldName + " FROM " + tableName;
493
    Statement stmt;
494
    
495
    try {
496
      stmt = conn.createStatement();
497
      ResultSet rs = stmt.executeQuery(query);
498
	
499
      while (rs.next()) {
500
        fieldValue = rs.getInt(fieldName);
501

    
502
        if (minValue == 0) {
503
          minValue = fieldValue;
504
        }
505
        else {
506
          minValue = Math.min(minValue, fieldValue);
507
        }
508
      }
509
      
510
      stmt.close();
511
    } 
512
    catch(SQLException ex) {
513
      System.out.println("SQLException: " + ex.getMessage());
514
    }
515

    
516
    return minValue;
517
  }
518
  
519
  
520
  /**
521
   * For every Harvest site schedule in the database, harvest the
522
   * documents for that site if they are due to be harvested.
523
   * 
524
   * @throws SAXException
525
   * @throws IOException
526
   * @throws ParserConfigurationException
527
   */
528
  private void harvest() {
529
    HarvestSiteSchedule harvestSiteSchedule;
530

    
531
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
532
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
533
      
534
      if (Harvester.schemaLocation != null) {
535
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
536
      }
537
      
538
      harvestSiteSchedule.harvestDocumentList();
539
    }
540
  }
541
  
542
  
543
  /**
544
   * Initializes the detailLogID and harvestLogID values to their current
545
   * maximums + 1.
546
   */
547
  public void initLogIDs() {
548
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
549
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
550
  }
551
  
552

    
553
  /**
554
   * Prints the header of the harvest report.
555
   * 
556
   * @param out            the PrintStream object to print to
557
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
558
   *                       have a value of 0 if no particular site is involved,
559
   *                       which indicates that the report is being prepared
560
   *                       for the Harvester Administrator rather than for a
561
   *                       particular Site Contact.
562
   */
563
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
564
    HarvestLog harvestLog;
565
    int logSiteScheduleID;
566
    int nErrors = 0;
567
    String phrase;
568
    
569
    for (int i = 0; i < harvestLogList.size(); i++) {
570
      harvestLog = (HarvestLog) harvestLogList.get(i);
571
      logSiteScheduleID = harvestLog.getSiteScheduleID();
572
      
573
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
574
        if (harvestLog.isErrorEntry()) {
575
          nErrors++;
576
        }
577
      }      
578
    }
579

    
580
    out.println(marker);
581
    out.println(filler);
582
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
583
    out.println(filler);
584

    
585
    if (nErrors > 0) {
586
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
587
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
588
      out.println("* Please see the log entries below for additonal details.");
589
    }
590
    else {
591
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
592
    }
593
    
594
    out.println(filler);
595
    out.println(marker);
596
  }
597
    
598

    
599
  /**
600
   * Prints harvest log entries for this harvest run. Entries may be filtered
601
   * for a particular site, or all entries may be printed.
602
   * 
603
   * @param out            the PrintStream object to write to
604
   * @param maxCodeLevel   the maximum code level that should be printed,
605
   *                       e.g. "warning". Any log entries higher than this
606
   *                       level will not be printed.
607
   * @param siteScheduleID if greater than 0, indicates that the log
608
   *                       entry should only be printed for a particular site
609
   *                       as identified by its siteScheduleID. if 0, then
610
   *                       print output for all sites.
611
   */
612
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
613
                      ) {
614
    HarvestLog harvestLog;
615
    int logSiteScheduleID;
616
    int nErrors = 0;
617
    String phrase;
618
    
619
    out.println("");
620
    out.println(marker);
621
    out.println(filler);
622
    out.println("*                       LOG ENTRIES");
623
    out.println(filler);
624
    out.println(marker);
625

    
626
    for (int i = 0; i < harvestLogList.size(); i++) {
627
      harvestLog = (HarvestLog) harvestLogList.get(i);
628
      logSiteScheduleID = harvestLog.getSiteScheduleID();
629
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
630
        harvestLog.printOutput(out, maxCodeLevel);
631
      }
632
    }
633
  }
634
    
635

    
636
  /**
637
   * Prints the site schedule data for a given site.
638
   * 
639
   * @param out              the PrintStream to write to
640
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
641
   */
642
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
643
    HarvestSiteSchedule harvestSiteSchedule;
644

    
645
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
646
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
647
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
648
        harvestSiteSchedule.printOutput(out);
649
      }
650
    }
651
  }
652
  
653

    
654
  /**
655
   * Prunes old records from the HARVEST_LOG table. Records are removed if
656
   * their HARVEST_DATE is older than a given number of days, as stored in the
657
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
658
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
659
   */
660
  private void pruneHarvestLog() {
661
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
662
    Date dateLastLog;                    // Prune everything prior to this date
663
    String deleteString;
664
    String deleteStringDetailLog;
665
    long delta;
666
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
667
    int recordsDeleted;
668
    int recordsDeletedDetail = 0;
669
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
670
    String dateString;
671
    ResultSet rs;
672
    String selectString;
673
    Statement stmt;
674
    long timeLastLog = 0;
675
    SQLWarning warn;
676
     
677
    delta = logPeriod * millisecondsPerDay;
678
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
679
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
680
    deleteStringDetailLog = 
681
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
682
    timeLastLog = currentTime - delta;
683
    dateLastLog = new Date(timeLastLog);
684
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
685
    deleteString += dateString;
686
    selectString += dateString;
687

    
688
    try {
689
      System.out.println(
690
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
691

    
692
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
693
       * table.
694
       */
695
      stmt = conn.createStatement();                            
696
      rs = stmt.executeQuery(selectString);
697
      warn = rs.getWarnings();
698

    
699
      if (warn != null) {
700
        System.out.println("\n---Warning---\n");
701

    
702
        while (warn != null) {
703
          System.out.println("Message: " + warn.getMessage());
704
          System.out.println("SQLState: " + warn.getSQLState());
705
          System.out.print("Vendor error code: ");
706
          System.out.println(warn.getErrorCode());
707
          System.out.println("");
708
          warn = warn.getNextWarning();
709
        }
710
      } 
711

    
712
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
713
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
714
       * be pruned first because its records have a child relationship to those
715
       * in HARVEST_LOG.
716
       */
717
      while (rs.next()) {
718
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
719
        stmt = conn.createStatement();                            
720
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
721
                                            harvestLogID);
722
        recordsDeletedDetail += recordsDeleted;
723
        stmt.close();
724
      }
725
 
726
      /* Now prune entries from the HARVEST_LOG table using a single update.
727
       */
728
      stmt = conn.createStatement();                            
729
      recordsDeleted = stmt.executeUpdate(deleteString);
730
      stmt.close();
731

    
732
      System.out.println("  " + recordsDeletedDetail + 
733
                         " records deleted from HARVEST_DETAIL_LOG");
734
      System.out.println("  " + recordsDeleted + 
735
                         " records deleted from HARVEST_LOG");
736
    }
737
    catch (SQLException e) {
738
      System.out.println("SQLException: " + e.getMessage());
739
    }
740
  }
741
    
742

    
743
  /**
744
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
745
   * a HarvestSiteSchedule object for each row in the table.
746
   */
747
  private void readHarvestSiteSchedule() {
748
    HarvestSiteSchedule harvestSiteSchedule;
749
    ResultSet rs;
750
    SQLWarning warn;
751
    Statement stmt;
752

    
753
    String contactEmail;
754
    String dateLastHarvest;
755
    String dateNextHarvest;
756
    String documentListURL;
757
    String ldapDN;
758
    String ldapPwd;
759
    int siteScheduleID;
760
    String unit;
761
    int updateFrequency;
762
        
763
    try {
764
      // Read the HARVEST_SITE_SCHEDULE table
765
      stmt = conn.createStatement();
766
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
767
      warn = rs.getWarnings();
768

    
769
      if (warn != null) {
770
        System.out.println("\n---Warning---\n");
771

    
772
        while (warn != null) {
773
          System.out.println("Message: " + warn.getMessage());
774
          System.out.println("SQLState: " + warn.getSQLState());
775
          System.out.print("Vendor error code: ");
776
          System.out.println(warn.getErrorCode());
777
          System.out.println("");
778
          warn = warn.getNextWarning();
779
        }
780
      }
781
     
782
      while (rs.next()) {
783
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
784
        documentListURL = rs.getString("DOCUMENTLISTURL");
785
        ldapDN = rs.getString("LDAPDN");
786
        ldapPwd = rs.getString("LDAPPWD");
787
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
788
        dateLastHarvest = rs.getString("DATELASTHARVEST");
789
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
790
        unit = rs.getString("UNIT");
791
        contactEmail = rs.getString("CONTACT_EMAIL");
792
        
793
        warn = rs.getWarnings();
794

    
795
        if (warn != null) {
796
          System.out.println("\n---Warning---\n");
797
      
798
          while (warn != null) {
799
            System.out.println("Message: " + warn.getMessage());
800
            System.out.println("SQLState: " + warn.getSQLState());
801
            System.out.print("Vendor error code: ");
802
            System.out.println(warn.getErrorCode());
803
            System.out.println("");
804
            warn = warn.getNextWarning();
805
          }
806
        }
807
      
808
        harvestSiteSchedule = new HarvestSiteSchedule(this,
809
                                                      siteScheduleID,
810
                                                      documentListURL,
811
                                                      ldapDN,
812
                                                      ldapPwd,
813
                                                      dateNextHarvest,
814
                                                      dateLastHarvest,
815
                                                      updateFrequency,
816
                                                      unit,
817
                                                      contactEmail
818
                                                     );
819
        harvestSiteScheduleList.add(harvestSiteSchedule);
820
      }
821
      
822
      rs.close();
823
      stmt.close();
824
    }
825
    catch (SQLException e) {
826
      System.out.println("Database access failed " + e);
827
      System.exit(1);
828
    }
829
    
830
  }
831
    
832

    
833
  /**
834
   * Sends a report to the Harvester Administrator. The report prints each log
835
   * entry pertaining to this harvest run.
836
   *
837
   * @param maxCodeLevel  the maximum code level that should be printed,
838
   *                      e.g. "warning". Any log entries higher than this
839
   *                      level will not be printed.
840
   */
841
  void reportToAdministrator(String maxCodeLevel) {
842
    PrintStream body;
843
    String from = harvesterAdministrator;
844
    String[] fromArray;
845
    MailMessage msg;
846
    int siteScheduleID = 0;
847
    String subject = "Report from Metacat Harvester: " + timestamp;
848
    String to = harvesterAdministrator;
849
    
850
    if (!to.equals("")) {
851
      System.out.println("Sending report to Harvester Administrator at address "
852
                         + harvesterAdministrator);
853
      
854
      try {
855
        msg = new MailMessage(smtpServer);
856

    
857
        if (from.indexOf(',') > 0) {
858
          fromArray = from.split(",");
859
          
860
          for (int i = 0; i < fromArray.length; i++) {
861
            if (i == 0) {
862
              msg.from(fromArray[i]);
863
            }
864
            
865
            msg.to(fromArray[i]);            
866
          }
867
        }
868
        else if (from.indexOf(';') > 0) {
869
          fromArray = from.split(";");
870

    
871
          for (int i = 0; i < fromArray.length; i++) {
872
            if (i == 0) {
873
              msg.from(fromArray[i]);
874
            }
875
            
876
            msg.to(fromArray[i]);            
877
          }
878
        }
879
        else {
880
          msg.from(from);
881
          msg.to(to);
882
        }
883
        
884
        msg.setSubject(subject);
885
        body = msg.getPrintStream();
886
        printHarvestHeader(body, siteScheduleID);
887
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
888
        msg.sendAndClose();
889
      }
890
      catch (IOException e) {
891
        System.out.println("There was a problem sending email to " + to);
892
        System.out.println("IOException: " + e.getMessage());
893
      }
894
    }
895
  }
896
  
897

    
898
  /**
899
   * Sets the harvest start time for this harvest run.
900
   * 
901
   * @param date
902
   */
903
  public void setHarvestStartTime(Date date) {
904
    harvestStartTime = date;
905
  }
906
    
907

    
908
  /**
909
   * Shuts down Harvester. Performs cleanup operations such as logging out
910
   * of Metacat and disconnecting from the database.
911
   */
912
  private void shutdown() {
913
    String maxCodeLevel = "debug";  // Print all log entries from level 1
914
                                    // ("error") to level 5 ("debug")
915
    int siteScheduleID = 0;
916

    
917
    // Log shutdown operation
918
    System.out.println("Shutting Down Harvester");
919
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
920
    pruneHarvestLog();
921
    closeConnection();
922
    // Print log to standard output and then email the Harvester administrator
923
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
924
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
925
  }
926
    
927

    
928
  /**
929
   * Initializes Harvester at startup. Connects to the database and to Metacat.
930
   * 
931
   * @param nHarvests        the nth harvest
932
   * @param maxHarvests      the maximum number of harvests that this process
933
   *                         can run
934
   */
935
  private void startup(int nHarvests, int maxHarvests) {
936
    Boolean ctm;
937
    String httpserver;
938
    Integer lp;
939
    String metacatURL;
940
    Date now = new Date();
941
    String servletPath;
942
    
943
    timestamp = now.toString();
944
    System.out.println(Harvester.marker);
945
    System.out.print(timestamp + ": Starting Next Harvest");
946
    if (maxHarvests > 0) {
947
      System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
948
    }
949
    System.out.print("\n");
950
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
951
    connectToMetacat = ctm.booleanValue();
952
    harvesterAdministrator = options.getOption("harvesterAdministrator");
953
    smtpServer = options.getOption("smtpServer");
954

    
955
    try {
956
      lp = Integer.valueOf(options.getOption("logPeriod"));
957
      logPeriod = lp.intValue();
958
    }
959
    catch (NumberFormatException e) {
960
      System.err.println("NumberFormatException: Error parsing logPeriod " +
961
                         logPeriod + e.getMessage());
962
      System.err.println("Defaulting to logPeriod of 90 days");
963
      logPeriod = 90;
964
    }
965

    
966
    conn = getConnection();
967
    initLogIDs();
968
    setHarvestStartTime(now);
969
    // Log startup operation
970
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
971
      
972
    if (connectToMetacat()) {      
973
      try {
974
        httpserver = options.getOption("httpserver");
975
        servletPath = options.getOption("servletpath");
976
        metacatURL = httpserver + servletPath;
977
        System.out.println("Connecting to Metacat: " + metacatURL);
978
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
979
      } 
980
      catch (MetacatInaccessibleException e) {
981
        System.out.println("Metacat connection failed." + e.getMessage());
982
      } 
983
      catch (Exception e) {
984
        System.out.println("Metacat connection failed." + e.getMessage());
985
      }
986
    }
987
  }
988

    
989
}
(6-6/11)