Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2005-01-13 16:49:36 -0800 (Thu, 13 Jan 2005) $'
8
 * '$Revision: 2367 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import edu.ucsb.nceas.utilities.Options;
29
import java.io.File;
30
import java.io.FileInputStream;
31
import java.io.IOException;
32
import java.io.PrintStream;
33
import java.sql.Connection;
34
import java.sql.DriverManager;
35
import java.sql.ResultSet;
36
import java.sql.SQLException;
37
import java.sql.SQLWarning;
38
import java.sql.Statement;
39
import java.util.ArrayList;
40
import java.text.SimpleDateFormat;
41
import java.util.Date;
42

    
43
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46

    
47
/**
48
 * Harvester is the main class for the Harvester application. The main
49
 * method creates a single Harvester object which drives the application.
50
 * 
51
 * @author    costa
52
 * 
53
 */
54
public class Harvester {
55

    
56
  /*
57
   * Class fields
58
   */
59
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
60
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
61
  private static final String CONFIG_NAME = "metacat.properties";
62
  public static final String filler = "*";
63
  public static final String marker =
64
"*****************************************************************************";
65
  public static Options options = null;
66
   
67

    
68
  /* 
69
   * Class methods
70
   */
71
   
72

    
73
  /**
74
   * Constructor. Creates a new instance of Harvester.
75
   */
76
  public Harvester() {
77
  }
78
    
79

    
80
  /**
81
   * Loads Harvester options from a configuration file.
82
   */
83
  public static void loadOptions(boolean test) {
84
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
85
    File propertyFile = new File(configDir, CONFIG_NAME);
86

    
87
    try {
88
      options = Options.initialize(propertyFile);
89
    } 
90
    catch (IOException e) {
91
      System.out.println("Error in loading options: " + e.getMessage());
92
    }
93
  }
94
  
95
  
96
  /**
97
    * Harvester main method.
98
    * 
99
    * @param args        the command line arguments
100
    * @throws SAXException
101
    * @throws IOException
102
    * @throws ParserConfigurationException
103
    */
104
  public static void main(String[] args) {
105
    Integer delayDefault = new Integer(0); // Default number of hours delay
106
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
107
    Integer d;                            // Used for determining delay
108
    long delta;                           // endTime - startTime
109
    long endTime;                         // time that a harvest completes
110
    Harvester harvester;                  // object for a single harvest run
111
    Integer maxHarvestsDefault = new Integer(30);    // Default max harvests
112
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
113
    Integer mh;                              // used in determining max harvests
114
    int nHarvests = 0;                      // counts the number of harvest runs
115
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
116
    Integer periodDefault = new Integer(24); // Default hours between harvests
117
    int period = periodDefault.intValue();   // Hours between harvests
118
    Integer p;                               // Used in determining the period
119
    long startTime;                          // time that a harvest run starts
120
    boolean test = false;                    // Passed to loadOption()
121

    
122
    System.out.println(marker);
123
    System.out.println("Starting Harvester");
124
    Harvester.loadOptions(test);
125

    
126
    // Parse the delay property. Use default if necessary.    
127
    try {
128
      d = Integer.valueOf(options.getOption("delay"));
129
      delay = d.intValue();
130
    }
131
    catch (NumberFormatException e) {
132
      System.out.println("NumberFormatException: Error parsing delay: " +
133
                         e.getMessage());
134
      System.out.println("Defaulting to delay=" + delayDefault);
135
      delay = delayDefault.intValue();
136
    }
137

    
138
    // Parse the maxHarvests property. Use default if necessary.    
139
    try {
140
      mh = Integer.valueOf(options.getOption("maxHarvests"));
141
      maxHarvests = mh.intValue();
142
    }
143
    catch (NumberFormatException e) {
144
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
145
                         e.getMessage());
146
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
147
      maxHarvests = maxHarvestsDefault.intValue();
148
    }
149

    
150
    // Parse the period property. Use default if necessary.    
151
    try {
152
      p = Integer.valueOf(options.getOption("period"));
153
      period = p.intValue();
154
    }
155
    catch (NumberFormatException e) {
156
      System.out.println("NumberFormatException: Error parsing period: " +
157
                         e.getMessage());
158
      System.out.println("Defaulting to period=" + periodDefault);
159
      period = periodDefault.intValue();
160
    }
161
    
162
    // Sleep for delay number of hours prior to starting first harvest
163
    if (delay > 0) {
164
      try {
165
        System.out.print("First harvest will begin in " + delay);
166
        if (delay == 1) {
167
          System.out.println(" hour.");
168
        }
169
        else {
170
          System.out.println(" hours.");
171
        }
172
        Thread.sleep(delay * oneHour);
173
      }
174
      catch (InterruptedException e) {
175
          System.err.println("InterruptedException: " + e.getMessage());
176
          System.exit(1);
177
      }
178
    }
179

    
180
    // Repeat a new harvest once every period number of hours, until we reach
181
    // the maximum number of harvests. Subtract delta from the time period so 
182
    // that each harvest will start at a fixed interval.
183
    //
184
    while (nHarvests < maxHarvests) {
185
      nHarvests++;
186
      startTime = System.currentTimeMillis();
187
      harvester = new Harvester();                // New object for this harvest
188
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
189
      harvester.readHarvestSiteSchedule();        // Read the database table
190
      harvester.harvest();                        // Harvest the documents
191
      harvester.shutdown();                       // Shut down Harvester
192
      endTime = System.currentTimeMillis();
193
      delta = endTime - startTime;
194

    
195
      if (nHarvests < maxHarvests) {
196
        try {
197
          System.out.println("Next harvest will begin in " + 
198
                             period + " hours.");
199
          Thread.sleep((period * oneHour) - delta);
200
        }
201
        catch (InterruptedException e) {
202
          System.err.println("InterruptedException: " + e.getMessage());
203
          System.exit(1);
204
        }
205
      }
206
    }
207
  }
208

    
209

    
210
  /*
211
   * Object fields
212
   */
213

    
214
  /** Database connection */
215
  private Connection conn = null;
216
  
217
  /** Used during development to determine whether to connect to metacat 
218
   *  Sometimes it's useful to test parts of the code without actually
219
   *  connecting to Metacat.
220
   */
221
  private boolean connectToMetacat;
222

    
223
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
224
  private int detailLogID;
225
  
226
  /** Email address of the Harvester Administrator */
227
  String harvesterAdministrator;
228
  
229
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
230
  private int harvestLogID;
231
  
232
  /** End time of this harvest session */
233
  private Date harvestEndTime;
234
  
235
  /** List of HarvestLog objects. Stores log entries for report generation. */
236
  private ArrayList harvestLogList = new ArrayList();
237
  
238
  /** List of HarvestSiteSchedule objects */
239
  private ArrayList harvestSiteScheduleList = new ArrayList();
240
  
241
  /** Start time of this harvest session */
242
  private Date harvestStartTime;
243
  
244
  /** Number of days to save log records. Any that are older are purged. */
245
  int logPeriod;
246
  
247
  /** Metacat client object */
248
  Metacat metacat;
249
  
250
  /** SMTP server for sending mail messages */
251
  String smtpServer;
252
  
253
  /** The timestamp for this harvest run. Used for output only. */
254
  String timestamp;
255
  
256

    
257
  /*
258
   * Object methods
259
   */
260
   
261
  /**
262
   * Creates a new HarvestLog object and adds it to the harvestLogList.
263
   * 
264
   * @param  status          the status of the harvest operation
265
   * @param  message         the message text of the harvest operation
266
   * @param  harvestOperationCode  the harvest operation code
267
   * @param  siteScheduleID  the siteScheduleID for which this operation was
268
   *                         performed. 0 indicates that the operation did not
269
   *                         involve a particular harvest site.
270
   * @param  harvestDocument the associated HarvestDocument object. May be null.
271
   * @param  errorMessage    additional error message pertaining to document
272
   *                         error.
273
   */
274
  void addLogEntry(int    status,
275
                   String message,
276
                   String harvestOperationCode,
277
                   int    siteScheduleID,
278
                   HarvestDocument harvestDocument,
279
                   String errorMessage
280
                  ) {
281
    HarvestLog harvestLog;
282
    int harvestLogID = getHarvestLogID();
283
    int detailLogID;
284

    
285
    /* If there is no associated harvest document, call the basic constructor;
286
     * else call the extended constructor.
287
     */
288
    if (harvestDocument == null) {    
289
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
290
                                  status, message, harvestOperationCode, 
291
                                  siteScheduleID);
292
    }
293
    else {
294
      detailLogID = getDetailLogID();
295
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
296
                                  harvestStartTime, status, message,
297
                                  harvestOperationCode, siteScheduleID,
298
                                  harvestDocument, errorMessage);
299
    }
300
    
301
    harvestLogList.add(harvestLog);
302
  }
303
  
304
  
305
  public void closeConnection() {
306
    try {
307
      // Close the database connection
308
      System.out.println("Closing the database connection.");
309
      conn.close();
310
    }
311
    catch (SQLException e) {
312
      System.out.println("Database access failed " + e);
313
    }    
314
  }
315

    
316

    
317
  /**
318
   * Determines whether Harvester should attempt to connect to Metacat.
319
   * Used during development and testing.
320
   * 
321
   * @return     true if Harvester should connect, otherwise false
322
   */
323
  boolean connectToMetacat () {
324
    return connectToMetacat;
325
  }
326
  
327

    
328
  /**
329
   * Normalizes text prior to insertion into the HARVEST_LOG or
330
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
331
   * character with the double quote character. This prevents SQL errors
332
   * involving words that contain single quotes. Also removes \n and \r
333
   * characters from the text.
334
   * 
335
   * @param text  the original string
336
   * @return      a string containing the normalized text
337
   */
338
  public String dequoteText(String text) {
339
    char c;
340
    StringBuffer stringBuffer = new StringBuffer();
341
    
342
    for (int i = 0; i < text.length(); i++) {
343
      c = text.charAt(i);
344
      switch (c) {
345
        case '\'':
346
          stringBuffer.append('\"');
347
          break;
348
        case '\r':
349
        case '\n':
350
          break;
351
        default:
352
          stringBuffer.append(c);
353
          break;
354
      }
355
    }
356
    
357
    return stringBuffer.toString();
358
  }
359
  
360
  /**
361
   * Returns a connection to the database. Opens the connection if a connection
362
   * has not already been made previously.
363
   * 
364
   * @return  conn  the database Connection object
365
   */
366
  public Connection getConnection() {
367
    String dbDriver = "";
368
		String defaultDB;
369
    String password;
370
    String user;
371
    SQLWarning warn;
372
    
373
    if (conn == null) {
374
      dbDriver = options.getOption("dbDriver");
375
      defaultDB = options.getOption("defaultDB");
376
      password = options.getOption("password");
377
      user = options.getOption("user");
378

    
379
      // Load the jdbc driver
380
      try {
381
        Class.forName(dbDriver);
382
      }
383
      catch (ClassNotFoundException e) {
384
        System.out.println("Can't load driver " + e);
385
        System.exit(1);
386
      } 
387

    
388
      // Make the database connection
389
      try {
390
        System.out.println("Getting connection to Harvester tables");
391
        conn = DriverManager.getConnection(defaultDB, user, password);
392

    
393
        // If a SQLWarning object is available, print its warning(s).
394
        // There may be multiple warnings chained.
395
        warn = conn.getWarnings();
396
      
397
        if (warn != null) {
398
          while (warn != null) {
399
            System.out.println("SQLState: " + warn.getSQLState());
400
            System.out.println("Message:  " + warn.getMessage());
401
            System.out.println("Vendor: " + warn.getErrorCode());
402
            System.out.println("");
403
            warn = warn.getNextWarning();
404
          }
405
        }
406
      }
407
      catch (SQLException e) {
408
        System.out.println("Database access failed " + e);
409
        System.exit(1);
410
      }
411
    }
412
    
413
    return conn;
414
  }
415

    
416

    
417
  /**
418
   * Gets the current value of the detailLogID for storage as a primary key in
419
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
420
   * 
421
   * @return  the current value of the detailLogID
422
   */
423
  public int getDetailLogID() {
424
    int currentValue = detailLogID;
425
    
426
    detailLogID++;
427
    return currentValue;
428
  }
429
  
430
  
431
  /**
432
   * Gets the current value of the harvestLogID for storage as a primary key in
433
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
434
   * 
435
   * @return  the current value of the detailLogID
436
   */
437
  public int getHarvestLogID() {
438
    int currentValue = harvestLogID;
439
    
440
    harvestLogID++;
441
    return currentValue;
442
  }
443
  
444

    
445
  /** 
446
   * Gets the maximum value of an integer field from a table.
447
   * 
448
   * @param tableName  the database table name
449
   * @param fieldName  the field name of the integer field in the table
450
   * @return  the maximum integer stored in the fieldName field of tableName
451
   */
452
  private int getMaxValue(String tableName, String fieldName) {
453
    int maxValue = 0;
454
    int fieldValue;
455
		String query = "SELECT " + fieldName + " FROM " + tableName;
456
		Statement stmt;
457
    
458
		try {
459
			stmt = conn.createStatement();							
460
			ResultSet rs = stmt.executeQuery(query);
461
	
462
			while (rs.next()) {
463
				fieldValue = rs.getInt(fieldName);
464
        maxValue = Math.max(maxValue, fieldValue);
465
			}
466
	
467
			stmt.close();	
468
		} 
469
    catch(SQLException ex) {
470
			System.out.println("SQLException: " + ex.getMessage());
471
		}
472
    
473
    return maxValue;
474
  }
475
  
476
  
477
  /** 
478
   * Gets the minimum value of an integer field from a table.
479
   * 
480
   * @param tableName  the database table name
481
   * @param fieldName  the field name of the integer field in the table
482
   * @return  the minimum integer stored in the fieldName field of tableName
483
   */
484
  private int getMinValue(String tableName, String fieldName) {
485
    int minValue = 0;
486
    int fieldValue;
487
		String query = "SELECT " + fieldName + " FROM " + tableName;
488
		Statement stmt;
489
    
490
		try {
491
			stmt = conn.createStatement();							
492
			ResultSet rs = stmt.executeQuery(query);
493
	
494
			while (rs.next()) {
495
				fieldValue = rs.getInt(fieldName);
496

    
497
        if (minValue == 0) {
498
          minValue = fieldValue;
499
        }
500
        else {
501
          minValue = Math.min(minValue, fieldValue);
502
        }
503
			}
504
	
505
			stmt.close();	
506
		} 
507
    catch(SQLException ex) {
508
			System.out.println("SQLException: " + ex.getMessage());
509
		}
510
    
511
    return minValue;
512
  }
513
  
514
  
515
  /**
516
   * For every Harvest site schedule in the database, harvest the
517
   * documents for that site if they are due to be harvested.
518
   * 
519
   * @throws SAXException
520
   * @throws IOException
521
   * @throws ParserConfigurationException
522
   */
523
  private void harvest() {
524
    HarvestSiteSchedule harvestSiteSchedule;
525

    
526
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
527
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
528
      harvestSiteSchedule.harvestDocumentList();
529
    }
530
  }
531
  
532
  
533
  /**
534
   * Initializes the detailLogID and harvestLogID values to their current
535
   * maximums + 1.
536
   */
537
  public void initLogIDs() {
538
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
539
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
540
  }
541
  
542

    
543
  /**
544
   * Prints the header of the harvest report.
545
   * 
546
   * @param out            the PrintStream object to print to
547
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
548
   *                       have a value of 0 if no particular site is involved,
549
   *                       which indicates that the report is being prepared
550
   *                       for the Harvester Administrator rather than for a
551
   *                       particular Site Contact.
552
   */
553
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
554
    HarvestLog harvestLog;
555
    int logSiteScheduleID;
556
    int nErrors = 0;
557
    String phrase;
558
    
559
    for (int i = 0; i < harvestLogList.size(); i++) {
560
      harvestLog = (HarvestLog) harvestLogList.get(i);
561
      logSiteScheduleID = harvestLog.getSiteScheduleID();
562
      
563
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
564
        if (harvestLog.isErrorEntry()) {
565
          nErrors++;
566
        }
567
      }      
568
    }
569

    
570
    out.println(marker);
571
    out.println(filler);
572
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
573
    out.println(filler);
574

    
575
    if (nErrors > 0) {
576
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
577
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
578
      out.println("* Please see the log entries below for additonal details.");
579
    }
580
    else {
581
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
582
    }
583
    
584
    out.println(filler);
585
    out.println(marker);
586
  }
587
    
588

    
589
  /**
590
   * Prints harvest log entries for this harvest run. Entries may be filtered
591
   * for a particular site, or all entries may be printed.
592
   * 
593
   * @param out            the PrintStream object to write to
594
   * @param maxCodeLevel   the maximum code level that should be printed,
595
   *                       e.g. "warning". Any log entries higher than this
596
   *                       level will not be printed.
597
   * @param siteScheduleID if greater than 0, indicates that the log
598
   *                       entry should only be printed for a particular site
599
   *                       as identified by its siteScheduleID. if 0, then
600
   *                       print output for all sites.
601
   */
602
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
603
                      ) {
604
    HarvestLog harvestLog;
605
    int logSiteScheduleID;
606
    int nErrors = 0;
607
    String phrase;
608
    
609
    out.println("");
610
    out.println(marker);
611
    out.println(filler);
612
    out.println("*                       LOG ENTRIES");
613
    out.println(filler);
614
    out.println(marker);
615

    
616
    for (int i = 0; i < harvestLogList.size(); i++) {
617
      harvestLog = (HarvestLog) harvestLogList.get(i);
618
      logSiteScheduleID = harvestLog.getSiteScheduleID();
619
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
620
        harvestLog.printOutput(out, maxCodeLevel);
621
      }
622
    }
623
  }
624
    
625

    
626
  /**
627
   * Prints the site schedule data for a given site.
628
   * 
629
   * @param out              the PrintStream to write to
630
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
631
   */
632
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
633
     HarvestSiteSchedule harvestSiteSchedule;
634

    
635
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
636
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
637
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
638
        harvestSiteSchedule.printOutput(out);
639
      }
640
    }
641
  }
642
  
643

    
644
  /**
645
   * Prunes old records from the HARVEST_DETAIL_LOG table. Records are
646
   * removed if the HARVEST_LOG_ID foreign key is less than the lowest
647
   * HARVEST_LOG_ID primary key in the HARVEST_LOG table.
648
   */
649
  private void pruneHarvestDetailLog() {
650
		String deleteString;
651
    int minHarvestLogID;
652
    int recordsDeleted;
653
		Statement stmt;
654
    
655
    minHarvestLogID = getMinValue("HARVEST_LOG", "HARVEST_LOG_ID");
656
    deleteString = "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID < " +
657
                   minHarvestLogID;
658

    
659
		try {
660
			System.out.print("Pruning log entries from HARVEST_DETAIL_LOG: ");
661
      System.out.println(deleteString);
662
			stmt = conn.createStatement();							
663
			recordsDeleted = stmt.executeUpdate(deleteString);
664
			System.out.println(recordsDeleted + " records deleted");
665
			stmt.close();
666
		}
667
    catch(SQLException e) {
668
			System.out.println("SQLException: " + e.getMessage());
669
		}
670
  }
671
    
672

    
673
  /**
674
   * Prunes old records from the HARVEST_LOG table. Records are removed if
675
   * their HARVEST_DATE is older than a given number of days, as stored in the
676
   * logPeriod object field.
677
   */
678
  private void pruneHarvestLog() {
679
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
680
    Date dateLastLog;                    // Prune everything prior to this date
681
		String deleteString;
682
    long delta;
683
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
684
    int recordsDeleted;
685
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
686
    String dateString;
687
		Statement stmt;
688
    long timeLastLog = 0;
689
    
690
    delta = logPeriod * millisecondsPerDay;
691
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
692
    timeLastLog = currentTime - delta;
693
    dateLastLog = new Date(timeLastLog);
694
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
695
    deleteString += dateString;
696

    
697
		try {
698
			System.out.print("Pruning log entries from HARVEST_LOG: ");
699
      System.out.println(deleteString);
700
			stmt = conn.createStatement();							
701
			recordsDeleted = stmt.executeUpdate(deleteString);
702
			System.out.println(recordsDeleted + " records deleted");
703
			stmt.close();
704
		}
705
    catch (SQLException e) {
706
			System.out.println("SQLException: " + e.getMessage());
707
		}
708
  }
709
    
710

    
711
  /**
712
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
713
   * a HarvestSiteSchedule object for each row in the table.
714
   */
715
  private void readHarvestSiteSchedule() {
716
    HarvestSiteSchedule harvestSiteSchedule;
717
    ResultSet rs;
718
    SQLWarning warn;
719
    Statement stmt;
720

    
721
    String contactEmail;
722
    String dateLastHarvest;
723
    String dateNextHarvest;
724
    String documentListURL;
725
    String ldapDN;
726
    String ldapPwd;
727
    int siteScheduleID;
728
    String unit;
729
    int updateFrequency;
730
        
731
    try {
732
      // Read the HARVEST_SITE_SCHEDULE table
733
      stmt = conn.createStatement();
734
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
735
      warn = rs.getWarnings();
736

    
737
      if (warn != null) {
738
        System.out.println("\n---Warning---\n");
739

    
740
        while (warn != null) {
741
          System.out.println("Message: " + warn.getMessage());
742
          System.out.println("SQLState: " + warn.getSQLState());
743
          System.out.print("Vendor error code: ");
744
          System.out.println(warn.getErrorCode());
745
          System.out.println("");
746
          warn = warn.getNextWarning();
747
        }
748
      }
749
     
750
      while (rs.next()) {
751
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
752
        documentListURL = rs.getString("DOCUMENTLISTURL");
753
        ldapDN = rs.getString("LDAPDN");
754
        ldapPwd = rs.getString("LDAPPWD");
755
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
756
        dateLastHarvest = rs.getString("DATELASTHARVEST");
757
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
758
        unit = rs.getString("UNIT");
759
        contactEmail = rs.getString("CONTACT_EMAIL");
760
        
761
        warn = rs.getWarnings();
762

    
763
        if (warn != null) {
764
          System.out.println("\n---Warning---\n");
765
      
766
          while (warn != null) {
767
            System.out.println("Message: " + warn.getMessage());
768
            System.out.println("SQLState: " + warn.getSQLState());
769
            System.out.print("Vendor error code: ");
770
            System.out.println(warn.getErrorCode());
771
            System.out.println("");
772
            warn = warn.getNextWarning();
773
          }
774
        }
775
      
776
        harvestSiteSchedule = new HarvestSiteSchedule(this,
777
                                                      siteScheduleID,
778
                                                      documentListURL,
779
                                                      ldapDN,
780
                                                      ldapPwd,
781
                                                      dateNextHarvest,
782
                                                      dateLastHarvest,
783
                                                      updateFrequency,
784
                                                      unit,
785
                                                      contactEmail
786
                                                     );
787
        harvestSiteScheduleList.add(harvestSiteSchedule);
788
      }
789
      
790
      rs.close();
791
      stmt.close();
792
    }
793
    catch (SQLException e) {
794
      System.out.println("Database access failed " + e);
795
      System.exit(1);
796
    }
797
    
798
  }
799
    
800

    
801
  /**
802
   * Sends a report to the Harvester Administrator. The report prints each log
803
   * entry pertaining to this harvest run.
804
   *
805
   * @param maxCodeLevel  the maximum code level that should be printed,
806
   *                      e.g. "warning". Any log entries higher than this
807
   *                      level will not be printed.
808
   */
809
  void reportToAdministrator(String maxCodeLevel) {
810
    PrintStream body;
811
    String from = harvesterAdministrator;
812
    String[] fromArray;
813
    MailMessage msg;
814
    int siteScheduleID = 0;
815
    String subject = "Report from Metacat Harvester: " + timestamp;
816
    String to = harvesterAdministrator;
817
    
818
    if (!to.equals("")) {
819
      System.out.println("Sending report to Harvester Administrator at address "
820
                         + harvesterAdministrator);
821
      
822
      try {
823
        msg = new MailMessage(smtpServer);
824

    
825
        if (from.indexOf(',') > 0) {
826
          fromArray = from.split(",");
827
          
828
          for (int i = 0; i < fromArray.length; i++) {
829
            if (i == 0) {
830
              msg.from(fromArray[i]);
831
            }
832
            
833
            msg.to(fromArray[i]);            
834
          }
835
        }
836
        else if (from.indexOf(';') > 0) {
837
          fromArray = from.split(";");
838

    
839
          for (int i = 0; i < fromArray.length; i++) {
840
            if (i == 0) {
841
              msg.from(fromArray[i]);
842
            }
843
            
844
            msg.to(fromArray[i]);            
845
          }
846
        }
847
        else {
848
          msg.from(from);
849
          msg.to(to);
850
        }
851
        
852
        msg.setSubject(subject);
853
        body = msg.getPrintStream();
854
        printHarvestHeader(body, siteScheduleID);
855
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
856
        msg.sendAndClose();
857
      }
858
      catch (IOException e) {
859
        System.out.println("There was a problem sending email to " + to);
860
        System.out.println("IOException: " + e.getMessage());
861
      }
862
    }
863
  }
864
  
865

    
866
  /**
867
   * Sets the harvest start time for this harvest run.
868
   * 
869
   * @param date
870
   */
871
  public void setHarvestStartTime(Date date) {
872
    harvestStartTime = date;
873
  }
874
    
875

    
876
  /**
877
   * Shuts down Harvester. Performs cleanup operations such as logging out
878
   * of Metacat and disconnecting from the database.
879
   */
880
  private void shutdown() {
881
    String maxCodeLevel = "debug";  // Print all log entries from level 1
882
                                    // ("error") to level 5 ("debug")
883
    int siteScheduleID = 0;
884

    
885
    // Log shutdown operation
886
    System.out.println("Shutting Down Harvester");
887
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
888
    pruneHarvestLog();
889
    pruneHarvestDetailLog();
890
    closeConnection();
891
    // Print log to standard output and then email the Harvester administrator
892
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
893
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
894
  }
895
    
896

    
897
  /**
898
   * Initializes Harvester at startup. Connects to the database and to Metacat.
899
   * 
900
   * @param nHarvests        the nth harvest
901
   * @param maxHarvests      the maximum number of harvests that this process
902
   *                         can run
903
   */
904
  private void startup(int nHarvests, int maxHarvests) {
905
    Boolean ctm;
906
    String httpserver;
907
    Integer lp;
908
    String metacatURL;
909
    Date now = new Date();
910
    String servletPath;
911
    
912
    timestamp = now.toString();
913
    System.out.println(Harvester.marker);
914
    System.out.println(timestamp + ": Starting Next Harvest (" +
915
                       nHarvests + "/" + maxHarvests + ")");
916
    ctm = Boolean.valueOf(options.getOption("connectToMetacat"));
917
    connectToMetacat = ctm.booleanValue();
918
    harvesterAdministrator = options.getOption("harvesterAdministrator");
919
    smtpServer = options.getOption("smtpServer");
920

    
921
    try {
922
      lp = Integer.valueOf(options.getOption("logPeriod"));
923
      logPeriod = lp.intValue();
924
    }
925
    catch (NumberFormatException e) {
926
      System.err.println("NumberFormatException: Error parsing logPeriod " +
927
                         logPeriod + e.getMessage());
928
      System.err.println("Defaulting to logPeriod of 90 days");
929
      logPeriod = 90;
930
    }
931

    
932
    conn = getConnection();
933
    initLogIDs();
934
    setHarvestStartTime(now);
935
    // Log startup operation
936
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
937
      
938
    if (connectToMetacat()) {      
939
      try {
940
        httpserver = options.getOption("httpserver");
941
        servletPath = options.getOption("servletpath");
942
        metacatURL = httpserver + servletPath;
943
        System.out.println("Connecting to Metacat: " + metacatURL);
944
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
945
      } 
946
      catch (MetacatInaccessibleException e) {
947
        System.out.println("Metacat connection failed." + e.getMessage());
948
      } 
949
      catch (Exception e) {
950
        System.out.println("Metacat connection failed." + e.getMessage());
951
      }
952
    }
953
  }
954

    
955
}
(6-6/10)