Project

General

Profile

1 2094 jones
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the
4
 *                  Regents of the University of California
5 2022 costa
 *
6 2094 jones
 *   '$Author$'
7
 *     '$Date$'
8
 * '$Revision$'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 2022 costa
 */
24
25
package edu.ucsb.nceas.metacat.harvesterClient;
26
27 2086 costa
import com.oreilly.servlet.MailMessage;
28 2031 costa
import java.io.File;
29
import java.io.FileInputStream;
30
import java.io.IOException;
31 2086 costa
import java.io.PrintStream;
32 2031 costa
import java.sql.Connection;
33
import java.sql.DriverManager;
34
import java.sql.ResultSet;
35
import java.sql.SQLException;
36
import java.sql.SQLWarning;
37
import java.sql.Statement;
38
import java.util.ArrayList;
39
import java.text.SimpleDateFormat;
40
import java.util.Date;
41
import java.util.Properties;
42 2022 costa
43 2031 costa
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46 2022 costa
47
/**
48
 * Harvester is the main class for the Harvester application. The main
49
 * method creates a single Harvester object which drives the application.
50
 *
51
 * @author    costa
52
 *
53
 */
54
public class Harvester {
55
56
  /*
57
   * Class fields
58
   */
59 2062 costa
  private static final String marker =
60
"*****************************************************************************";
61 2108 costa
62
  static Properties properties;
63 2022 costa
64
65
  /*
66
   * Class methods
67
   */
68
69 2031 costa
70 2022 costa
  /**
71
   * Constructor. Creates a new instance of Harvester.
72
   */
73
  public Harvester() {
74
  }
75
76 2031 costa
77 2022 costa
  /**
78 2062 costa
   * Loads Harvester properties from a configuration file.
79 2022 costa
   */
80 2108 costa
  private static void loadProperties() {
81 2062 costa
    File configfile = new File("../../lib/harvester", "harvester.properties");
82 2108 costa
    properties = new Properties();
83 2062 costa
84
    try {
85
      properties.load(new FileInputStream(configfile));
86
    }
87
    catch (IOException e) {
88
      System.out.println("IOException: " + e.getMessage());
89
      System.exit(1);
90
    }
91 2022 costa
  }
92
93 2062 costa
94 2022 costa
  /**
95
    * Harvester main method.
96
    *
97
    * @param args        the command line arguments
98
    * @throws SAXException
99
    * @throws IOException
100
    * @throws ParserConfigurationException
101
    */
102
  public static void main(String[] args) {
103 2062 costa
    Integer delayDefault = new Integer(0); // Default number of hours delay
104
    int delay = delayDefault.intValue();  // Delay in hours before first harvest
105
    Integer d;                            // Used for determining delay
106
    long delta;                           // endTime - startTime
107
    long endTime;                         // time that a harvest completes
108
    Harvester harvester;                  // object for a single harvest run
109
    Integer maxHarvestsDefault = new Integer(30);    // Default max harvests
110
    int maxHarvests = maxHarvestsDefault.intValue(); // Max number of harvests
111
    Integer mh;                              // used in determining max harvests
112
    int nHarvests = 0;                      // counts the number of harvest runs
113 2108 costa
    final long oneHour = (60 * 60 * 1000);   // milliseconds in one hour
114 2062 costa
    Integer periodDefault = new Integer(24); // Default hours between harvests
115
    int period = periodDefault.intValue();   // Hours between harvests
116
    Integer p;                               // Used in determining the period
117 2108 costa
    long startTime;                          // time that a harvest run starts
118 2062 costa
119
    System.out.println(marker);
120
    System.out.println("Starting Harvester");
121 2108 costa
    Harvester.loadProperties();
122
    //properties.list(System.out);
123 2062 costa
124
    // Parse the delay property. Use default if necessary.
125
    try {
126
      d = Integer.valueOf(properties.getProperty(
127
                                                 "delay",
128
                                                 delayDefault.toString()
129
                                                ));
130
      delay = d.intValue();
131
    }
132
    catch (NumberFormatException e) {
133
      System.out.println("NumberFormatException: Error parsing delay: " +
134
                         e.getMessage());
135
      System.out.println("Defaulting to delay=" + delayDefault);
136
      delay = delayDefault.intValue();
137
    }
138
139
    // Parse the maxHarvests property. Use default if necessary.
140
    try {
141
      mh = Integer.valueOf(properties.getProperty(
142
                                                  "maxHarvests",
143
                                                  maxHarvestsDefault.toString()
144
                                                 ));
145
      maxHarvests = mh.intValue();
146
    }
147
    catch (NumberFormatException e) {
148
      System.out.println("NumberFormatException: Error parsing maxHarvests: " +
149
                         e.getMessage());
150
      System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
151
      maxHarvests = maxHarvestsDefault.intValue();
152
    }
153
154
    // Parse the period property. Use default if necessary.
155
    try {
156
      p = Integer.valueOf(properties.getProperty("period",
157
                                                 periodDefault.toString()
158
                                                ));
159
      period = p.intValue();
160
    }
161
    catch (NumberFormatException e) {
162
      System.out.println("NumberFormatException: Error parsing period: " +
163
                         e.getMessage());
164
      System.out.println("Defaulting to period=" + periodDefault);
165
      period = periodDefault.intValue();
166
    }
167 2022 costa
168 2062 costa
    // Sleep for delay number of hours prior to starting first harvest
169
    if (delay > 0) {
170
      try {
171
        System.out.print("First harvest will begin in " + delay);
172
        if (delay == 1) {
173
          System.out.println(" hour.");
174
        }
175
        else {
176
          System.out.println(" hours.");
177
        }
178 2105 costa
        Thread.sleep(delay * oneHour);
179 2062 costa
      }
180
      catch (InterruptedException e) {
181
          System.err.println("InterruptedException: " + e.getMessage());
182
          System.exit(1);
183
      }
184
    }
185
186
    // Repeat a new harvest once every period number of hours, until we reach
187
    // the maximum number of harvests. Subtract delta from the time period so
188
    // that each harvest will start at a fixed interval.
189
    //
190
    while (nHarvests < maxHarvests) {
191
      nHarvests++;
192
      startTime = System.currentTimeMillis();
193
      harvester = new Harvester();                // New object for this harvest
194
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
195
      harvester.readHarvestSiteSchedule();        // Read the database table
196
      harvester.harvest();                        // Harvest the documents
197
      harvester.shutdown();                       // Shut down Harvester
198
      endTime = System.currentTimeMillis();
199
      delta = endTime - startTime;
200
201
      if (nHarvests < maxHarvests) {
202
        try {
203 2108 costa
          Thread.sleep((period * oneHour) - delta);
204 2062 costa
        }
205
        catch (InterruptedException e) {
206
          System.err.println("InterruptedException: " + e.getMessage());
207
          System.exit(1);
208
        }
209
      }
210
    }
211 2022 costa
  }
212
213
214
  /*
215
   * Object fields
216
   */
217
218 2031 costa
  /** Database connection */
219 2022 costa
  Connection conn;
220 2031 costa
221 2062 costa
  /** Used during development to determine whether to connect to metacat
222
   *  Sometimes it's useful to test parts of the code without actually
223
   *  connecting to Metacat.
224
   */
225 2031 costa
  private boolean connectToMetacat;
226
227
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
228
  private int detailLogID;
229
230 2061 costa
  /** Email address of the Harvester Administrator */
231 2105 costa
  String harvesterAdministrator;
232 2061 costa
233 2031 costa
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
234
  private int harvestLogID;
235
236
  /** End time of this harvest session */
237
  private Date harvestEndTime;
238
239
  /** List of HarvestLog objects. Stores log entries for report generation. */
240
  private ArrayList harvestLogList = new ArrayList();
241
242
  /** List of HarvestSiteSchedule objects */
243
  private ArrayList harvestSiteScheduleList = new ArrayList();
244
245
  /** Start time of this harvest session */
246
  private Date harvestStartTime;
247
248
  /** Number of days to save log records. Any that are older are purged. */
249
  int logPeriod;
250
251
  /** Metacat client object */
252 2022 costa
  Metacat metacat;
253 2031 costa
254 2086 costa
  /** SMTP server for sending mail messages */
255
  String smtpServer;
256
257 2108 costa
  /** The timestamp for this harvest run. Used for output only. */
258
  String timestamp;
259
260 2022 costa
261
  /*
262
   * Object methods
263
   */
264 2031 costa
265
  /**
266
   * Creates a new HarvestLog object and adds it to the harvestLogList.
267
   *
268
   * @param  status          the status of the harvest operation
269
   * @param  message         the message text of the harvest operation
270
   * @param  harvestOperationCode  the harvest operation code
271
   * @param  siteScheduleID  the siteScheduleID for which this operation was
272
   *                         performed. 0 indicates that the operation did not
273
   *                         involve a particular harvest site.
274
   * @param  harvestDocument the associated HarvestDocument object. May be null.
275
   * @param  errorMessage    additional error message pertaining to document
276
   *                         error.
277
   */
278
  void addLogEntry(int    status,
279
                   String message,
280
                   String harvestOperationCode,
281
                   int    siteScheduleID,
282
                   HarvestDocument harvestDocument,
283
                   String errorMessage
284
                  ) {
285
    HarvestLog harvestLog;
286 2022 costa
287 2031 costa
    /* If there is no associated harvest document, call the basic constructor;
288
     * else call the extended constructor.
289
     */
290
    if (harvestDocument == null) {
291
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
292
                                  harvestOperationCode, siteScheduleID);
293
    }
294
    else {
295
      harvestLog = new HarvestLog(this, harvestStartTime, status, message,
296
                                  harvestOperationCode, siteScheduleID,
297
                                  harvestDocument, errorMessage);
298
    }
299
300
    harvestLogList.add(harvestLog);
301
  }
302
303
304 2022 costa
  /**
305
   * Determines whether Harvester should attempt to connect to Metacat.
306
   * Used during development and testing.
307
   *
308
   * @return     true if Harvester should connect, otherwise false
309
   */
310
  boolean connectToMetacat () {
311
    return connectToMetacat;
312
  }
313 2036 costa
314
315
  /**
316
   * Normalizes text prior to insertion into the HARVEST_LOG or
317
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
318
   * character with the double quote character. This prevents SQL errors
319
   * involving words that contain single quotes. Also removes \n and \r
320
   * characters from the text.
321
   *
322
   * @param text  the original string
323
   * @return      a string containing the normalized text
324
   */
325
  String dequoteText(String text) {
326
    char c;
327
    StringBuffer stringBuffer = new StringBuffer();
328 2022 costa
329 2036 costa
    for (int i = 0; i < text.length(); i++) {
330
      c = text.charAt(i);
331
      switch (c) {
332
        case '\'':
333
          stringBuffer.append('\"');
334
          break;
335
        case '\r':
336
        case '\n':
337
          break;
338
        default:
339
          stringBuffer.append(c);
340
          break;
341
      }
342
    }
343
344
    return stringBuffer.toString();
345
  }
346 2022 costa
347 2036 costa
348 2022 costa
  /**
349 2031 costa
   * Gets the current value of the detailLogID for storage as a primary key in
350
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
351
   *
352
   * @return  the current value of the detailLogID
353
   */
354
  int getDetailLogID() {
355
    int currentValue = detailLogID;
356
357
    detailLogID++;
358
    return currentValue;
359
  }
360
361
362
  /**
363
   * Gets the current value of the harvestLogID for storage as a primary key in
364
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
365
   *
366
   * @return  the current value of the detailLogID
367
   */
368
  int getHarvestLogID() {
369
    int currentValue = harvestLogID;
370
371
    harvestLogID++;
372
    return currentValue;
373
  }
374
375
376
  /**
377
   * Gets the maximum value of an integer field from a table.
378
   *
379
   * @param tableName  the database table name
380
   * @param fieldName  the field name of the integer field in the table
381
   * @return  the maximum integer stored in the fieldName field of tableName
382
   */
383
  private int getMaxValue(String tableName, String fieldName) {
384
    int maxValue = 100;
385
    int fieldValue;
386
		String query = "SELECT " + fieldName + " FROM " + tableName;
387
		Statement stmt;
388
389
		try {
390
			stmt = conn.createStatement();
391
			ResultSet rs = stmt.executeQuery(query);
392
393
			while (rs.next()) {
394
				fieldValue = rs.getInt(fieldName);
395
        maxValue = Math.max(maxValue, fieldValue);
396
			}
397
398
			stmt.close();
399
		}
400
    catch(SQLException ex) {
401
			System.out.println("SQLException: " + ex.getMessage());
402
		}
403
404
    return maxValue;
405
  }
406
407
408
  /**
409
   * Gets the minimum value of an integer field from a table.
410
   *
411
   * @param tableName  the database table name
412
   * @param fieldName  the field name of the integer field in the table
413
   * @return  the minimum integer stored in the fieldName field of tableName
414
   */
415
  private int getMinValue(String tableName, String fieldName) {
416
    int minValue = 0;
417
    int fieldValue;
418
		String query = "SELECT " + fieldName + " FROM " + tableName;
419
		Statement stmt;
420
421
		try {
422
			stmt = conn.createStatement();
423
			ResultSet rs = stmt.executeQuery(query);
424
425
			while (rs.next()) {
426
				fieldValue = rs.getInt(fieldName);
427
428
        if (minValue == 0) {
429
          minValue = fieldValue;
430
        }
431
        else {
432
          minValue = Math.min(minValue, fieldValue);
433
        }
434
			}
435
436
			stmt.close();
437
		}
438
    catch(SQLException ex) {
439
			System.out.println("SQLException: " + ex.getMessage());
440
		}
441
442
    return minValue;
443
  }
444
445
446
  /**
447 2022 costa
   * For every Harvest site schedule in the database, harvest the
448
   * documents for that site if they are due to be harvested.
449
   *
450
   * @throws SAXException
451
   * @throws IOException
452
   * @throws ParserConfigurationException
453
   */
454
  private void harvest() {
455
    HarvestSiteSchedule harvestSiteSchedule;
456
457 2031 costa
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
458
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
459
      harvestSiteSchedule.harvestDocumentList();
460 2022 costa
    }
461
  }
462
463
464
  /**
465 2031 costa
   * Initializes the detailLogID and harvestLogID values to their current
466
   * maximums + 1.
467 2022 costa
   */
468 2031 costa
  private void initLogIDs() {
469
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
470
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
471
  }
472
473 2062 costa
474 2031 costa
  /**
475 2105 costa
   * Prints harvest log entries for this harvest run. Entries may be filtered
476
   * for a particular site, or all entries may be printed.
477 2086 costa
   *
478 2105 costa
   * @param out            the PrintStream object to write to
479
   * @param maxCodeLevel   the maximum code level that should be printed,
480
   *                       e.g. "warning". Any log entries higher than this
481
   *                       level will not be printed.
482
   * @param siteScheduleID if greater than 0, indicates that the log
483
   *                       entry should only be printed for a particular site
484
   *                       as identified by its siteScheduleID. if 0, then
485
   *                       print output for all sites.
486 2086 costa
   */
487 2105 costa
  void printHarvestLog(PrintStream out,
488
                       String maxCodeLevel,
489
                       int siteScheduleID
490
                      ) {
491 2086 costa
    HarvestLog harvestLog;
492 2105 costa
    int logSiteScheduleID;
493
    int nErrors = 0;
494
    String phrase;
495 2086 costa
496
    for (int i = 0; i < harvestLogList.size(); i++) {
497
      harvestLog = (HarvestLog) harvestLogList.get(i);
498 2105 costa
      logSiteScheduleID = harvestLog.getSiteScheduleID();
499
500
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
501
        if (harvestLog.isErrorEntry()) {
502
          nErrors++;
503
        }
504
      }
505 2086 costa
    }
506 2105 costa
507
    out.println(marker);
508
    out.println("*");
509 2108 costa
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
510 2105 costa
    out.println("*");
511
512
    if (nErrors > 0) {
513
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
514
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
515
      out.println("* Please see the log entries below for additonal details.");
516
    }
517
    else {
518
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
519
    }
520
521
    out.println("*");
522
    out.println(marker);
523
524
    for (int i = 0; i < harvestLogList.size(); i++) {
525
      harvestLog = (HarvestLog) harvestLogList.get(i);
526
      logSiteScheduleID = harvestLog.getSiteScheduleID();
527
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
528
        harvestLog.printOutput(out, maxCodeLevel);
529
      }
530
    }
531 2086 costa
  }
532
533
534
  /**
535 2062 costa
   * Prints the site schedule data for a given site.
536
   *
537 2086 costa
   * @param out              the PrintStream to write to
538 2062 costa
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
539 2031 costa
   */
540 2086 costa
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
541 2031 costa
     HarvestSiteSchedule harvestSiteSchedule;
542
543
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
544
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
545
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
546 2086 costa
        harvestSiteSchedule.printOutput(out);
547 2031 costa
      }
548
    }
549
  }
550
551
552
  /**
553
   * Prunes old records from the HARVEST_DETAIL_LOG table. Records are
554
   * removed if the HARVEST_LOG_ID foreign key is less than the lowest
555
   * HARVEST_LOG_ID primary key in the HARVEST_LOG table.
556
   */
557
  private void pruneHarvestDetailLog() {
558
		String deleteString;
559
    int minHarvestLogID;
560
    int recordsDeleted;
561
		Statement stmt;
562 2022 costa
563 2031 costa
    minHarvestLogID = getMinValue("HARVEST_LOG", "HARVEST_LOG_ID");
564
    deleteString = "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID < " +
565
                   minHarvestLogID;
566 2022 costa
567 2031 costa
		try {
568
			System.out.print("Pruning log entries from HARVEST_DETAIL_LOG: ");
569
			stmt = conn.createStatement();
570
			recordsDeleted = stmt.executeUpdate(deleteString);
571
			System.out.println(recordsDeleted + " records deleted");
572
			stmt.close();
573
		}
574
    catch(SQLException e) {
575
			System.out.println("SQLException: " + e.getMessage());
576
		}
577
  }
578
579
580 2022 costa
  /**
581 2031 costa
   * Prunes old records from the HARVEST_LOG table. Records are removed if
582
   * their HARVEST_DATE is older than a given number of days, as stored in the
583
   * logPeriod object field.
584
   */
585
  private void pruneHarvestLog() {
586
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
587
    Date dateLastLog;                    // Prune everything prior to this date
588
		String deleteString;
589
    long delta;
590
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
591
    int recordsDeleted;
592
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
593
    String dateString;
594
		Statement stmt;
595
    long timeLastLog = 0;
596
597
    delta = logPeriod * millisecondsPerDay;
598
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
599
    timeLastLog = currentTime - delta;
600
    dateLastLog = new Date(timeLastLog);
601
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
602
    deleteString += dateString;
603
604
		try {
605
			System.out.print("Pruning log entries from HARVEST_LOG: ");
606
			stmt = conn.createStatement();
607
			recordsDeleted = stmt.executeUpdate(deleteString);
608
			System.out.println(recordsDeleted + " records deleted");
609
			stmt.close();
610
		}
611
    catch (SQLException e) {
612
			System.out.println("SQLException: " + e.getMessage());
613
		}
614
  }
615
616
617
  /**
618 2022 costa
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
619
   * a HarvestSiteSchedule object for each row in the table.
620
   */
621
  private void readHarvestSiteSchedule() {
622
    HarvestSiteSchedule harvestSiteSchedule;
623
    ResultSet rs;
624
    SQLWarning warn;
625
    Statement stmt;
626
627
    String contactEmail;
628
    String dateLastHarvest;
629
    String dateNextHarvest;
630
    String documentListURL;
631
    String ldapDN;
632 2031 costa
    String ldapPwd;
633 2022 costa
    int siteScheduleID;
634
    String unit;
635
    int updateFrequency;
636
637
    try {
638
      // Read the HARVEST_SITE_SCHEDULE table
639
      stmt = conn.createStatement();
640
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
641
      warn = rs.getWarnings();
642
643
      if (warn != null) {
644
        System.out.println("\n---Warning---\n");
645
646
        while (warn != null) {
647
          System.out.println("Message: " + warn.getMessage());
648
          System.out.println("SQLState: " + warn.getSQLState());
649
          System.out.print("Vendor error code: ");
650
          System.out.println(warn.getErrorCode());
651
          System.out.println("");
652
          warn = warn.getNextWarning();
653
        }
654
      }
655
656
      while (rs.next()) {
657
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
658
        documentListURL = rs.getString("DOCUMENTLISTURL");
659
        ldapDN = rs.getString("LDAPDN");
660 2031 costa
        ldapPwd = rs.getString("LDAPPWD");
661 2022 costa
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
662
        dateLastHarvest = rs.getString("DATELASTHARVEST");
663
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
664
        unit = rs.getString("UNIT");
665
        contactEmail = rs.getString("CONTACT_EMAIL");
666
667
        warn = rs.getWarnings();
668
669
        if (warn != null) {
670
          System.out.println("\n---Warning---\n");
671
672
          while (warn != null) {
673
            System.out.println("Message: " + warn.getMessage());
674
            System.out.println("SQLState: " + warn.getSQLState());
675
            System.out.print("Vendor error code: ");
676
            System.out.println(warn.getErrorCode());
677
            System.out.println("");
678
            warn = warn.getNextWarning();
679
          }
680
        }
681
682 2031 costa
        harvestSiteSchedule = new HarvestSiteSchedule(this,
683 2022 costa
                                                      siteScheduleID,
684
                                                      documentListURL,
685
                                                      ldapDN,
686 2031 costa
                                                      ldapPwd,
687 2022 costa
                                                      dateNextHarvest,
688
                                                      dateLastHarvest,
689
                                                      updateFrequency,
690
                                                      unit,
691
                                                      contactEmail
692
                                                     );
693 2031 costa
        harvestSiteScheduleList.add(harvestSiteSchedule);
694 2022 costa
      }
695 2031 costa
696
      rs.close();
697
      stmt.close();
698
    }
699 2022 costa
    catch (SQLException e) {
700
      System.out.println("Database access failed " + e);
701
      System.exit(1);
702
    }
703
704
  }
705
706
707
  /**
708 2086 costa
   * Sends a report to the Harvester administrator. The report prints each log
709
   * entry pertaining to this harvest run.
710 2105 costa
   *
711
   * @param maxCodeLevel  the maximum code level that should be printed,
712
   *                      e.g. "warning". Any log entries higher than this
713
   *                      level will not be printed.
714 2022 costa
   */
715 2105 costa
  void reportToAdministrator(String maxCodeLevel) {
716 2086 costa
    PrintStream body;
717
    String from = harvesterAdministrator;
718
    MailMessage msg;
719 2105 costa
    int siteScheduleID = 0;
720 2108 costa
    String subject = "Report from Metacat Harvester: " + timestamp;
721 2086 costa
    String to = harvesterAdministrator;
722
723
    if (!to.equals("")) {
724
      System.out.println("Sending report to Harvester Administrator at address "
725
                         + harvesterAdministrator);
726
727
      try {
728
        msg = new MailMessage(smtpServer);
729
        msg.from(from);
730
        msg.to(to);
731
        msg.setSubject(subject);
732
        body = msg.getPrintStream();
733 2105 costa
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
734 2086 costa
        msg.sendAndClose();
735
      }
736
      catch (IOException e) {
737
        System.out.println("There was a problem sending email to " + to);
738
        System.out.println("IOException: " + e.getMessage());
739
      }
740 2022 costa
  }
741 2086 costa
}
742 2022 costa
743
744
  /**
745
   * Shuts down Harvester. Performs cleanup operations such as logging out
746
   * of Metacat and disconnecting from the database.
747
   */
748
  private void shutdown() {
749 2105 costa
    String maxCodeLevel = "debug";  // Print all log entries from level 1
750
                                    // ("error") to level 5 ("debug")
751
    int siteScheduleID = 0;
752
753 2022 costa
    // Log shutdown operation
754
    System.out.println("Shutting Down Harvester");
755 2031 costa
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
756
    pruneHarvestLog();
757
    pruneHarvestDetailLog();
758 2022 costa
759
    try {
760
      // Close the database connection
761
      conn.close();
762
    }
763
    catch (SQLException e) {
764
      System.out.println("Database access failed " + e);
765
    }
766 2031 costa
767 2105 costa
    // Print log to standard output and then email the Harvester administrator
768
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
769
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
770 2022 costa
  }
771
772
773
  /**
774
   * Initializes Harvester at startup. Connects to the database and to Metacat.
775 2062 costa
   *
776
   * @param nHarvests        the nth harvest
777
   * @param maxHarvests      the maximum number of harvests that this process
778
   *                         can run
779 2022 costa
   */
780 2062 costa
  private void startup(int nHarvests, int maxHarvests) {
781 2031 costa
    Boolean ctm;
782 2022 costa
    String dbDriver;
783 2031 costa
    Integer lp;
784 2022 costa
    String metacatURL;
785 2062 costa
    Date now;
786 2022 costa
    String password;
787 2062 costa
    Properties properties;
788 2031 costa
    //String response;
789 2022 costa
    String sessionId;
790
		String url;
791
    String user;
792
    String userName = System.getProperty("user.name");
793
    SQLWarning warn;
794
795
    // Log startup operation
796 2062 costa
    System.out.println(Harvester.marker);
797
    now = new Date();
798 2108 costa
    timestamp = now.toString();
799
    System.out.println(timestamp + ": Starting Next Harvest (" +
800 2062 costa
                       nHarvests + "/" + maxHarvests + ")");
801 2108 costa
    Harvester.loadProperties();
802
    properties = Harvester.properties;
803 2031 costa
    ctm = Boolean.valueOf(properties.getProperty("connectToMetacat", "true"));
804
    connectToMetacat = ctm.booleanValue();
805 2022 costa
    dbDriver = properties.getProperty("dbDriver");
806 2061 costa
    harvesterAdministrator = properties.getProperty("harvesterAdministrator");
807 2062 costa
808 2031 costa
    try {
809
      lp = Integer.valueOf(properties.getProperty("logPeriod", "90"));
810
      logPeriod = lp.intValue();
811
    }
812
    catch (NumberFormatException e) {
813
      System.err.println("NumberFormatException: Error parsing logPeriod " +
814
                         logPeriod + e.getMessage());
815
      System.err.println("Defaulting to logPeriod of 90 days");
816
      logPeriod = 90;
817
    }
818 2062 costa
819 2031 costa
    metacatURL = properties.getProperty("metacatURL");
820
    password = properties.getProperty("password");
821 2086 costa
    smtpServer = properties.getProperty("smtpServer", "localhost");
822 2022 costa
    url = properties.getProperty("url");
823
    user = properties.getProperty("user");
824
825
    // Load the jdbc driver
826
    try {
827
      Class.forName(dbDriver);
828
    }
829
    catch (ClassNotFoundException e) {
830
      System.out.println("Can't load driver " + e);
831
      System.exit(1);
832
    }
833
834
    // Make the database connection
835
    try {
836
      System.out.println("Getting connection to Harvester tables");
837
      conn = DriverManager.getConnection(url, user, password);
838
839
      // If a SQLWarning object is available, print its warning(s).
840
      // There may be multiple warnings chained.
841
      warn = conn.getWarnings();
842
843
      if (warn != null) {
844
        while (warn != null) {
845
          System.out.println("SQLState: " + warn.getSQLState());
846
          System.out.println("Message:  " + warn.getMessage());
847
          System.out.println("Vendor: " + warn.getErrorCode());
848
          System.out.println("");
849
          warn = warn.getNextWarning();
850
        }
851
      }
852
    }
853
    catch (SQLException e) {
854
      System.out.println("Database access failed " + e);
855
      System.exit(1);
856
    }
857 2031 costa
858
    initLogIDs();
859
    harvestStartTime = new Date();
860
    addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
861 2022 costa
862
    if (connectToMetacat()) {
863
      try {
864
        System.out.println("Connecting to Metacat: " + metacatURL);
865
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
866
      }
867
      catch (MetacatInaccessibleException e) {
868
        System.out.println("Metacat connection failed." + e.getMessage());
869
      }
870
      catch (Exception e) {
871
        System.out.println("Metacat connection failed." + e.getMessage());
872
      }
873
    }
874
  }
875
876
877
}