/**
 *  '$RCSfile$'
 *  Copyright: 2004 University of New Mexico and the
 *                  Regents of the University of California
 *
 *   '$Author$'
 *     '$Date$'
 * '$Revision$'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
24
25
package edu.ucsb.nceas.metacat.harvesterClient;
26
27 2086 costa
import com.oreilly.servlet.MailMessage;
28 2031 costa
import java.io.IOException;
29 2086 costa
import java.io.PrintStream;
30 2031 costa
import java.sql.Connection;
31
import java.sql.DriverManager;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.sql.SQLWarning;
35
import java.sql.Statement;
36
import java.util.ArrayList;
37
import java.text.SimpleDateFormat;
38
import java.util.Date;
39 2022 costa
40 2031 costa
import edu.ucsb.nceas.metacat.client.Metacat;
41
import edu.ucsb.nceas.metacat.client.MetacatFactory;
42
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
43 5030 daigle
import edu.ucsb.nceas.metacat.properties.PropertyService;
44 4080 daigle
import edu.ucsb.nceas.metacat.util.SystemUtil;
45 4125 daigle
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
46 2022 costa
47
/**
 * Harvester is the main class for the Harvester application. The main
 * method creates one Harvester object per harvest run; that object drives
 * the run.
 *
 * @author    costa
 *
 */
54
public class Harvester {
55
56
  /*
57
   * Class fields
58
   */
59 8018 leinfelder
60 2155 costa
  public static final String filler = "*";
61 2995 costa
  private static boolean keepRunning = true;
62 2155 costa
  public static final String marker =
63 2062 costa
"*****************************************************************************";
64 4125 daigle
//  public static PropertyService propertyService = null;
65 2384 costa
  private static String schemaLocation = null;
66 2022 costa
67
68
  /*
69
   * Class methods
70
   */
71
72 2031 costa
73 2022 costa
  /**
74
   * Constructor. Creates a new instance of Harvester.
75
   */
76
  public Harvester() {
77
  }
78
79 2031 costa
80 2022 costa
  /**
81 2155 costa
   * Loads Harvester options from a configuration file.
82 2022 costa
   */
83 8018 leinfelder
  public static void loadProperties(String metacatContextDir) {
84 2062 costa
85
    try {
86 8018 leinfelder
    	PropertyService.getInstance(metacatContextDir + "/WEB-INF");
87 2155 costa
    }
88 8018 leinfelder
    catch (Exception e) {
89 4125 daigle
      System.out.println("Error in loading properties: " + e.getMessage());
90 8018 leinfelder
      System.exit(1);
91 2062 costa
    }
92 2022 costa
  }
93
94 2062 costa
95 4125 daigle
    /**
96
	 * Harvester main method.
97
	 *
98 4999 costa
	 * @param args               the command line arguments
99
	 *
100
	 *   args[0] if "false", then this is not command-line mode,
101
	 *           Command-line mode is true by default.
102
	 *
103
	 *   args[1] if present, represents the path to the harvest list schema file.
104
	 *           Specifying it overrides the default path to the schema file.
105
	 *
106 4125 daigle
	 * @throws SAXException
107
	 * @throws IOException
108
	 * @throws ParserConfigurationException
109
	 */
110
	public static void main(String[] args) {
111 4999 costa
112
	    Integer delayDefault = new Integer(0); // Default number of hours delay
113 4125 daigle
		int delay = delayDefault.intValue(); // Delay in hours before first
114
												// harvest
115
		Integer d; // Used for determining delay
116
		long delta; // endTime - startTime
117
		long endTime; // time that a harvest completes
118
		Harvester harvester; // object for a single harvest run
119
		Integer maxHarvestsDefault = new Integer(0); // Default max harvests
120
		int maxHarvests = maxHarvestsDefault.intValue(); // Max number of
121
															// harvests
122
		Integer mh; // used in determining max harvests
123
		int nHarvests = 0; // counts the number of harvest runs
124
		final long oneHour = (60 * 60 * 1000); // milliseconds in one hour
125
		Integer periodDefault = new Integer(24); // Default hours between
126
													// harvests
127
		int period = periodDefault.intValue(); // Hours between harvests
128
		Integer p; // Used in determining the period
129
		long startTime; // time that a harvest run starts
130 4999 costa
131 8018 leinfelder
		String metacatContextDir = null;
132
133 4999 costa
		if ((args.length > 0) && (args[0] != null)) {
134 8018 leinfelder
			metacatContextDir = args[0];
135 4999 costa
		}
136 2062 costa
137 4999 costa
		/*
138
		 * If there is a second argument, it is the schemaLocation value
139
		 */
140
		if (args.length > 1) {
141
			schemaLocation = args[1];
142 4125 daigle
			System.err.println("schemaLocation: " + schemaLocation);
143 2062 costa
144 4125 daigle
			try {
145
				Thread.sleep(10000);
146
			} catch (InterruptedException e) {
147
				e.printStackTrace();
148
			}
149
		}
150 2062 costa
151 4125 daigle
		System.out.println(marker);
152
		System.out.println("Starting Harvester");
153 8018 leinfelder
		Harvester.loadProperties(metacatContextDir);
154 2062 costa
155 4125 daigle
		// Parse the delay property. Use default if necessary.
156
		try {
157 4175 daigle
			d = Integer.valueOf(PropertyService.getProperty("harvester.delay"));
158 4125 daigle
			delay = d.intValue();
159
		} catch (NumberFormatException e) {
160
			System.out.println("NumberFormatException: Error parsing delay: "
161
					+ e.getMessage());
162
			System.out.println("Defaulting to delay=" + delayDefault);
163
			delay = delayDefault.intValue();
164
		} catch (PropertyNotFoundException pnfe) {
165
			System.out.println("PropertyNotFoundException: Error finding delay: "
166
					+ pnfe.getMessage());
167
			System.out.println("Defaulting to delay=" + delayDefault);
168
			delay = delayDefault.intValue();
169
		}
170 2062 costa
171 4125 daigle
		// Parse the maxHarvests property. Use default if necessary.
172
		try {
173 4175 daigle
			mh = Integer.valueOf(PropertyService.getProperty("harvester.maxHarvests"));
174 4125 daigle
			maxHarvests = mh.intValue();
175
		} catch (NumberFormatException e) {
176
			System.out.println("NumberFormatException: Error parsing maxHarvests: "
177
					+ e.getMessage());
178
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
179
			maxHarvests = maxHarvestsDefault.intValue();
180
		} catch (PropertyNotFoundException pnfe) {
181
			System.out.println("PropertyNotFoundException: Error finding maxHarvests: "
182
					+ pnfe.getMessage());
183
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
184
			maxHarvests = maxHarvestsDefault.intValue();
185
		}
186
187
		// Parse the period property. Use default if necessary.
188
		try {
189 4175 daigle
			p = Integer.valueOf(PropertyService.getProperty("harvester.period"));
190 4125 daigle
			period = p.intValue();
191
		} catch (NumberFormatException e) {
192
			System.out.println("NumberFormatException: Error parsing period: "
193
					+ e.getMessage());
194
			System.out.println("Defaulting to period=" + periodDefault);
195
			period = periodDefault.intValue();
196
		} catch (PropertyNotFoundException pnfe) {
197
			System.out.println("PropertyNotFoundException: Error finding period: "
198
					+ pnfe.getMessage());
199
			System.out.println("Defaulting to period=" + periodDefault);
200
			period = periodDefault.intValue();
201
		}
202
203
		// Sleep for delay number of hours prior to starting first harvest
204
		if (delay > 0) {
205
			try {
206
				System.out.print("First harvest will begin in " + delay);
207
				if (delay == 1) {
208
					System.out.println(" hour.");
209
				} else {
210
					System.out.println(" hours.");
211
				}
212
				Thread.sleep(delay * oneHour);
213
			} catch (InterruptedException e) {
214
				System.err.println("InterruptedException: " + e.getMessage());
215
				System.exit(1);
216
			}
217
		}
218
219 2062 costa
    // Repeat a new harvest once every period number of hours, until we reach
220 2426 costa
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
221 4125 daigle
    // Subtract delta from the time period so
222 2062 costa
    // that each harvest will start at a fixed interval.
223
    //
224 2995 costa
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
225 2062 costa
      nHarvests++;
226
      startTime = System.currentTimeMillis();
227 4125 daigle
      harvester = new Harvester();                // New object for this
228
													// harvest
229 2062 costa
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
230
      harvester.readHarvestSiteSchedule();        // Read the database table
231
      harvester.harvest();                        // Harvest the documents
232
      harvester.shutdown();                       // Shut down Harvester
233
      endTime = System.currentTimeMillis();
234
      delta = endTime - startTime;
235
236 2426 costa
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
237 2062 costa
        try {
238 2203 costa
          System.out.println("Next harvest will begin in " +
239
                             period + " hours.");
240 2108 costa
          Thread.sleep((period * oneHour) - delta);
241 2062 costa
        }
242
        catch (InterruptedException e) {
243
          System.err.println("InterruptedException: " + e.getMessage());
244
          System.exit(1);
245
        }
246
      }
247
    }
248 2022 costa
  }
249 2995 costa
250
251
  /**
252
   * Set the keepRunning flag. If set to false, the main program will end
253
   * the while loop that keeps harvester running every period number of hours.
254
   * The static method is intended to be called from the HarvesterServlet class
255
   * which creates a thread to run Harvester. When the thread is destroyed, the
256
   * thread's destroy() method calls Harvester.setKeepRunning(false).
257
   *
258
   * @param keepRunning
259
   */
260
  static void setKeepRunning(boolean keepRunning) {
261
    Harvester.keepRunning = keepRunning;
262
  }
263 2022 costa
264 2995 costa
265 2022 costa
  /*
266
   * Object fields
267
   */
268
269 2031 costa
  /** Database connection */
270 2139 costa
  private Connection conn = null;
271 2031 costa
272 2062 costa
  /** Used during development to determine whether to connect to metacat
273
   *  Sometimes it's useful to test parts of the code without actually
274
   *  connecting to Metacat.
275
   */
276 2031 costa
  private boolean connectToMetacat;
277
278
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
279
  private int detailLogID;
280
281 2061 costa
  /** Email address of the Harvester Administrator */
282 2105 costa
  String harvesterAdministrator;
283 2061 costa
284 2031 costa
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
285
  private int harvestLogID;
286
287
  /** End time of this harvest session */
288
  private Date harvestEndTime;
289
290
  /** List of HarvestLog objects. Stores log entries for report generation. */
291
  private ArrayList harvestLogList = new ArrayList();
292
293
  /** List of HarvestSiteSchedule objects */
294
  private ArrayList harvestSiteScheduleList = new ArrayList();
295
296
  /** Start time of this harvest session */
297
  private Date harvestStartTime;
298
299
  /** Number of days to save log records. Any that are older are purged. */
300
  int logPeriod;
301
302
  /** Metacat client object */
303 2022 costa
  Metacat metacat;
304 2031 costa
305 2086 costa
  /** SMTP server for sending mail messages */
306
  String smtpServer;
307
308 2108 costa
  /** The timestamp for this harvest run. Used for output only. */
309
  String timestamp;
310
311 2022 costa
312
  /*
313
   * Object methods
314
   */
315 2031 costa
316
  /**
317
   * Creates a new HarvestLog object and adds it to the harvestLogList.
318
   *
319
   * @param  status          the status of the harvest operation
320
   * @param  message         the message text of the harvest operation
321
   * @param  harvestOperationCode  the harvest operation code
322
   * @param  siteScheduleID  the siteScheduleID for which this operation was
323
   *                         performed. 0 indicates that the operation did not
324
   *                         involve a particular harvest site.
325
   * @param  harvestDocument the associated HarvestDocument object. May be null.
326
   * @param  errorMessage    additional error message pertaining to document
327
   *                         error.
328
   */
329
  void addLogEntry(int    status,
330
                   String message,
331
                   String harvestOperationCode,
332
                   int    siteScheduleID,
333
                   HarvestDocument harvestDocument,
334
                   String errorMessage
335
                  ) {
336
    HarvestLog harvestLog;
337 2139 costa
    int harvestLogID = getHarvestLogID();
338
    int detailLogID;
339 2022 costa
340 2031 costa
    /* If there is no associated harvest document, call the basic constructor;
341
     * else call the extended constructor.
342
     */
343
    if (harvestDocument == null) {
344 2139 costa
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime,
345
                                  status, message, harvestOperationCode,
346
                                  siteScheduleID);
347 2031 costa
    }
348
    else {
349 2139 costa
      detailLogID = getDetailLogID();
350
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID,
351
                                  harvestStartTime, status, message,
352 2031 costa
                                  harvestOperationCode, siteScheduleID,
353
                                  harvestDocument, errorMessage);
354
    }
355
356
    harvestLogList.add(harvestLog);
357
  }
358 2139 costa
359
360
  public void closeConnection() {
361
    try {
362
      // Close the database connection
363
      System.out.println("Closing the database connection.");
364
      conn.close();
365
    }
366
    catch (SQLException e) {
367
      System.out.println("Database access failed " + e);
368
    }
369
  }
370 2031 costa
371
372 2022 costa
  /**
373
   * Determines whether Harvester should attempt to connect to Metacat.
374
   * Used during development and testing.
375
   *
376
   * @return     true if Harvester should connect, otherwise false
377
   */
378
  boolean connectToMetacat () {
379
    return connectToMetacat;
380
  }
381 2036 costa
382
383
  /**
384
   * Normalizes text prior to insertion into the HARVEST_LOG or
385
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
386
   * character with the double quote character. This prevents SQL errors
387
   * involving words that contain single quotes. Also removes \n and \r
388
   * characters from the text.
389
   *
390
   * @param text  the original string
391
   * @return      a string containing the normalized text
392
   */
393 2139 costa
  public String dequoteText(String text) {
394 2036 costa
    char c;
395
    StringBuffer stringBuffer = new StringBuffer();
396 2022 costa
397 2036 costa
    for (int i = 0; i < text.length(); i++) {
398
      c = text.charAt(i);
399
      switch (c) {
400
        case '\'':
401
          stringBuffer.append('\"');
402
          break;
403
        case '\r':
404
        case '\n':
405
          break;
406
        default:
407
          stringBuffer.append(c);
408
          break;
409
      }
410
    }
411
412
    return stringBuffer.toString();
413
  }
414 2139 costa
415
  /**
416
   * Returns a connection to the database. Opens the connection if a connection
417
   * has not already been made previously.
418
   *
419
   * @return  conn  the database Connection object
420
   */
421
  public Connection getConnection() {
422
    String dbDriver = "";
423 4125 daigle
    String defaultDB = null;
424
    String password = null;
425
    String user = null;
426 2139 costa
    SQLWarning warn;
427
428
    if (conn == null) {
429 4125 daigle
    	try {
430
			dbDriver = PropertyService.getProperty("database.driver");
431
			defaultDB = PropertyService.getProperty("database.connectionURI");
432
			password = PropertyService.getProperty("database.password");
433
			user = PropertyService.getProperty("database.user");
434
		} catch (PropertyNotFoundException pnfe) {
435
			System.out.println("Can't find property " + pnfe);
436
	        System.exit(1);
437
		}
438 2022 costa
439 2139 costa
      // Load the jdbc driver
440
      try {
441
        Class.forName(dbDriver);
442
      }
443
      catch (ClassNotFoundException e) {
444
        System.out.println("Can't load driver " + e);
445
        System.exit(1);
446
      }
447
448
      // Make the database connection
449
      try {
450
        System.out.println("Getting connection to Harvester tables");
451
        conn = DriverManager.getConnection(defaultDB, user, password);
452
453
        // If a SQLWarning object is available, print its warning(s).
454
        // There may be multiple warnings chained.
455
        warn = conn.getWarnings();
456
457
        if (warn != null) {
458
          while (warn != null) {
459
            System.out.println("SQLState: " + warn.getSQLState());
460
            System.out.println("Message:  " + warn.getMessage());
461
            System.out.println("Vendor: " + warn.getErrorCode());
462
            System.out.println("");
463
            warn = warn.getNextWarning();
464
          }
465
        }
466
      }
467
      catch (SQLException e) {
468
        System.out.println("Database access failed " + e);
469
        System.exit(1);
470
      }
471
    }
472
473
    return conn;
474
  }
475
476
477 2022 costa
  /**
478 2031 costa
   * Gets the current value of the detailLogID for storage as a primary key in
479
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
480
   *
481
   * @return  the current value of the detailLogID
482
   */
483 2139 costa
  public int getDetailLogID() {
484 2031 costa
    int currentValue = detailLogID;
485
486
    detailLogID++;
487
    return currentValue;
488
  }
489
490
491
  /**
492
   * Gets the current value of the harvestLogID for storage as a primary key in
493
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
494
   *
495
   * @return  the current value of the detailLogID
496
   */
497 2139 costa
  public int getHarvestLogID() {
498 2031 costa
    int currentValue = harvestLogID;
499
500
    harvestLogID++;
501
    return currentValue;
502
  }
503
504
505
  /**
506
   * Gets the maximum value of an integer field from a table.
507
   *
508
   * @param tableName  the database table name
509
   * @param fieldName  the field name of the integer field in the table
510
   * @return  the maximum integer stored in the fieldName field of tableName
511
   */
512
  private int getMaxValue(String tableName, String fieldName) {
513 2139 costa
    int maxValue = 0;
514 2031 costa
    int fieldValue;
515 2381 costa
    String query = "SELECT " + fieldName + " FROM " + tableName;
516
    Statement stmt;
517 2031 costa
518 2381 costa
	try {
519
      stmt = conn.createStatement();
520
      ResultSet rs = stmt.executeQuery(query);
521 2031 costa
522 2381 costa
      while (rs.next()) {
523
        fieldValue = rs.getInt(fieldName);
524 2031 costa
        maxValue = Math.max(maxValue, fieldValue);
525 2381 costa
      }
526
527
      stmt.close();
528
    }
529 2031 costa
    catch(SQLException ex) {
530 2381 costa
      System.out.println("SQLException: " + ex.getMessage());
531
    }
532 2031 costa
533
    return maxValue;
534
  }
535
536
537
  /**
538
   * Gets the minimum value of an integer field from a table.
539
   *
540
   * @param tableName  the database table name
541
   * @param fieldName  the field name of the integer field in the table
542
   * @return  the minimum integer stored in the fieldName field of tableName
543
   */
544
  private int getMinValue(String tableName, String fieldName) {
545
    int minValue = 0;
546
    int fieldValue;
547 2381 costa
    String query = "SELECT " + fieldName + " FROM " + tableName;
548
    Statement stmt;
549 2031 costa
550 2381 costa
    try {
551
      stmt = conn.createStatement();
552
      ResultSet rs = stmt.executeQuery(query);
553 2031 costa
554 2381 costa
      while (rs.next()) {
555
        fieldValue = rs.getInt(fieldName);
556 2031 costa
557
        if (minValue == 0) {
558
          minValue = fieldValue;
559
        }
560
        else {
561
          minValue = Math.min(minValue, fieldValue);
562
        }
563 2381 costa
      }
564
565
      stmt.close();
566
    }
567 2031 costa
    catch(SQLException ex) {
568 2381 costa
      System.out.println("SQLException: " + ex.getMessage());
569
    }
570
571 2031 costa
    return minValue;
572
  }
573
574
575
  /**
576 2022 costa
   * For every Harvest site schedule in the database, harvest the
577
   * documents for that site if they are due to be harvested.
578
   *
579
   * @throws SAXException
580
   * @throws IOException
581
   * @throws ParserConfigurationException
582
   */
583
  private void harvest() {
584
    HarvestSiteSchedule harvestSiteSchedule;
585
586 2031 costa
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
587
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
588 2384 costa
589
      if (Harvester.schemaLocation != null) {
590
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
591
      }
592
593 2031 costa
      harvestSiteSchedule.harvestDocumentList();
594 2022 costa
    }
595
  }
596
597
598
  /**
599 2031 costa
   * Initializes the detailLogID and harvestLogID values to their current
600
   * maximums + 1.
601 2022 costa
   */
602 2139 costa
  public void initLogIDs() {
603 2031 costa
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
604
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
605
  }
606
607 2062 costa
608 2031 costa
  /**
609 2155 costa
   * Prints the header of the harvest report.
610 2086 costa
   *
611 2155 costa
   * @param out            the PrintStream object to print to
612
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
613
   *                       have a value of 0 if no particular site is involved,
614
   *                       which indicates that the report is being prepared
615
   *                       for the Harvester Administrator rather than for a
616
   *                       particular Site Contact.
617 2086 costa
   */
618 2155 costa
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
619 2086 costa
    HarvestLog harvestLog;
620 2105 costa
    int logSiteScheduleID;
621
    int nErrors = 0;
622
    String phrase;
623 2086 costa
624
    for (int i = 0; i < harvestLogList.size(); i++) {
625
      harvestLog = (HarvestLog) harvestLogList.get(i);
626 2105 costa
      logSiteScheduleID = harvestLog.getSiteScheduleID();
627
628
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
629
        if (harvestLog.isErrorEntry()) {
630
          nErrors++;
631
        }
632
      }
633 2086 costa
    }
634 2105 costa
635
    out.println(marker);
636 2155 costa
    out.println(filler);
637 2108 costa
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
638 2155 costa
    out.println(filler);
639 2105 costa
640
    if (nErrors > 0) {
641
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
642
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
643
      out.println("* Please see the log entries below for additonal details.");
644
    }
645
    else {
646
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
647
    }
648
649 2155 costa
    out.println(filler);
650 2105 costa
    out.println(marker);
651 2155 costa
  }
652
653 2105 costa
654 2155 costa
  /**
655
   * Prints harvest log entries for this harvest run. Entries may be filtered
656
   * for a particular site, or all entries may be printed.
657
   *
658
   * @param out            the PrintStream object to write to
659
   * @param maxCodeLevel   the maximum code level that should be printed,
660
   *                       e.g. "warning". Any log entries higher than this
661
   *                       level will not be printed.
662
   * @param siteScheduleID if greater than 0, indicates that the log
663
   *                       entry should only be printed for a particular site
664
   *                       as identified by its siteScheduleID. if 0, then
665
   *                       print output for all sites.
666
   */
667
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
668
                      ) {
669
    HarvestLog harvestLog;
670
    int logSiteScheduleID;
671
    int nErrors = 0;
672
    String phrase;
673
674
    out.println("");
675
    out.println(marker);
676
    out.println(filler);
677
    out.println("*                       LOG ENTRIES");
678
    out.println(filler);
679
    out.println(marker);
680
681 2105 costa
    for (int i = 0; i < harvestLogList.size(); i++) {
682
      harvestLog = (HarvestLog) harvestLogList.get(i);
683
      logSiteScheduleID = harvestLog.getSiteScheduleID();
684
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
685
        harvestLog.printOutput(out, maxCodeLevel);
686
      }
687
    }
688 2086 costa
  }
689
690
691
  /**
692 2062 costa
   * Prints the site schedule data for a given site.
693
   *
694 2086 costa
   * @param out              the PrintStream to write to
695 2062 costa
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
696 2031 costa
   */
697 2086 costa
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
698 2381 costa
    HarvestSiteSchedule harvestSiteSchedule;
699 2031 costa
700
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
701
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
702
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
703 2086 costa
        harvestSiteSchedule.printOutput(out);
704 2031 costa
      }
705
    }
706
  }
707
708
709
  /**
710
   * Prunes old records from the HARVEST_LOG table. Records are removed if
711
   * their HARVEST_DATE is older than a given number of days, as stored in the
712 2381 costa
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
713
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
714 2031 costa
   */
715
  private void pruneHarvestLog() {
716
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
717
    Date dateLastLog;                    // Prune everything prior to this date
718 2381 costa
    String deleteString;
719
    String deleteStringDetailLog;
720 2031 costa
    long delta;
721
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
722
    int recordsDeleted;
723 2381 costa
    int recordsDeletedDetail = 0;
724 2031 costa
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
725
    String dateString;
726 2381 costa
    ResultSet rs;
727
    String selectString;
728
    Statement stmt;
729 2031 costa
    long timeLastLog = 0;
730 2381 costa
    SQLWarning warn;
731
732 2031 costa
    delta = logPeriod * millisecondsPerDay;
733
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
734 2381 costa
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
735
    deleteStringDetailLog =
736
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
737 2031 costa
    timeLastLog = currentTime - delta;
738
    dateLastLog = new Date(timeLastLog);
739
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
740
    deleteString += dateString;
741 2381 costa
    selectString += dateString;
742 2031 costa
743 2381 costa
    try {
744
      System.out.println(
745
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
746
747
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
748
       * table.
749
       */
750
      stmt = conn.createStatement();
751
      rs = stmt.executeQuery(selectString);
752
      warn = rs.getWarnings();
753
754
      if (warn != null) {
755
        System.out.println("\n---Warning---\n");
756
757
        while (warn != null) {
758
          System.out.println("Message: " + warn.getMessage());
759
          System.out.println("SQLState: " + warn.getSQLState());
760
          System.out.print("Vendor error code: ");
761
          System.out.println(warn.getErrorCode());
762
          System.out.println("");
763
          warn = warn.getNextWarning();
764
        }
765
      }
766
767
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
768
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
769
       * be pruned first because its records have a child relationship to those
770
       * in HARVEST_LOG.
771
       */
772
      while (rs.next()) {
773
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
774
        stmt = conn.createStatement();
775
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog +
776
                                            harvestLogID);
777
        recordsDeletedDetail += recordsDeleted;
778
        stmt.close();
779
      }
780
781
      /* Now prune entries from the HARVEST_LOG table using a single update.
782
       */
783
      stmt = conn.createStatement();
784
      recordsDeleted = stmt.executeUpdate(deleteString);
785
      stmt.close();
786
787
      System.out.println("  " + recordsDeletedDetail +
788
                         " records deleted from HARVEST_DETAIL_LOG");
789
      System.out.println("  " + recordsDeleted +
790
                         " records deleted from HARVEST_LOG");
791
    }
792 2031 costa
    catch (SQLException e) {
793 2381 costa
      System.out.println("SQLException: " + e.getMessage());
794
    }
795 2031 costa
  }
796
797
798
  /**
799 2022 costa
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
800
   * a HarvestSiteSchedule object for each row in the table.
801
   */
802
  private void readHarvestSiteSchedule() {
803
    HarvestSiteSchedule harvestSiteSchedule;
804
    ResultSet rs;
805
    SQLWarning warn;
806
    Statement stmt;
807
808
    String contactEmail;
809
    String dateLastHarvest;
810
    String dateNextHarvest;
811
    String documentListURL;
812
    String ldapDN;
813 2031 costa
    String ldapPwd;
814 2022 costa
    int siteScheduleID;
815
    String unit;
816
    int updateFrequency;
817
818
    try {
819
      // Read the HARVEST_SITE_SCHEDULE table
820
      stmt = conn.createStatement();
821
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
822
      warn = rs.getWarnings();
823
824
      if (warn != null) {
825
        System.out.println("\n---Warning---\n");
826
827
        while (warn != null) {
828
          System.out.println("Message: " + warn.getMessage());
829
          System.out.println("SQLState: " + warn.getSQLState());
830
          System.out.print("Vendor error code: ");
831
          System.out.println(warn.getErrorCode());
832
          System.out.println("");
833
          warn = warn.getNextWarning();
834
        }
835
      }
836
837
      while (rs.next()) {
838
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
839
        documentListURL = rs.getString("DOCUMENTLISTURL");
840
        ldapDN = rs.getString("LDAPDN");
841 2031 costa
        ldapPwd = rs.getString("LDAPPWD");
842 2022 costa
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
843
        dateLastHarvest = rs.getString("DATELASTHARVEST");
844
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
845
        unit = rs.getString("UNIT");
846
        contactEmail = rs.getString("CONTACT_EMAIL");
847
848
        warn = rs.getWarnings();
849
850
        if (warn != null) {
851
          System.out.println("\n---Warning---\n");
852
853
          while (warn != null) {
854
            System.out.println("Message: " + warn.getMessage());
855
            System.out.println("SQLState: " + warn.getSQLState());
856
            System.out.print("Vendor error code: ");
857
            System.out.println(warn.getErrorCode());
858
            System.out.println("");
859
            warn = warn.getNextWarning();
860
          }
861
        }
862
863 2031 costa
        harvestSiteSchedule = new HarvestSiteSchedule(this,
864 2022 costa
                                                      siteScheduleID,
865
                                                      documentListURL,
866
                                                      ldapDN,
867 2031 costa
                                                      ldapPwd,
868 2022 costa
                                                      dateNextHarvest,
869
                                                      dateLastHarvest,
870
                                                      updateFrequency,
871
                                                      unit,
872
                                                      contactEmail
873
                                                     );
874 2031 costa
        harvestSiteScheduleList.add(harvestSiteSchedule);
875 2022 costa
      }
876 2031 costa
877
      rs.close();
878
      stmt.close();
879
    }
880 2022 costa
    catch (SQLException e) {
881
      System.out.println("Database access failed " + e);
882
      System.exit(1);
883
    }
884
885
  }
886
887
888
  /**
889 2155 costa
   * Sends a report to the Harvester Administrator. The report prints each log
890 2086 costa
   * entry pertaining to this harvest run.
891 2105 costa
   *
892
   * @param maxCodeLevel  the maximum code level that should be printed,
893
   *                      e.g. "warning". Any log entries higher than this
894
   *                      level will not be printed.
895 2022 costa
   */
896 2105 costa
  void reportToAdministrator(String maxCodeLevel) {
897 2086 costa
    PrintStream body;
898
    String from = harvesterAdministrator;
899 2330 costa
    String[] fromArray;
900 2086 costa
    MailMessage msg;
901 2105 costa
    int siteScheduleID = 0;
902 2108 costa
    String subject = "Report from Metacat Harvester: " + timestamp;
903 2086 costa
    String to = harvesterAdministrator;
904
905
    if (!to.equals("")) {
906
      System.out.println("Sending report to Harvester Administrator at address "
907
                         + harvesterAdministrator);
908
909
      try {
910
        msg = new MailMessage(smtpServer);
911 2330 costa
912
        if (from.indexOf(',') > 0) {
913
          fromArray = from.split(",");
914
915
          for (int i = 0; i < fromArray.length; i++) {
916
            if (i == 0) {
917
              msg.from(fromArray[i]);
918
            }
919
920
            msg.to(fromArray[i]);
921
          }
922
        }
923
        else if (from.indexOf(';') > 0) {
924
          fromArray = from.split(";");
925
926
          for (int i = 0; i < fromArray.length; i++) {
927
            if (i == 0) {
928
              msg.from(fromArray[i]);
929
            }
930
931
            msg.to(fromArray[i]);
932
          }
933
        }
934
        else {
935
          msg.from(from);
936
          msg.to(to);
937
        }
938
939 2086 costa
        msg.setSubject(subject);
940
        body = msg.getPrintStream();
941 2155 costa
        printHarvestHeader(body, siteScheduleID);
942 2105 costa
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
943 2086 costa
        msg.sendAndClose();
944
      }
945
      catch (IOException e) {
946
        System.out.println("There was a problem sending email to " + to);
947
        System.out.println("IOException: " + e.getMessage());
948
      }
949 2139 costa
    }
950 2022 costa
  }
951 2139 costa
952
953
  /**
954
   * Sets the harvest start time for this harvest run.
955
   *
956
   * @param date
957
   */
958
  public void setHarvestStartTime(Date date) {
959
    harvestStartTime = date;
960
  }
961 2022 costa
962
963
  /**
964
   * Shuts down Harvester. Performs cleanup operations such as logging out
965
   * of Metacat and disconnecting from the database.
966
   */
967
  private void shutdown() {
968 2105 costa
    String maxCodeLevel = "debug";  // Print all log entries from level 1
969
                                    // ("error") to level 5 ("debug")
970
    int siteScheduleID = 0;
971
972 2022 costa
    // Log shutdown operation
973
    System.out.println("Shutting Down Harvester");
974 4175 daigle
    addLogEntry(0, "Shutting Down Harvester", "harvester.HarvesterShutdown", 0, null, "");
975 2031 costa
    pruneHarvestLog();
976 2139 costa
    closeConnection();
977 2105 costa
    // Print log to standard output and then email the Harvester administrator
978
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
979
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
980 2022 costa
  }
981
982
983 4125 daigle
    /**
984
	 * Initializes Harvester at startup. Connects to the database and to Metacat.
985
	 *
986
	 * @param nHarvests        the nth harvest
987
	 * @param maxHarvests      the maximum number of harvests that this process
988
	 *                         can run
989
	 */
990
	private void startup(int nHarvests, int maxHarvests) {
991
		Boolean ctm;
992
		Integer lp;
993
		String metacatURL;
994
		Date now = new Date();
995 2062 costa
996 4125 daigle
		timestamp = now.toString();
997
		System.out.println(Harvester.marker);
998
		System.out.print(timestamp + ": Starting Next Harvest");
999
		if (maxHarvests > 0) {
1000
			System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
1001
		}
1002
		System.out.print("\n");
1003
		try {
1004 4175 daigle
			ctm = Boolean.valueOf(PropertyService.getProperty("harvester.connectToMetacat"));
1005 4125 daigle
			connectToMetacat = ctm.booleanValue();
1006
			harvesterAdministrator = PropertyService
1007 4175 daigle
					.getProperty("harvester.administrator");
1008
			smtpServer = PropertyService.getProperty("harvester.smtpServer");
1009 2062 costa
1010 4175 daigle
			lp = Integer.valueOf(PropertyService.getProperty("harvester.logPeriod"));
1011 4125 daigle
			logPeriod = lp.intValue();
1012
		} catch (NumberFormatException e) {
1013
			System.err.println("NumberFormatException: Error parsing logPeriod "
1014
					+ logPeriod + e.getMessage());
1015
			System.err.println("Defaulting to logPeriod of 90 days");
1016
			logPeriod = 90;
1017
		} catch (PropertyNotFoundException pnfe) {
1018
			System.out.println("PropertyNotFoundException: Error getting property: "
1019
					+ pnfe.getMessage());
1020
			return;
1021
		}
1022 2022 costa
1023 4125 daigle
		conn = getConnection();
1024
		initLogIDs();
1025
		setHarvestStartTime(now);
1026
		// Log startup operation
1027 4175 daigle
		addLogEntry(0, "Starting Up Harvester", "harvester.HarvesterStartup", 0, null, "");
1028 4125 daigle
1029
		if (connectToMetacat()) {
1030
			try {
1031
				metacatURL = SystemUtil.getServletURL();
1032
				System.out.println("Connecting to Metacat: " + metacatURL);
1033
				metacat = MetacatFactory.createMetacatConnection(metacatURL);
1034
			} catch (MetacatInaccessibleException e) {
1035
				System.out.println("Metacat connection failed." + e.getMessage());
1036
			} catch (Exception e) {
1037
				System.out.println("Metacat connection failed." + e.getMessage());
1038
			}
1039
		}
1040
	}
1041
1042 2022 costa
}