Project

General

Profile

1 2094 jones
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the
4
 *                  Regents of the University of California
5 2022 costa
 *
6 2094 jones
 *   '$Author$'
7
 *     '$Date$'
8
 * '$Revision$'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 2022 costa
 */
24
25
package edu.ucsb.nceas.metacat.harvesterClient;
26
27 2086 costa
import com.oreilly.servlet.MailMessage;
28 2031 costa
import java.io.IOException;
29 2086 costa
import java.io.PrintStream;
30 2031 costa
import java.sql.Connection;
31
import java.sql.DriverManager;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.sql.SQLWarning;
35
import java.sql.Statement;
36
import java.util.ArrayList;
37
import java.text.SimpleDateFormat;
38
import java.util.Date;
39 2022 costa
40 4999 costa
import org.apache.log4j.Logger;
41
import org.apache.log4j.PropertyConfigurator;
42 4080 daigle
43 2031 costa
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46 5030 daigle
import edu.ucsb.nceas.metacat.properties.PropertyService;
47 5015 daigle
import edu.ucsb.nceas.metacat.shared.ServiceException;
48 4080 daigle
import edu.ucsb.nceas.metacat.util.SystemUtil;
49 4125 daigle
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
50 2022 costa
51
/**
52
 * Harvester is the main class for the Harvester application. The main
53
 * method creates a single Harvester object which drives the application.
54
 *
55
 * @author    costa
56
 *
57
 */
58
public class Harvester {
59
60
  /*
61
   * Class fields
62
   */
63 2155 costa
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
64
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
65 4125 daigle
//  private static final String CONFIG_NAME = "metacat.properties";
66 4999 costa
  private static final String LOG_CONFIG_NAME = "../../build/war/WEB-INF/log4j.properties";
67 2155 costa
  public static final String filler = "*";
68 2995 costa
  private static boolean keepRunning = true;
69 2155 costa
  public static final String marker =
70 2062 costa
"*****************************************************************************";
71 4125 daigle
//  public static PropertyService propertyService = null;
72 2384 costa
  private static String schemaLocation = null;
73 2022 costa
74
75
  /*
76
   * Class methods
77
   */
78
79 2031 costa
80 2022 costa
  /**
81
   * Constructor. Creates a new instance of Harvester.
82
   */
83
  public Harvester() {
84
  }
85
86 2031 costa
87 2022 costa
  /**
88 2155 costa
   * Loads Harvester options from a configuration file.
89 2022 costa
   */
90 4999 costa
  public static void loadProperties(boolean commandLineMode, boolean test) {
91 2155 costa
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;
92 2062 costa
93
    try {
94 4999 costa
      if (commandLineMode) {
95 5030 daigle
        PropertyService.getInstance(configDir);
96 4999 costa
      }
97
      else {
98
        PropertyService.getInstance();
99
      }
100 2155 costa
    }
101 4125 daigle
    catch (ServiceException e) {
102
      System.out.println("Error in loading properties: " + e.getMessage());
103 2062 costa
    }
104 2022 costa
  }
105
106 2062 costa
107 4125 daigle
    /**
108
	 * Harvester main method.
109
	 *
110 4999 costa
	 * @param args               the command line arguments
111
	 *
112
	 *   args[0] if "false", then this is not command-line mode,
113
	 *           Command-line mode is true by default.
114
	 *
115
	 *   args[1] if present, represents the path to the harvest list schema file.
116
	 *           Specifying it overrides the default path to the schema file.
117
	 *
118 4125 daigle
	 * @throws SAXException
119
	 * @throws IOException
120
	 * @throws ParserConfigurationException
121
	 */
122
	public static void main(String[] args) {
123 4999 costa
	    boolean commandLineMode = true;
124
	    boolean test = false;   // set to true for JUnit testing
125
126
	    Integer delayDefault = new Integer(0); // Default number of hours delay
127 4125 daigle
		int delay = delayDefault.intValue(); // Delay in hours before first
128
												// harvest
129
		Integer d; // Used for determining delay
130
		long delta; // endTime - startTime
131
		long endTime; // time that a harvest completes
132
		Harvester harvester; // object for a single harvest run
133
		Integer maxHarvestsDefault = new Integer(0); // Default max harvests
134
		int maxHarvests = maxHarvestsDefault.intValue(); // Max number of
135
															// harvests
136
		Integer mh; // used in determining max harvests
137
		int nHarvests = 0; // counts the number of harvest runs
138
		final long oneHour = (60 * 60 * 1000); // milliseconds in one hour
139
		Integer periodDefault = new Integer(24); // Default hours between
140
													// harvests
141
		int period = periodDefault.intValue(); // Hours between harvests
142
		Integer p; // Used in determining the period
143
		long startTime; // time that a harvest run starts
144 4999 costa
145
		if ((args.length > 0) && (args[0] != null)) {
146
		  if (args[0].equals("false")) {
147
		    commandLineMode = false;
148
		  }
149
		  else {
150
		    // If commandLineMode is true, initialize log4j properties
151
	        PropertyConfigurator.configureAndWatch(LOG_CONFIG_NAME);
152
		  }
153
		}
154 2062 costa
155 4999 costa
		/*
156
		 * If there is a second argument, it is the schemaLocation value
157
		 */
158
		if (args.length > 1) {
159
			schemaLocation = args[1];
160 4125 daigle
			System.err.println("schemaLocation: " + schemaLocation);
161 2062 costa
162 4125 daigle
			try {
163
				Thread.sleep(10000);
164
			} catch (InterruptedException e) {
165
				e.printStackTrace();
166
			}
167
		}
168 2062 costa
169 4125 daigle
		System.out.println(marker);
170
		System.out.println("Starting Harvester");
171 4999 costa
		Harvester.loadProperties(commandLineMode, test);
172 2062 costa
173 4125 daigle
		// Parse the delay property. Use default if necessary.
174
		try {
175 4175 daigle
			d = Integer.valueOf(PropertyService.getProperty("harvester.delay"));
176 4125 daigle
			delay = d.intValue();
177
		} catch (NumberFormatException e) {
178
			System.out.println("NumberFormatException: Error parsing delay: "
179
					+ e.getMessage());
180
			System.out.println("Defaulting to delay=" + delayDefault);
181
			delay = delayDefault.intValue();
182
		} catch (PropertyNotFoundException pnfe) {
183
			System.out.println("PropertyNotFoundException: Error finding delay: "
184
					+ pnfe.getMessage());
185
			System.out.println("Defaulting to delay=" + delayDefault);
186
			delay = delayDefault.intValue();
187
		}
188 2062 costa
189 4125 daigle
		// Parse the maxHarvests property. Use default if necessary.
190
		try {
191 4175 daigle
			mh = Integer.valueOf(PropertyService.getProperty("harvester.maxHarvests"));
192 4125 daigle
			maxHarvests = mh.intValue();
193
		} catch (NumberFormatException e) {
194
			System.out.println("NumberFormatException: Error parsing maxHarvests: "
195
					+ e.getMessage());
196
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
197
			maxHarvests = maxHarvestsDefault.intValue();
198
		} catch (PropertyNotFoundException pnfe) {
199
			System.out.println("PropertyNotFoundException: Error finding maxHarvests: "
200
					+ pnfe.getMessage());
201
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
202
			maxHarvests = maxHarvestsDefault.intValue();
203
		}
204
205
		// Parse the period property. Use default if necessary.
206
		try {
207 4175 daigle
			p = Integer.valueOf(PropertyService.getProperty("harvester.period"));
208 4125 daigle
			period = p.intValue();
209
		} catch (NumberFormatException e) {
210
			System.out.println("NumberFormatException: Error parsing period: "
211
					+ e.getMessage());
212
			System.out.println("Defaulting to period=" + periodDefault);
213
			period = periodDefault.intValue();
214
		} catch (PropertyNotFoundException pnfe) {
215
			System.out.println("PropertyNotFoundException: Error finding period: "
216
					+ pnfe.getMessage());
217
			System.out.println("Defaulting to period=" + periodDefault);
218
			period = periodDefault.intValue();
219
		}
220
221
		// Sleep for delay number of hours prior to starting first harvest
222
		if (delay > 0) {
223
			try {
224
				System.out.print("First harvest will begin in " + delay);
225
				if (delay == 1) {
226
					System.out.println(" hour.");
227
				} else {
228
					System.out.println(" hours.");
229
				}
230
				Thread.sleep(delay * oneHour);
231
			} catch (InterruptedException e) {
232
				System.err.println("InterruptedException: " + e.getMessage());
233
				System.exit(1);
234
			}
235
		}
236
237 2062 costa
    // Repeat a new harvest once every period number of hours, until we reach
238 2426 costa
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
239 4125 daigle
    // Subtract delta from the time period so
240 2062 costa
    // that each harvest will start at a fixed interval.
241
    //
242 2995 costa
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
243 2062 costa
      nHarvests++;
244
      startTime = System.currentTimeMillis();
245 4125 daigle
      harvester = new Harvester();                // New object for this
246
													// harvest
247 2062 costa
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
248
      harvester.readHarvestSiteSchedule();        // Read the database table
249
      harvester.harvest();                        // Harvest the documents
250
      harvester.shutdown();                       // Shut down Harvester
251
      endTime = System.currentTimeMillis();
252
      delta = endTime - startTime;
253
254 2426 costa
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
255 2062 costa
        try {
256 2203 costa
          System.out.println("Next harvest will begin in " +
257
                             period + " hours.");
258 2108 costa
          Thread.sleep((period * oneHour) - delta);
259 2062 costa
        }
260
        catch (InterruptedException e) {
261
          System.err.println("InterruptedException: " + e.getMessage());
262
          System.exit(1);
263
        }
264
      }
265
    }
266 2022 costa
  }
267 2995 costa
268
269
  /**
270
   * Set the keepRunning flag. If set to false, the main program will end
271
   * the while loop that keeps harvester running every period number of hours.
272
   * The static method is intended to be called from the HarvesterServlet class
273
   * which creates a thread to run Harvester. When the thread is destroyed, the
274
   * thread's destroy() method calls Harvester.setKeepRunning(false).
275
   *
276
   * @param keepRunning
277
   */
278
  static void setKeepRunning(boolean keepRunning) {
279
    Harvester.keepRunning = keepRunning;
280
  }
281 2022 costa
282 2995 costa
283 2022 costa
  /*
284
   * Object fields
285
   */
286
287 2031 costa
  /** Database connection */
288 2139 costa
  private Connection conn = null;
289 2031 costa
290 2062 costa
  /** Used during development to determine whether to connect to metacat
291
   *  Sometimes it's useful to test parts of the code without actually
292
   *  connecting to Metacat.
293
   */
294 2031 costa
  private boolean connectToMetacat;
295
296
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
297
  private int detailLogID;
298
299 2061 costa
  /** Email address of the Harvester Administrator */
300 2105 costa
  String harvesterAdministrator;
301 2061 costa
302 2031 costa
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
303
  private int harvestLogID;
304
305
  /** End time of this harvest session */
306
  private Date harvestEndTime;
307
308
  /** List of HarvestLog objects. Stores log entries for report generation. */
309
  private ArrayList harvestLogList = new ArrayList();
310
311
  /** List of HarvestSiteSchedule objects */
312
  private ArrayList harvestSiteScheduleList = new ArrayList();
313
314
  /** Start time of this harvest session */
315
  private Date harvestStartTime;
316
317
  /** Number of days to save log records. Any that are older are purged. */
318
  int logPeriod;
319
320
  /** Metacat client object */
321 2022 costa
  Metacat metacat;
322 2031 costa
323 2086 costa
  /** SMTP server for sending mail messages */
324
  String smtpServer;
325
326 2108 costa
  /** The timestamp for this harvest run. Used for output only. */
327
  String timestamp;
328
329 2022 costa
330
  /*
331
   * Object methods
332
   */
333 2031 costa
334
  /**
335
   * Creates a new HarvestLog object and adds it to the harvestLogList.
336
   *
337
   * @param  status          the status of the harvest operation
338
   * @param  message         the message text of the harvest operation
339
   * @param  harvestOperationCode  the harvest operation code
340
   * @param  siteScheduleID  the siteScheduleID for which this operation was
341
   *                         performed. 0 indicates that the operation did not
342
   *                         involve a particular harvest site.
343
   * @param  harvestDocument the associated HarvestDocument object. May be null.
344
   * @param  errorMessage    additional error message pertaining to document
345
   *                         error.
346
   */
347
  void addLogEntry(int    status,
348
                   String message,
349
                   String harvestOperationCode,
350
                   int    siteScheduleID,
351
                   HarvestDocument harvestDocument,
352
                   String errorMessage
353
                  ) {
354
    HarvestLog harvestLog;
355 2139 costa
    int harvestLogID = getHarvestLogID();
356
    int detailLogID;
357 2022 costa
358 2031 costa
    /* If there is no associated harvest document, call the basic constructor;
359
     * else call the extended constructor.
360
     */
361
    if (harvestDocument == null) {
362 2139 costa
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime,
363
                                  status, message, harvestOperationCode,
364
                                  siteScheduleID);
365 2031 costa
    }
366
    else {
367 2139 costa
      detailLogID = getDetailLogID();
368
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID,
369
                                  harvestStartTime, status, message,
370 2031 costa
                                  harvestOperationCode, siteScheduleID,
371
                                  harvestDocument, errorMessage);
372
    }
373
374
    harvestLogList.add(harvestLog);
375
  }
376 2139 costa
377
378
  public void closeConnection() {
379
    try {
380
      // Close the database connection
381
      System.out.println("Closing the database connection.");
382
      conn.close();
383
    }
384
    catch (SQLException e) {
385
      System.out.println("Database access failed " + e);
386
    }
387
  }
388 2031 costa
389
390 2022 costa
  /**
391
   * Determines whether Harvester should attempt to connect to Metacat.
392
   * Used during development and testing.
393
   *
394
   * @return     true if Harvester should connect, otherwise false
395
   */
396
  boolean connectToMetacat () {
397
    return connectToMetacat;
398
  }
399 2036 costa
400
401
  /**
402
   * Normalizes text prior to insertion into the HARVEST_LOG or
403
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
404
   * character with the double quote character. This prevents SQL errors
405
   * involving words that contain single quotes. Also removes \n and \r
406
   * characters from the text.
407
   *
408
   * @param text  the original string
409
   * @return      a string containing the normalized text
410
   */
411 2139 costa
  public String dequoteText(String text) {
412 2036 costa
    char c;
413
    StringBuffer stringBuffer = new StringBuffer();
414 2022 costa
415 2036 costa
    for (int i = 0; i < text.length(); i++) {
416
      c = text.charAt(i);
417
      switch (c) {
418
        case '\'':
419
          stringBuffer.append('\"');
420
          break;
421
        case '\r':
422
        case '\n':
423
          break;
424
        default:
425
          stringBuffer.append(c);
426
          break;
427
      }
428
    }
429
430
    return stringBuffer.toString();
431
  }
432 2139 costa
433
  /**
434
   * Returns a connection to the database. Opens the connection if a connection
435
   * has not already been made previously.
436
   *
437
   * @return  conn  the database Connection object
438
   */
439
  public Connection getConnection() {
440
    String dbDriver = "";
441 4125 daigle
    String defaultDB = null;
442
    String password = null;
443
    String user = null;
444 2139 costa
    SQLWarning warn;
445
446
    if (conn == null) {
447 4125 daigle
    	try {
448
			dbDriver = PropertyService.getProperty("database.driver");
449
			defaultDB = PropertyService.getProperty("database.connectionURI");
450
			password = PropertyService.getProperty("database.password");
451
			user = PropertyService.getProperty("database.user");
452
		} catch (PropertyNotFoundException pnfe) {
453
			System.out.println("Can't find property " + pnfe);
454
	        System.exit(1);
455
		}
456 2022 costa
457 2139 costa
      // Load the jdbc driver
458
      try {
459
        Class.forName(dbDriver);
460
      }
461
      catch (ClassNotFoundException e) {
462
        System.out.println("Can't load driver " + e);
463
        System.exit(1);
464
      }
465
466
      // Make the database connection
467
      try {
468
        System.out.println("Getting connection to Harvester tables");
469
        conn = DriverManager.getConnection(defaultDB, user, password);
470
471
        // If a SQLWarning object is available, print its warning(s).
472
        // There may be multiple warnings chained.
473
        warn = conn.getWarnings();
474
475
        if (warn != null) {
476
          while (warn != null) {
477
            System.out.println("SQLState: " + warn.getSQLState());
478
            System.out.println("Message:  " + warn.getMessage());
479
            System.out.println("Vendor: " + warn.getErrorCode());
480
            System.out.println("");
481
            warn = warn.getNextWarning();
482
          }
483
        }
484
      }
485
      catch (SQLException e) {
486
        System.out.println("Database access failed " + e);
487
        System.exit(1);
488
      }
489
    }
490
491
    return conn;
492
  }
493
494
495 2022 costa
  /**
496 2031 costa
   * Gets the current value of the detailLogID for storage as a primary key in
497
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
498
   *
499
   * @return  the current value of the detailLogID
500
   */
501 2139 costa
  public int getDetailLogID() {
502 2031 costa
    int currentValue = detailLogID;
503
504
    detailLogID++;
505
    return currentValue;
506
  }
507
508
509
  /**
510
   * Gets the current value of the harvestLogID for storage as a primary key in
511
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
512
   *
513
   * @return  the current value of the detailLogID
514
   */
515 2139 costa
  public int getHarvestLogID() {
516 2031 costa
    int currentValue = harvestLogID;
517
518
    harvestLogID++;
519
    return currentValue;
520
  }
521
522
523
  /**
524
   * Gets the maximum value of an integer field from a table.
525
   *
526
   * @param tableName  the database table name
527
   * @param fieldName  the field name of the integer field in the table
528
   * @return  the maximum integer stored in the fieldName field of tableName
529
   */
530
  private int getMaxValue(String tableName, String fieldName) {
531 2139 costa
    int maxValue = 0;
532 2031 costa
    int fieldValue;
533 2381 costa
    String query = "SELECT " + fieldName + " FROM " + tableName;
534
    Statement stmt;
535 2031 costa
536 2381 costa
	try {
537
      stmt = conn.createStatement();
538
      ResultSet rs = stmt.executeQuery(query);
539 2031 costa
540 2381 costa
      while (rs.next()) {
541
        fieldValue = rs.getInt(fieldName);
542 2031 costa
        maxValue = Math.max(maxValue, fieldValue);
543 2381 costa
      }
544
545
      stmt.close();
546
    }
547 2031 costa
    catch(SQLException ex) {
548 2381 costa
      System.out.println("SQLException: " + ex.getMessage());
549
    }
550 2031 costa
551
    return maxValue;
552
  }
553
554
555
  /**
556
   * Gets the minimum value of an integer field from a table.
557
   *
558
   * @param tableName  the database table name
559
   * @param fieldName  the field name of the integer field in the table
560
   * @return  the minimum integer stored in the fieldName field of tableName
561
   */
562
  private int getMinValue(String tableName, String fieldName) {
563
    int minValue = 0;
564
    int fieldValue;
565 2381 costa
    String query = "SELECT " + fieldName + " FROM " + tableName;
566
    Statement stmt;
567 2031 costa
568 2381 costa
    try {
569
      stmt = conn.createStatement();
570
      ResultSet rs = stmt.executeQuery(query);
571 2031 costa
572 2381 costa
      while (rs.next()) {
573
        fieldValue = rs.getInt(fieldName);
574 2031 costa
575
        if (minValue == 0) {
576
          minValue = fieldValue;
577
        }
578
        else {
579
          minValue = Math.min(minValue, fieldValue);
580
        }
581 2381 costa
      }
582
583
      stmt.close();
584
    }
585 2031 costa
    catch(SQLException ex) {
586 2381 costa
      System.out.println("SQLException: " + ex.getMessage());
587
    }
588
589 2031 costa
    return minValue;
590
  }
591
592
593
  /**
594 2022 costa
   * For every Harvest site schedule in the database, harvest the
595
   * documents for that site if they are due to be harvested.
596
   *
597
   * @throws SAXException
598
   * @throws IOException
599
   * @throws ParserConfigurationException
600
   */
601
  private void harvest() {
602
    HarvestSiteSchedule harvestSiteSchedule;
603
604 2031 costa
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
605
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
606 2384 costa
607
      if (Harvester.schemaLocation != null) {
608
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
609
      }
610
611 2031 costa
      harvestSiteSchedule.harvestDocumentList();
612 2022 costa
    }
613
  }
614
615
616
  /**
617 2031 costa
   * Initializes the detailLogID and harvestLogID values to their current
618
   * maximums + 1.
619 2022 costa
   */
620 2139 costa
  public void initLogIDs() {
621 2031 costa
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
622
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
623
  }
624
625 2062 costa
626 2031 costa
  /**
627 2155 costa
   * Prints the header of the harvest report.
628 2086 costa
   *
629 2155 costa
   * @param out            the PrintStream object to print to
630
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
631
   *                       have a value of 0 if no particular site is involved,
632
   *                       which indicates that the report is being prepared
633
   *                       for the Harvester Administrator rather than for a
634
   *                       particular Site Contact.
635 2086 costa
   */
636 2155 costa
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
637 2086 costa
    HarvestLog harvestLog;
638 2105 costa
    int logSiteScheduleID;
639
    int nErrors = 0;
640
    String phrase;
641 2086 costa
642
    for (int i = 0; i < harvestLogList.size(); i++) {
643
      harvestLog = (HarvestLog) harvestLogList.get(i);
644 2105 costa
      logSiteScheduleID = harvestLog.getSiteScheduleID();
645
646
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
647
        if (harvestLog.isErrorEntry()) {
648
          nErrors++;
649
        }
650
      }
651 2086 costa
    }
652 2105 costa
653
    out.println(marker);
654 2155 costa
    out.println(filler);
655 2108 costa
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
656 2155 costa
    out.println(filler);
657 2105 costa
658
    if (nErrors > 0) {
659
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
660
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
661
      out.println("* Please see the log entries below for additonal details.");
662
    }
663
    else {
664
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
665
    }
666
667 2155 costa
    out.println(filler);
668 2105 costa
    out.println(marker);
669 2155 costa
  }
670
671 2105 costa
672 2155 costa
  /**
673
   * Prints harvest log entries for this harvest run. Entries may be filtered
674
   * for a particular site, or all entries may be printed.
675
   *
676
   * @param out            the PrintStream object to write to
677
   * @param maxCodeLevel   the maximum code level that should be printed,
678
   *                       e.g. "warning". Any log entries higher than this
679
   *                       level will not be printed.
680
   * @param siteScheduleID if greater than 0, indicates that the log
681
   *                       entry should only be printed for a particular site
682
   *                       as identified by its siteScheduleID. if 0, then
683
   *                       print output for all sites.
684
   */
685
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
686
                      ) {
687
    HarvestLog harvestLog;
688
    int logSiteScheduleID;
689
    int nErrors = 0;
690
    String phrase;
691
692
    out.println("");
693
    out.println(marker);
694
    out.println(filler);
695
    out.println("*                       LOG ENTRIES");
696
    out.println(filler);
697
    out.println(marker);
698
699 2105 costa
    for (int i = 0; i < harvestLogList.size(); i++) {
700
      harvestLog = (HarvestLog) harvestLogList.get(i);
701
      logSiteScheduleID = harvestLog.getSiteScheduleID();
702
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
703
        harvestLog.printOutput(out, maxCodeLevel);
704
      }
705
    }
706 2086 costa
  }
707
708
709
  /**
710 2062 costa
   * Prints the site schedule data for a given site.
711
   *
712 2086 costa
   * @param out              the PrintStream to write to
713 2062 costa
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
714 2031 costa
   */
715 2086 costa
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
716 2381 costa
    HarvestSiteSchedule harvestSiteSchedule;
717 2031 costa
718
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
719
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
720
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
721 2086 costa
        harvestSiteSchedule.printOutput(out);
722 2031 costa
      }
723
    }
724
  }
725
726
727
  /**
728
   * Prunes old records from the HARVEST_LOG table. Records are removed if
729
   * their HARVEST_DATE is older than a given number of days, as stored in the
730 2381 costa
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
731
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
732 2031 costa
   */
733
  private void pruneHarvestLog() {
734
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
735
    Date dateLastLog;                    // Prune everything prior to this date
736 2381 costa
    String deleteString;
737
    String deleteStringDetailLog;
738 2031 costa
    long delta;
739
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
740
    int recordsDeleted;
741 2381 costa
    int recordsDeletedDetail = 0;
742 2031 costa
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
743
    String dateString;
744 2381 costa
    ResultSet rs;
745
    String selectString;
746
    Statement stmt;
747 2031 costa
    long timeLastLog = 0;
748 2381 costa
    SQLWarning warn;
749
750 2031 costa
    delta = logPeriod * millisecondsPerDay;
751
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
752 2381 costa
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
753
    deleteStringDetailLog =
754
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
755 2031 costa
    timeLastLog = currentTime - delta;
756
    dateLastLog = new Date(timeLastLog);
757
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
758
    deleteString += dateString;
759 2381 costa
    selectString += dateString;
760 2031 costa
761 2381 costa
    try {
762
      System.out.println(
763
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
764
765
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
766
       * table.
767
       */
768
      stmt = conn.createStatement();
769
      rs = stmt.executeQuery(selectString);
770
      warn = rs.getWarnings();
771
772
      if (warn != null) {
773
        System.out.println("\n---Warning---\n");
774
775
        while (warn != null) {
776
          System.out.println("Message: " + warn.getMessage());
777
          System.out.println("SQLState: " + warn.getSQLState());
778
          System.out.print("Vendor error code: ");
779
          System.out.println(warn.getErrorCode());
780
          System.out.println("");
781
          warn = warn.getNextWarning();
782
        }
783
      }
784
785
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
786
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
787
       * be pruned first because its records have a child relationship to those
788
       * in HARVEST_LOG.
789
       */
790
      while (rs.next()) {
791
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
792
        stmt = conn.createStatement();
793
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog +
794
                                            harvestLogID);
795
        recordsDeletedDetail += recordsDeleted;
796
        stmt.close();
797
      }
798
799
      /* Now prune entries from the HARVEST_LOG table using a single update.
800
       */
801
      stmt = conn.createStatement();
802
      recordsDeleted = stmt.executeUpdate(deleteString);
803
      stmt.close();
804
805
      System.out.println("  " + recordsDeletedDetail +
806
                         " records deleted from HARVEST_DETAIL_LOG");
807
      System.out.println("  " + recordsDeleted +
808
                         " records deleted from HARVEST_LOG");
809
    }
810 2031 costa
    catch (SQLException e) {
811 2381 costa
      System.out.println("SQLException: " + e.getMessage());
812
    }
813 2031 costa
  }
814
815
816
  /**
817 2022 costa
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
818
   * a HarvestSiteSchedule object for each row in the table.
819
   */
820
  private void readHarvestSiteSchedule() {
821
    HarvestSiteSchedule harvestSiteSchedule;
822
    ResultSet rs;
823
    SQLWarning warn;
824
    Statement stmt;
825
826
    String contactEmail;
827
    String dateLastHarvest;
828
    String dateNextHarvest;
829
    String documentListURL;
830
    String ldapDN;
831 2031 costa
    String ldapPwd;
832 2022 costa
    int siteScheduleID;
833
    String unit;
834
    int updateFrequency;
835
836
    try {
837
      // Read the HARVEST_SITE_SCHEDULE table
838
      stmt = conn.createStatement();
839
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
840
      warn = rs.getWarnings();
841
842
      if (warn != null) {
843
        System.out.println("\n---Warning---\n");
844
845
        while (warn != null) {
846
          System.out.println("Message: " + warn.getMessage());
847
          System.out.println("SQLState: " + warn.getSQLState());
848
          System.out.print("Vendor error code: ");
849
          System.out.println(warn.getErrorCode());
850
          System.out.println("");
851
          warn = warn.getNextWarning();
852
        }
853
      }
854
855
      while (rs.next()) {
856
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
857
        documentListURL = rs.getString("DOCUMENTLISTURL");
858
        ldapDN = rs.getString("LDAPDN");
859 2031 costa
        ldapPwd = rs.getString("LDAPPWD");
860 2022 costa
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
861
        dateLastHarvest = rs.getString("DATELASTHARVEST");
862
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
863
        unit = rs.getString("UNIT");
864
        contactEmail = rs.getString("CONTACT_EMAIL");
865
866
        warn = rs.getWarnings();
867
868
        if (warn != null) {
869
          System.out.println("\n---Warning---\n");
870
871
          while (warn != null) {
872
            System.out.println("Message: " + warn.getMessage());
873
            System.out.println("SQLState: " + warn.getSQLState());
874
            System.out.print("Vendor error code: ");
875
            System.out.println(warn.getErrorCode());
876
            System.out.println("");
877
            warn = warn.getNextWarning();
878
          }
879
        }
880
881 2031 costa
        harvestSiteSchedule = new HarvestSiteSchedule(this,
882 2022 costa
                                                      siteScheduleID,
883
                                                      documentListURL,
884
                                                      ldapDN,
885 2031 costa
                                                      ldapPwd,
886 2022 costa
                                                      dateNextHarvest,
887
                                                      dateLastHarvest,
888
                                                      updateFrequency,
889
                                                      unit,
890
                                                      contactEmail
891
                                                     );
892 2031 costa
        harvestSiteScheduleList.add(harvestSiteSchedule);
893 2022 costa
      }
894 2031 costa
895
      rs.close();
896
      stmt.close();
897
    }
898 2022 costa
    catch (SQLException e) {
899
      System.out.println("Database access failed " + e);
900
      System.exit(1);
901
    }
902
903
  }
904
905
906
  /**
907 2155 costa
   * Sends a report to the Harvester Administrator. The report prints each log
908 2086 costa
   * entry pertaining to this harvest run.
909 2105 costa
   *
910
   * @param maxCodeLevel  the maximum code level that should be printed,
911
   *                      e.g. "warning". Any log entries higher than this
912
   *                      level will not be printed.
913 2022 costa
   */
914 2105 costa
  void reportToAdministrator(String maxCodeLevel) {
915 2086 costa
    PrintStream body;
916
    String from = harvesterAdministrator;
917 2330 costa
    String[] fromArray;
918 2086 costa
    MailMessage msg;
919 2105 costa
    int siteScheduleID = 0;
920 2108 costa
    String subject = "Report from Metacat Harvester: " + timestamp;
921 2086 costa
    String to = harvesterAdministrator;
922
923
    if (!to.equals("")) {
924
      System.out.println("Sending report to Harvester Administrator at address "
925
                         + harvesterAdministrator);
926
927
      try {
928
        msg = new MailMessage(smtpServer);
929 2330 costa
930
        if (from.indexOf(',') > 0) {
931
          fromArray = from.split(",");
932
933
          for (int i = 0; i < fromArray.length; i++) {
934
            if (i == 0) {
935
              msg.from(fromArray[i]);
936
            }
937
938
            msg.to(fromArray[i]);
939
          }
940
        }
941
        else if (from.indexOf(';') > 0) {
942
          fromArray = from.split(";");
943
944
          for (int i = 0; i < fromArray.length; i++) {
945
            if (i == 0) {
946
              msg.from(fromArray[i]);
947
            }
948
949
            msg.to(fromArray[i]);
950
          }
951
        }
952
        else {
953
          msg.from(from);
954
          msg.to(to);
955
        }
956
957 2086 costa
        msg.setSubject(subject);
958
        body = msg.getPrintStream();
959 2155 costa
        printHarvestHeader(body, siteScheduleID);
960 2105 costa
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
961 2086 costa
        msg.sendAndClose();
962
      }
963
      catch (IOException e) {
964
        System.out.println("There was a problem sending email to " + to);
965
        System.out.println("IOException: " + e.getMessage());
966
      }
967 2139 costa
    }
968 2022 costa
  }
969 2139 costa
970
971
  /**
972
   * Sets the harvest start time for this harvest run.
973
   *
974
   * @param date
975
   */
976
  public void setHarvestStartTime(Date date) {
977
    harvestStartTime = date;
978
  }
979 2022 costa
980
981
  /**
982
   * Shuts down Harvester. Performs cleanup operations such as logging out
983
   * of Metacat and disconnecting from the database.
984
   */
985
  private void shutdown() {
986 2105 costa
    String maxCodeLevel = "debug";  // Print all log entries from level 1
987
                                    // ("error") to level 5 ("debug")
988
    int siteScheduleID = 0;
989
990 2022 costa
    // Log shutdown operation
991
    System.out.println("Shutting Down Harvester");
992 4175 daigle
    addLogEntry(0, "Shutting Down Harvester", "harvester.HarvesterShutdown", 0, null, "");
993 2031 costa
    pruneHarvestLog();
994 2139 costa
    closeConnection();
995 2105 costa
    // Print log to standard output and then email the Harvester administrator
996
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
997
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
998 2022 costa
  }
999
1000
1001 4125 daigle
    /**
1002
	 * Initializes Harvester at startup. Connects to the database and to Metacat.
1003
	 *
1004
	 * @param nHarvests        the nth harvest
1005
	 * @param maxHarvests      the maximum number of harvests that this process
1006
	 *                         can run
1007
	 */
1008
	private void startup(int nHarvests, int maxHarvests) {
1009
		Boolean ctm;
1010
		Integer lp;
1011
		String metacatURL;
1012
		Date now = new Date();
1013 2062 costa
1014 4125 daigle
		timestamp = now.toString();
1015
		System.out.println(Harvester.marker);
1016
		System.out.print(timestamp + ": Starting Next Harvest");
1017
		if (maxHarvests > 0) {
1018
			System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
1019
		}
1020
		System.out.print("\n");
1021
		try {
1022 4175 daigle
			ctm = Boolean.valueOf(PropertyService.getProperty("harvester.connectToMetacat"));
1023 4125 daigle
			connectToMetacat = ctm.booleanValue();
1024
			harvesterAdministrator = PropertyService
1025 4175 daigle
					.getProperty("harvester.administrator");
1026
			smtpServer = PropertyService.getProperty("harvester.smtpServer");
1027 2062 costa
1028 4175 daigle
			lp = Integer.valueOf(PropertyService.getProperty("harvester.logPeriod"));
1029 4125 daigle
			logPeriod = lp.intValue();
1030
		} catch (NumberFormatException e) {
1031
			System.err.println("NumberFormatException: Error parsing logPeriod "
1032
					+ logPeriod + e.getMessage());
1033
			System.err.println("Defaulting to logPeriod of 90 days");
1034
			logPeriod = 90;
1035
		} catch (PropertyNotFoundException pnfe) {
1036
			System.out.println("PropertyNotFoundException: Error getting property: "
1037
					+ pnfe.getMessage());
1038
			return;
1039
		}
1040 2022 costa
1041 4125 daigle
		conn = getConnection();
1042
		initLogIDs();
1043
		setHarvestStartTime(now);
1044
		// Log startup operation
1045 4175 daigle
		addLogEntry(0, "Starting Up Harvester", "harvester.HarvesterStartup", 0, null, "");
1046 4125 daigle
1047
		if (connectToMetacat()) {
1048
			try {
1049
				metacatURL = SystemUtil.getServletURL();
1050
				System.out.println("Connecting to Metacat: " + metacatURL);
1051
				metacat = MetacatFactory.createMetacatConnection(metacatURL);
1052
			} catch (MetacatInaccessibleException e) {
1053
				System.out.println("Metacat connection failed." + e.getMessage());
1054
			} catch (Exception e) {
1055
				System.out.println("Metacat connection failed." + e.getMessage());
1056
			}
1057
		}
1058
	}
1059
1060 2022 costa
}