Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: daigle $'
7
 *     '$Date: 2009-08-24 14:34:17 -0700 (Mon, 24 Aug 2009) $'
8
 * '$Revision: 5030 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.IOException;
29
import java.io.PrintStream;
30
import java.sql.Connection;
31
import java.sql.DriverManager;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.sql.SQLWarning;
35
import java.sql.Statement;
36
import java.util.ArrayList;
37
import java.text.SimpleDateFormat;
38
import java.util.Date;
39

    
40
import org.apache.log4j.Logger;
41
import org.apache.log4j.PropertyConfigurator;
42

    
43
import edu.ucsb.nceas.metacat.client.Metacat;
44
import edu.ucsb.nceas.metacat.client.MetacatFactory;
45
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
46
import edu.ucsb.nceas.metacat.properties.PropertyService;
47
import edu.ucsb.nceas.metacat.shared.ServiceException;
48
import edu.ucsb.nceas.metacat.util.SystemUtil;
49
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
50

    
51
/**
52
 * Harvester is the main class for the Harvester application. The main
53
 * method creates a single Harvester object which drives the application.
54
 * 
55
 * @author    costa
56
 * 
57
 */
58
public class Harvester {
59

    
60
  /*
61
   * Class fields
62
   */
63
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
64
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
65
//  private static final String CONFIG_NAME = "metacat.properties";
66
  private static final String LOG_CONFIG_NAME = "../../build/war/WEB-INF/log4j.properties";
67
  public static final String filler = "*";
68
  private static boolean keepRunning = true;
69
  public static final String marker =
70
"*****************************************************************************";
71
//  public static PropertyService propertyService = null;
72
  private static String schemaLocation = null;
73
   
74

    
75
  /* 
76
   * Class methods
77
   */
78
   
79

    
80
  /**
81
   * Constructor. Creates a new instance of Harvester.
82
   */
83
  public Harvester() {
84
  }
85
    
86

    
87
  /**
88
   * Loads Harvester options from a configuration file.
89
   */
90
  public static void loadProperties(boolean commandLineMode, boolean test) {
91
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
92

    
93
    try {
94
      if (commandLineMode) {
95
        PropertyService.getInstance(configDir);
96
      }
97
      else {
98
        PropertyService.getInstance();
99
      }
100
    } 
101
    catch (ServiceException e) {
102
      System.out.println("Error in loading properties: " + e.getMessage());
103
    }
104
  }
105
  
106
  
107
    /**
108
	 * Harvester main method.
109
	 * 
110
	 * @param args               the command line arguments
111
	 * 
112
	 *   args[0] if "false", then this is not command-line mode,
113
	 *           Command-line mode is true by default.
114
	 *           
115
	 *   args[1] if present, represents the path to the harvest list schema file.
116
	 *           Specifying it overrides the default path to the schema file.
117
	 *   
118
	 * @throws SAXException
119
	 * @throws IOException
120
	 * @throws ParserConfigurationException
121
	 */
122
	public static void main(String[] args) {
123
	    boolean commandLineMode = true;
124
	    boolean test = false;   // set to true for JUnit testing
125

    
126
	    Integer delayDefault = new Integer(0); // Default number of hours delay
127
		int delay = delayDefault.intValue(); // Delay in hours before first
128
												// harvest
129
		Integer d; // Used for determining delay
130
		long delta; // endTime - startTime
131
		long endTime; // time that a harvest completes
132
		Harvester harvester; // object for a single harvest run
133
		Integer maxHarvestsDefault = new Integer(0); // Default max harvests
134
		int maxHarvests = maxHarvestsDefault.intValue(); // Max number of
135
															// harvests
136
		Integer mh; // used in determining max harvests
137
		int nHarvests = 0; // counts the number of harvest runs
138
		final long oneHour = (60 * 60 * 1000); // milliseconds in one hour
139
		Integer periodDefault = new Integer(24); // Default hours between
140
													// harvests
141
		int period = periodDefault.intValue(); // Hours between harvests
142
		Integer p; // Used in determining the period
143
		long startTime; // time that a harvest run starts
144
		
145
		if ((args.length > 0) && (args[0] != null)) {
146
		  if (args[0].equals("false")) {
147
		    commandLineMode = false;
148
		  }
149
		  else {
150
		    // If commandLineMode is true, initialize log4j properties
151
	        PropertyConfigurator.configureAndWatch(LOG_CONFIG_NAME);
152
		  }
153
		}
154

    
155
		/*
156
		 * If there is a second argument, it is the schemaLocation value
157
		 */
158
		if (args.length > 1) {
159
			schemaLocation = args[1];
160
			System.err.println("schemaLocation: " + schemaLocation);
161

    
162
			try {
163
				Thread.sleep(10000);
164
			} catch (InterruptedException e) {
165
				e.printStackTrace();
166
			}
167
		}
168

    
169
		System.out.println(marker);
170
		System.out.println("Starting Harvester");
171
		Harvester.loadProperties(commandLineMode, test);
172

    
173
		// Parse the delay property. Use default if necessary.
174
		try {
175
			d = Integer.valueOf(PropertyService.getProperty("harvester.delay"));
176
			delay = d.intValue();
177
		} catch (NumberFormatException e) {
178
			System.out.println("NumberFormatException: Error parsing delay: "
179
					+ e.getMessage());
180
			System.out.println("Defaulting to delay=" + delayDefault);
181
			delay = delayDefault.intValue();
182
		} catch (PropertyNotFoundException pnfe) {
183
			System.out.println("PropertyNotFoundException: Error finding delay: "
184
					+ pnfe.getMessage());
185
			System.out.println("Defaulting to delay=" + delayDefault);
186
			delay = delayDefault.intValue();
187
		}
188

    
189
		// Parse the maxHarvests property. Use default if necessary.
190
		try {
191
			mh = Integer.valueOf(PropertyService.getProperty("harvester.maxHarvests"));
192
			maxHarvests = mh.intValue();
193
		} catch (NumberFormatException e) {
194
			System.out.println("NumberFormatException: Error parsing maxHarvests: "
195
					+ e.getMessage());
196
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
197
			maxHarvests = maxHarvestsDefault.intValue();
198
		} catch (PropertyNotFoundException pnfe) {
199
			System.out.println("PropertyNotFoundException: Error finding maxHarvests: "
200
					+ pnfe.getMessage());
201
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
202
			maxHarvests = maxHarvestsDefault.intValue();
203
		}
204

    
205
		// Parse the period property. Use default if necessary.
206
		try {
207
			p = Integer.valueOf(PropertyService.getProperty("harvester.period"));
208
			period = p.intValue();
209
		} catch (NumberFormatException e) {
210
			System.out.println("NumberFormatException: Error parsing period: "
211
					+ e.getMessage());
212
			System.out.println("Defaulting to period=" + periodDefault);
213
			period = periodDefault.intValue();
214
		} catch (PropertyNotFoundException pnfe) {
215
			System.out.println("PropertyNotFoundException: Error finding period: "
216
					+ pnfe.getMessage());
217
			System.out.println("Defaulting to period=" + periodDefault);
218
			period = periodDefault.intValue();
219
		}
220

    
221
		// Sleep for delay number of hours prior to starting first harvest
222
		if (delay > 0) {
223
			try {
224
				System.out.print("First harvest will begin in " + delay);
225
				if (delay == 1) {
226
					System.out.println(" hour.");
227
				} else {
228
					System.out.println(" hours.");
229
				}
230
				Thread.sleep(delay * oneHour);
231
			} catch (InterruptedException e) {
232
				System.err.println("InterruptedException: " + e.getMessage());
233
				System.exit(1);
234
			}
235
		}
236

    
237
    // Repeat a new harvest once every period number of hours, until we reach
238
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
239
    // Subtract delta from the time period so
240
    // that each harvest will start at a fixed interval.
241
    //
242
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
243
      nHarvests++;
244
      startTime = System.currentTimeMillis();
245
      harvester = new Harvester();                // New object for this
246
													// harvest
247
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
248
      harvester.readHarvestSiteSchedule();        // Read the database table
249
      harvester.harvest();                        // Harvest the documents
250
      harvester.shutdown();                       // Shut down Harvester
251
      endTime = System.currentTimeMillis();
252
      delta = endTime - startTime;
253

    
254
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
255
        try {
256
          System.out.println("Next harvest will begin in " + 
257
                             period + " hours.");
258
          Thread.sleep((period * oneHour) - delta);
259
        }
260
        catch (InterruptedException e) {
261
          System.err.println("InterruptedException: " + e.getMessage());
262
          System.exit(1);
263
        }
264
      }
265
    }
266
  }
267
  
268
  
269
  /**
270
   * Set the keepRunning flag. If set to false, the main program will end
271
   * the while loop that keeps harvester running every period number of hours.
272
   * The static method is intended to be called from the HarvesterServlet class
273
   * which creates a thread to run Harvester. When the thread is destroyed, the
274
   * thread's destroy() method calls Harvester.setKeepRunning(false).
275
   * 
276
   * @param keepRunning
277
   */
278
  static void setKeepRunning(boolean keepRunning) {
279
    Harvester.keepRunning = keepRunning;
280
  }
281

    
282
  
283
  /*
284
   * Object fields
285
   */
286

    
287
  /** Database connection */
288
  private Connection conn = null;
289
  
290
  /** Used during development to determine whether to connect to metacat 
291
   *  Sometimes it's useful to test parts of the code without actually
292
   *  connecting to Metacat.
293
   */
294
  private boolean connectToMetacat;
295

    
296
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
297
  private int detailLogID;
298
  
299
  /** Email address of the Harvester Administrator */
300
  String harvesterAdministrator;
301
  
302
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
303
  private int harvestLogID;
304
  
305
  /** End time of this harvest session */
306
  private Date harvestEndTime;
307
  
308
  /** List of HarvestLog objects. Stores log entries for report generation. */
309
  private ArrayList harvestLogList = new ArrayList();
310
  
311
  /** List of HarvestSiteSchedule objects */
312
  private ArrayList harvestSiteScheduleList = new ArrayList();
313
  
314
  /** Start time of this harvest session */
315
  private Date harvestStartTime;
316
  
317
  /** Number of days to save log records. Any that are older are purged. */
318
  int logPeriod;
319
  
320
  /** Metacat client object */
321
  Metacat metacat;
322
  
323
  /** SMTP server for sending mail messages */
324
  String smtpServer;
325
  
326
  /** The timestamp for this harvest run. Used for output only. */
327
  String timestamp;
328
  
329

    
330
  /*
331
   * Object methods
332
   */
333
   
334
  /**
335
   * Creates a new HarvestLog object and adds it to the harvestLogList.
336
   * 
337
   * @param  status          the status of the harvest operation
338
   * @param  message         the message text of the harvest operation
339
   * @param  harvestOperationCode  the harvest operation code
340
   * @param  siteScheduleID  the siteScheduleID for which this operation was
341
   *                         performed. 0 indicates that the operation did not
342
   *                         involve a particular harvest site.
343
   * @param  harvestDocument the associated HarvestDocument object. May be null.
344
   * @param  errorMessage    additional error message pertaining to document
345
   *                         error.
346
   */
347
  void addLogEntry(int    status,
348
                   String message,
349
                   String harvestOperationCode,
350
                   int    siteScheduleID,
351
                   HarvestDocument harvestDocument,
352
                   String errorMessage
353
                  ) {
354
    HarvestLog harvestLog;
355
    int harvestLogID = getHarvestLogID();
356
    int detailLogID;
357

    
358
    /* If there is no associated harvest document, call the basic constructor;
359
     * else call the extended constructor.
360
     */
361
    if (harvestDocument == null) {    
362
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
363
                                  status, message, harvestOperationCode, 
364
                                  siteScheduleID);
365
    }
366
    else {
367
      detailLogID = getDetailLogID();
368
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
369
                                  harvestStartTime, status, message,
370
                                  harvestOperationCode, siteScheduleID,
371
                                  harvestDocument, errorMessage);
372
    }
373
    
374
    harvestLogList.add(harvestLog);
375
  }
376
  
377
  
378
  public void closeConnection() {
379
    try {
380
      // Close the database connection
381
      System.out.println("Closing the database connection.");
382
      conn.close();
383
    }
384
    catch (SQLException e) {
385
      System.out.println("Database access failed " + e);
386
    }    
387
  }
388

    
389

    
390
  /**
391
   * Determines whether Harvester should attempt to connect to Metacat.
392
   * Used during development and testing.
393
   * 
394
   * @return     true if Harvester should connect, otherwise false
395
   */
396
  boolean connectToMetacat () {
397
    return connectToMetacat;
398
  }
399
  
400

    
401
  /**
402
   * Normalizes text prior to insertion into the HARVEST_LOG or
403
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
404
   * character with the double quote character. This prevents SQL errors
405
   * involving words that contain single quotes. Also removes \n and \r
406
   * characters from the text.
407
   * 
408
   * @param text  the original string
409
   * @return      a string containing the normalized text
410
   */
411
  public String dequoteText(String text) {
412
    char c;
413
    StringBuffer stringBuffer = new StringBuffer();
414
    
415
    for (int i = 0; i < text.length(); i++) {
416
      c = text.charAt(i);
417
      switch (c) {
418
        case '\'':
419
          stringBuffer.append('\"');
420
          break;
421
        case '\r':
422
        case '\n':
423
          break;
424
        default:
425
          stringBuffer.append(c);
426
          break;
427
      }
428
    }
429
    
430
    return stringBuffer.toString();
431
  }
432
  
433
  /**
434
   * Returns a connection to the database. Opens the connection if a connection
435
   * has not already been made previously.
436
   * 
437
   * @return  conn  the database Connection object
438
   */
439
  public Connection getConnection() {
440
    String dbDriver = "";
441
    String defaultDB = null;
442
    String password = null;
443
    String user = null;
444
    SQLWarning warn;
445
    
446
    if (conn == null) {
447
    	try {
448
			dbDriver = PropertyService.getProperty("database.driver");
449
			defaultDB = PropertyService.getProperty("database.connectionURI");
450
			password = PropertyService.getProperty("database.password");
451
			user = PropertyService.getProperty("database.user");
452
		} catch (PropertyNotFoundException pnfe) {
453
			System.out.println("Can't find property " + pnfe);
454
	        System.exit(1);
455
		}
456

    
457
      // Load the jdbc driver
458
      try {
459
        Class.forName(dbDriver);
460
      }
461
      catch (ClassNotFoundException e) {
462
        System.out.println("Can't load driver " + e);
463
        System.exit(1);
464
      } 
465

    
466
      // Make the database connection
467
      try {
468
        System.out.println("Getting connection to Harvester tables");
469
        conn = DriverManager.getConnection(defaultDB, user, password);
470

    
471
        // If a SQLWarning object is available, print its warning(s).
472
        // There may be multiple warnings chained.
473
        warn = conn.getWarnings();
474
      
475
        if (warn != null) {
476
          while (warn != null) {
477
            System.out.println("SQLState: " + warn.getSQLState());
478
            System.out.println("Message:  " + warn.getMessage());
479
            System.out.println("Vendor: " + warn.getErrorCode());
480
            System.out.println("");
481
            warn = warn.getNextWarning();
482
          }
483
        }
484
      }
485
      catch (SQLException e) {
486
        System.out.println("Database access failed " + e);
487
        System.exit(1);
488
      }
489
    }
490
    
491
    return conn;
492
  }
493

    
494

    
495
  /**
496
   * Gets the current value of the detailLogID for storage as a primary key in
497
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
498
   * 
499
   * @return  the current value of the detailLogID
500
   */
501
  public int getDetailLogID() {
502
    int currentValue = detailLogID;
503
    
504
    detailLogID++;
505
    return currentValue;
506
  }
507
  
508
  
509
  /**
510
   * Gets the current value of the harvestLogID for storage as a primary key in
511
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
512
   * 
513
   * @return  the current value of the detailLogID
514
   */
515
  public int getHarvestLogID() {
516
    int currentValue = harvestLogID;
517
    
518
    harvestLogID++;
519
    return currentValue;
520
  }
521
  
522

    
523
  /** 
524
   * Gets the maximum value of an integer field from a table.
525
   * 
526
   * @param tableName  the database table name
527
   * @param fieldName  the field name of the integer field in the table
528
   * @return  the maximum integer stored in the fieldName field of tableName
529
   */
530
  private int getMaxValue(String tableName, String fieldName) {
531
    int maxValue = 0;
532
    int fieldValue;
533
    String query = "SELECT " + fieldName + " FROM " + tableName;
534
    Statement stmt;
535
    
536
	try {
537
      stmt = conn.createStatement();
538
      ResultSet rs = stmt.executeQuery(query);
539
	
540
      while (rs.next()) {
541
        fieldValue = rs.getInt(fieldName);
542
        maxValue = Math.max(maxValue, fieldValue);
543
      }
544
      
545
      stmt.close();
546
    } 
547
    catch(SQLException ex) {
548
      System.out.println("SQLException: " + ex.getMessage());
549
    }
550
    
551
    return maxValue;
552
  }
553
  
554
  
555
  /** 
556
   * Gets the minimum value of an integer field from a table.
557
   * 
558
   * @param tableName  the database table name
559
   * @param fieldName  the field name of the integer field in the table
560
   * @return  the minimum integer stored in the fieldName field of tableName
561
   */
562
  private int getMinValue(String tableName, String fieldName) {
563
    int minValue = 0;
564
    int fieldValue;
565
    String query = "SELECT " + fieldName + " FROM " + tableName;
566
    Statement stmt;
567
    
568
    try {
569
      stmt = conn.createStatement();
570
      ResultSet rs = stmt.executeQuery(query);
571
	
572
      while (rs.next()) {
573
        fieldValue = rs.getInt(fieldName);
574

    
575
        if (minValue == 0) {
576
          minValue = fieldValue;
577
        }
578
        else {
579
          minValue = Math.min(minValue, fieldValue);
580
        }
581
      }
582
      
583
      stmt.close();
584
    } 
585
    catch(SQLException ex) {
586
      System.out.println("SQLException: " + ex.getMessage());
587
    }
588

    
589
    return minValue;
590
  }
591
  
592
  
593
  /**
594
   * For every Harvest site schedule in the database, harvest the
595
   * documents for that site if they are due to be harvested.
596
   * 
597
   * @throws SAXException
598
   * @throws IOException
599
   * @throws ParserConfigurationException
600
   */
601
  private void harvest() {
602
    HarvestSiteSchedule harvestSiteSchedule;
603

    
604
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
605
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
606
      
607
      if (Harvester.schemaLocation != null) {
608
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
609
      }
610
      
611
      harvestSiteSchedule.harvestDocumentList();
612
    }
613
  }
614
  
615
  
616
  /**
617
   * Initializes the detailLogID and harvestLogID values to their current
618
   * maximums + 1.
619
   */
620
  public void initLogIDs() {
621
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
622
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
623
  }
624
  
625

    
626
  /**
627
   * Prints the header of the harvest report.
628
   * 
629
   * @param out            the PrintStream object to print to
630
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
631
   *                       have a value of 0 if no particular site is involved,
632
   *                       which indicates that the report is being prepared
633
   *                       for the Harvester Administrator rather than for a
634
   *                       particular Site Contact.
635
   */
636
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
637
    HarvestLog harvestLog;
638
    int logSiteScheduleID;
639
    int nErrors = 0;
640
    String phrase;
641
    
642
    for (int i = 0; i < harvestLogList.size(); i++) {
643
      harvestLog = (HarvestLog) harvestLogList.get(i);
644
      logSiteScheduleID = harvestLog.getSiteScheduleID();
645
      
646
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
647
        if (harvestLog.isErrorEntry()) {
648
          nErrors++;
649
        }
650
      }      
651
    }
652

    
653
    out.println(marker);
654
    out.println(filler);
655
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
656
    out.println(filler);
657

    
658
    if (nErrors > 0) {
659
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
660
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
661
      out.println("* Please see the log entries below for additonal details.");
662
    }
663
    else {
664
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
665
    }
666
    
667
    out.println(filler);
668
    out.println(marker);
669
  }
670
    
671

    
672
  /**
673
   * Prints harvest log entries for this harvest run. Entries may be filtered
674
   * for a particular site, or all entries may be printed.
675
   * 
676
   * @param out            the PrintStream object to write to
677
   * @param maxCodeLevel   the maximum code level that should be printed,
678
   *                       e.g. "warning". Any log entries higher than this
679
   *                       level will not be printed.
680
   * @param siteScheduleID if greater than 0, indicates that the log
681
   *                       entry should only be printed for a particular site
682
   *                       as identified by its siteScheduleID. if 0, then
683
   *                       print output for all sites.
684
   */
685
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
686
                      ) {
687
    HarvestLog harvestLog;
688
    int logSiteScheduleID;
689
    int nErrors = 0;
690
    String phrase;
691
    
692
    out.println("");
693
    out.println(marker);
694
    out.println(filler);
695
    out.println("*                       LOG ENTRIES");
696
    out.println(filler);
697
    out.println(marker);
698

    
699
    for (int i = 0; i < harvestLogList.size(); i++) {
700
      harvestLog = (HarvestLog) harvestLogList.get(i);
701
      logSiteScheduleID = harvestLog.getSiteScheduleID();
702
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
703
        harvestLog.printOutput(out, maxCodeLevel);
704
      }
705
    }
706
  }
707
    
708

    
709
  /**
710
   * Prints the site schedule data for a given site.
711
   * 
712
   * @param out              the PrintStream to write to
713
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
714
   */
715
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
716
    HarvestSiteSchedule harvestSiteSchedule;
717

    
718
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
719
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
720
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
721
        harvestSiteSchedule.printOutput(out);
722
      }
723
    }
724
  }
725
  
726

    
727
  /**
728
   * Prunes old records from the HARVEST_LOG table. Records are removed if
729
   * their HARVEST_DATE is older than a given number of days, as stored in the
730
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
731
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
732
   */
733
  private void pruneHarvestLog() {
734
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
735
    Date dateLastLog;                    // Prune everything prior to this date
736
    String deleteString;
737
    String deleteStringDetailLog;
738
    long delta;
739
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
740
    int recordsDeleted;
741
    int recordsDeletedDetail = 0;
742
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
743
    String dateString;
744
    ResultSet rs;
745
    String selectString;
746
    Statement stmt;
747
    long timeLastLog = 0;
748
    SQLWarning warn;
749
     
750
    delta = logPeriod * millisecondsPerDay;
751
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
752
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
753
    deleteStringDetailLog = 
754
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
755
    timeLastLog = currentTime - delta;
756
    dateLastLog = new Date(timeLastLog);
757
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
758
    deleteString += dateString;
759
    selectString += dateString;
760

    
761
    try {
762
      System.out.println(
763
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
764

    
765
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
766
       * table.
767
       */
768
      stmt = conn.createStatement();                            
769
      rs = stmt.executeQuery(selectString);
770
      warn = rs.getWarnings();
771

    
772
      if (warn != null) {
773
        System.out.println("\n---Warning---\n");
774

    
775
        while (warn != null) {
776
          System.out.println("Message: " + warn.getMessage());
777
          System.out.println("SQLState: " + warn.getSQLState());
778
          System.out.print("Vendor error code: ");
779
          System.out.println(warn.getErrorCode());
780
          System.out.println("");
781
          warn = warn.getNextWarning();
782
        }
783
      } 
784

    
785
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
786
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
787
       * be pruned first because its records have a child relationship to those
788
       * in HARVEST_LOG.
789
       */
790
      while (rs.next()) {
791
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
792
        stmt = conn.createStatement();                            
793
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
794
                                            harvestLogID);
795
        recordsDeletedDetail += recordsDeleted;
796
        stmt.close();
797
      }
798
 
799
      /* Now prune entries from the HARVEST_LOG table using a single update.
800
       */
801
      stmt = conn.createStatement();                            
802
      recordsDeleted = stmt.executeUpdate(deleteString);
803
      stmt.close();
804

    
805
      System.out.println("  " + recordsDeletedDetail + 
806
                         " records deleted from HARVEST_DETAIL_LOG");
807
      System.out.println("  " + recordsDeleted + 
808
                         " records deleted from HARVEST_LOG");
809
    }
810
    catch (SQLException e) {
811
      System.out.println("SQLException: " + e.getMessage());
812
    }
813
  }
814
    
815

    
816
  /**
817
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
818
   * a HarvestSiteSchedule object for each row in the table.
819
   */
820
  private void readHarvestSiteSchedule() {
821
    HarvestSiteSchedule harvestSiteSchedule;
822
    ResultSet rs;
823
    SQLWarning warn;
824
    Statement stmt;
825

    
826
    String contactEmail;
827
    String dateLastHarvest;
828
    String dateNextHarvest;
829
    String documentListURL;
830
    String ldapDN;
831
    String ldapPwd;
832
    int siteScheduleID;
833
    String unit;
834
    int updateFrequency;
835
        
836
    try {
837
      // Read the HARVEST_SITE_SCHEDULE table
838
      stmt = conn.createStatement();
839
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
840
      warn = rs.getWarnings();
841

    
842
      if (warn != null) {
843
        System.out.println("\n---Warning---\n");
844

    
845
        while (warn != null) {
846
          System.out.println("Message: " + warn.getMessage());
847
          System.out.println("SQLState: " + warn.getSQLState());
848
          System.out.print("Vendor error code: ");
849
          System.out.println(warn.getErrorCode());
850
          System.out.println("");
851
          warn = warn.getNextWarning();
852
        }
853
      }
854
     
855
      while (rs.next()) {
856
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
857
        documentListURL = rs.getString("DOCUMENTLISTURL");
858
        ldapDN = rs.getString("LDAPDN");
859
        ldapPwd = rs.getString("LDAPPWD");
860
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
861
        dateLastHarvest = rs.getString("DATELASTHARVEST");
862
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
863
        unit = rs.getString("UNIT");
864
        contactEmail = rs.getString("CONTACT_EMAIL");
865
        
866
        warn = rs.getWarnings();
867

    
868
        if (warn != null) {
869
          System.out.println("\n---Warning---\n");
870
      
871
          while (warn != null) {
872
            System.out.println("Message: " + warn.getMessage());
873
            System.out.println("SQLState: " + warn.getSQLState());
874
            System.out.print("Vendor error code: ");
875
            System.out.println(warn.getErrorCode());
876
            System.out.println("");
877
            warn = warn.getNextWarning();
878
          }
879
        }
880
      
881
        harvestSiteSchedule = new HarvestSiteSchedule(this,
882
                                                      siteScheduleID,
883
                                                      documentListURL,
884
                                                      ldapDN,
885
                                                      ldapPwd,
886
                                                      dateNextHarvest,
887
                                                      dateLastHarvest,
888
                                                      updateFrequency,
889
                                                      unit,
890
                                                      contactEmail
891
                                                     );
892
        harvestSiteScheduleList.add(harvestSiteSchedule);
893
      }
894
      
895
      rs.close();
896
      stmt.close();
897
    }
898
    catch (SQLException e) {
899
      System.out.println("Database access failed " + e);
900
      System.exit(1);
901
    }
902
    
903
  }
904
    
905

    
906
  /**
907
   * Sends a report to the Harvester Administrator. The report prints each log
908
   * entry pertaining to this harvest run.
909
   *
910
   * @param maxCodeLevel  the maximum code level that should be printed,
911
   *                      e.g. "warning". Any log entries higher than this
912
   *                      level will not be printed.
913
   */
914
  void reportToAdministrator(String maxCodeLevel) {
915
    PrintStream body;
916
    String from = harvesterAdministrator;
917
    String[] fromArray;
918
    MailMessage msg;
919
    int siteScheduleID = 0;
920
    String subject = "Report from Metacat Harvester: " + timestamp;
921
    String to = harvesterAdministrator;
922
    
923
    if (!to.equals("")) {
924
      System.out.println("Sending report to Harvester Administrator at address "
925
                         + harvesterAdministrator);
926
      
927
      try {
928
        msg = new MailMessage(smtpServer);
929

    
930
        if (from.indexOf(',') > 0) {
931
          fromArray = from.split(",");
932
          
933
          for (int i = 0; i < fromArray.length; i++) {
934
            if (i == 0) {
935
              msg.from(fromArray[i]);
936
            }
937
            
938
            msg.to(fromArray[i]);            
939
          }
940
        }
941
        else if (from.indexOf(';') > 0) {
942
          fromArray = from.split(";");
943

    
944
          for (int i = 0; i < fromArray.length; i++) {
945
            if (i == 0) {
946
              msg.from(fromArray[i]);
947
            }
948
            
949
            msg.to(fromArray[i]);            
950
          }
951
        }
952
        else {
953
          msg.from(from);
954
          msg.to(to);
955
        }
956
        
957
        msg.setSubject(subject);
958
        body = msg.getPrintStream();
959
        printHarvestHeader(body, siteScheduleID);
960
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
961
        msg.sendAndClose();
962
      }
963
      catch (IOException e) {
964
        System.out.println("There was a problem sending email to " + to);
965
        System.out.println("IOException: " + e.getMessage());
966
      }
967
    }
968
  }
969
  
970

    
971
  /**
972
   * Sets the harvest start time for this harvest run.
973
   * 
974
   * @param date
975
   */
976
  public void setHarvestStartTime(Date date) {
977
    harvestStartTime = date;
978
  }
979
    
980

    
981
  /**
982
   * Shuts down Harvester. Performs cleanup operations such as logging out
983
   * of Metacat and disconnecting from the database.
984
   */
985
  private void shutdown() {
986
    String maxCodeLevel = "debug";  // Print all log entries from level 1
987
                                    // ("error") to level 5 ("debug")
988
    int siteScheduleID = 0;
989

    
990
    // Log shutdown operation
991
    System.out.println("Shutting Down Harvester");
992
    addLogEntry(0, "Shutting Down Harvester", "harvester.HarvesterShutdown", 0, null, "");
993
    pruneHarvestLog();
994
    closeConnection();
995
    // Print log to standard output and then email the Harvester administrator
996
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
997
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
998
  }
999
    
1000

    
1001
    /**
1002
	 * Initializes Harvester at startup. Connects to the database and to Metacat.
1003
	 * 
1004
	 * @param nHarvests        the nth harvest
1005
	 * @param maxHarvests      the maximum number of harvests that this process
1006
	 *                         can run
1007
	 */
1008
	private void startup(int nHarvests, int maxHarvests) {
1009
		Boolean ctm;
1010
		Integer lp;
1011
		String metacatURL;
1012
		Date now = new Date();
1013

    
1014
		timestamp = now.toString();
1015
		System.out.println(Harvester.marker);
1016
		System.out.print(timestamp + ": Starting Next Harvest");
1017
		if (maxHarvests > 0) {
1018
			System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
1019
		}
1020
		System.out.print("\n");
1021
		try {
1022
			ctm = Boolean.valueOf(PropertyService.getProperty("harvester.connectToMetacat"));
1023
			connectToMetacat = ctm.booleanValue();
1024
			harvesterAdministrator = PropertyService
1025
					.getProperty("harvester.administrator");
1026
			smtpServer = PropertyService.getProperty("harvester.smtpServer");
1027

    
1028
			lp = Integer.valueOf(PropertyService.getProperty("harvester.logPeriod"));
1029
			logPeriod = lp.intValue();
1030
		} catch (NumberFormatException e) {
1031
			System.err.println("NumberFormatException: Error parsing logPeriod "
1032
					+ logPeriod + e.getMessage());
1033
			System.err.println("Defaulting to logPeriod of 90 days");
1034
			logPeriod = 90;
1035
		} catch (PropertyNotFoundException pnfe) {
1036
			System.out.println("PropertyNotFoundException: Error getting property: "
1037
					+ pnfe.getMessage());
1038
			return;
1039
		}
1040

    
1041
		conn = getConnection();
1042
		initLogIDs();
1043
		setHarvestStartTime(now);
1044
		// Log startup operation
1045
		addLogEntry(0, "Starting Up Harvester", "harvester.HarvesterStartup", 0, null, "");
1046

    
1047
		if (connectToMetacat()) {
1048
			try {
1049
				metacatURL = SystemUtil.getServletURL();
1050
				System.out.println("Connecting to Metacat: " + metacatURL);
1051
				metacat = MetacatFactory.createMetacatConnection(metacatURL);
1052
			} catch (MetacatInaccessibleException e) {
1053
				System.out.println("Metacat connection failed." + e.getMessage());
1054
			} catch (Exception e) {
1055
				System.out.println("Metacat connection failed." + e.getMessage());
1056
			}
1057
		}
1058
	}
1059

    
1060
}
(6-6/11)