Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: leinfelder $'
7
 *     '$Date: 2013-07-17 11:35:13 -0700 (Wed, 17 Jul 2013) $'
8
 * '$Revision: 8018 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.IOException;
29
import java.io.PrintStream;
30
import java.sql.Connection;
31
import java.sql.DriverManager;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.sql.SQLWarning;
35
import java.sql.Statement;
36
import java.util.ArrayList;
37
import java.text.SimpleDateFormat;
38
import java.util.Date;
39

    
40
import edu.ucsb.nceas.metacat.client.Metacat;
41
import edu.ucsb.nceas.metacat.client.MetacatFactory;
42
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
43
import edu.ucsb.nceas.metacat.properties.PropertyService;
44
import edu.ucsb.nceas.metacat.util.SystemUtil;
45
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
46

    
47
/**
48
 * Harvester is the main class for the Harvester application. The main
49
 * method creates a single Harvester object which drives the application.
50
 * 
51
 * @author    costa
52
 * 
53
 */
54
public class Harvester {
55

    
56
  /*
57
   * Class fields
58
   */
59

    
60
  public static final String filler = "*";
61
  private static boolean keepRunning = true;
62
  public static final String marker =
63
"*****************************************************************************";
64
//  public static PropertyService propertyService = null;
65
  private static String schemaLocation = null;
66
   
67

    
68
  /* 
69
   * Class methods
70
   */
71
   
72

    
73
  /**
74
   * Constructor. Creates a new instance of Harvester.
75
   */
76
  public Harvester() {
77
  }
78
    
79

    
80
  /**
81
   * Loads Harvester options from a configuration file.
82
   */
83
  public static void loadProperties(String metacatContextDir) {
84

    
85
    try {
86
    	PropertyService.getInstance(metacatContextDir + "/WEB-INF");
87
    } 
88
    catch (Exception e) {
89
      System.out.println("Error in loading properties: " + e.getMessage());
90
      System.exit(1);
91
    }
92
  }
93
  
94
  
95
    /**
96
	 * Harvester main method.
97
	 * 
98
	 * @param args               the command line arguments
99
	 * 
100
	 *   args[0] if "false", then this is not command-line mode,
101
	 *           Command-line mode is true by default.
102
	 *           
103
	 *   args[1] if present, represents the path to the harvest list schema file.
104
	 *           Specifying it overrides the default path to the schema file.
105
	 *   
106
	 * @throws SAXException
107
	 * @throws IOException
108
	 * @throws ParserConfigurationException
109
	 */
110
	public static void main(String[] args) {
111

    
112
	    Integer delayDefault = new Integer(0); // Default number of hours delay
113
		int delay = delayDefault.intValue(); // Delay in hours before first
114
												// harvest
115
		Integer d; // Used for determining delay
116
		long delta; // endTime - startTime
117
		long endTime; // time that a harvest completes
118
		Harvester harvester; // object for a single harvest run
119
		Integer maxHarvestsDefault = new Integer(0); // Default max harvests
120
		int maxHarvests = maxHarvestsDefault.intValue(); // Max number of
121
															// harvests
122
		Integer mh; // used in determining max harvests
123
		int nHarvests = 0; // counts the number of harvest runs
124
		final long oneHour = (60 * 60 * 1000); // milliseconds in one hour
125
		Integer periodDefault = new Integer(24); // Default hours between
126
													// harvests
127
		int period = periodDefault.intValue(); // Hours between harvests
128
		Integer p; // Used in determining the period
129
		long startTime; // time that a harvest run starts
130
		
131
		String metacatContextDir = null;
132

    
133
		if ((args.length > 0) && (args[0] != null)) {
134
			metacatContextDir = args[0];
135
		}
136

    
137
		/*
138
		 * If there is a second argument, it is the schemaLocation value
139
		 */
140
		if (args.length > 1) {
141
			schemaLocation = args[1];
142
			System.err.println("schemaLocation: " + schemaLocation);
143

    
144
			try {
145
				Thread.sleep(10000);
146
			} catch (InterruptedException e) {
147
				e.printStackTrace();
148
			}
149
		}
150

    
151
		System.out.println(marker);
152
		System.out.println("Starting Harvester");
153
		Harvester.loadProperties(metacatContextDir);
154

    
155
		// Parse the delay property. Use default if necessary.
156
		try {
157
			d = Integer.valueOf(PropertyService.getProperty("harvester.delay"));
158
			delay = d.intValue();
159
		} catch (NumberFormatException e) {
160
			System.out.println("NumberFormatException: Error parsing delay: "
161
					+ e.getMessage());
162
			System.out.println("Defaulting to delay=" + delayDefault);
163
			delay = delayDefault.intValue();
164
		} catch (PropertyNotFoundException pnfe) {
165
			System.out.println("PropertyNotFoundException: Error finding delay: "
166
					+ pnfe.getMessage());
167
			System.out.println("Defaulting to delay=" + delayDefault);
168
			delay = delayDefault.intValue();
169
		}
170

    
171
		// Parse the maxHarvests property. Use default if necessary.
172
		try {
173
			mh = Integer.valueOf(PropertyService.getProperty("harvester.maxHarvests"));
174
			maxHarvests = mh.intValue();
175
		} catch (NumberFormatException e) {
176
			System.out.println("NumberFormatException: Error parsing maxHarvests: "
177
					+ e.getMessage());
178
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
179
			maxHarvests = maxHarvestsDefault.intValue();
180
		} catch (PropertyNotFoundException pnfe) {
181
			System.out.println("PropertyNotFoundException: Error finding maxHarvests: "
182
					+ pnfe.getMessage());
183
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
184
			maxHarvests = maxHarvestsDefault.intValue();
185
		}
186

    
187
		// Parse the period property. Use default if necessary.
188
		try {
189
			p = Integer.valueOf(PropertyService.getProperty("harvester.period"));
190
			period = p.intValue();
191
		} catch (NumberFormatException e) {
192
			System.out.println("NumberFormatException: Error parsing period: "
193
					+ e.getMessage());
194
			System.out.println("Defaulting to period=" + periodDefault);
195
			period = periodDefault.intValue();
196
		} catch (PropertyNotFoundException pnfe) {
197
			System.out.println("PropertyNotFoundException: Error finding period: "
198
					+ pnfe.getMessage());
199
			System.out.println("Defaulting to period=" + periodDefault);
200
			period = periodDefault.intValue();
201
		}
202

    
203
		// Sleep for delay number of hours prior to starting first harvest
204
		if (delay > 0) {
205
			try {
206
				System.out.print("First harvest will begin in " + delay);
207
				if (delay == 1) {
208
					System.out.println(" hour.");
209
				} else {
210
					System.out.println(" hours.");
211
				}
212
				Thread.sleep(delay * oneHour);
213
			} catch (InterruptedException e) {
214
				System.err.println("InterruptedException: " + e.getMessage());
215
				System.exit(1);
216
			}
217
		}
218

    
219
    // Repeat a new harvest once every period number of hours, until we reach
220
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
221
    // Subtract delta from the time period so
222
    // that each harvest will start at a fixed interval.
223
    //
224
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
225
      nHarvests++;
226
      startTime = System.currentTimeMillis();
227
      harvester = new Harvester();                // New object for this
228
													// harvest
229
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
230
      harvester.readHarvestSiteSchedule();        // Read the database table
231
      harvester.harvest();                        // Harvest the documents
232
      harvester.shutdown();                       // Shut down Harvester
233
      endTime = System.currentTimeMillis();
234
      delta = endTime - startTime;
235

    
236
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
237
        try {
238
          System.out.println("Next harvest will begin in " + 
239
                             period + " hours.");
240
          Thread.sleep((period * oneHour) - delta);
241
        }
242
        catch (InterruptedException e) {
243
          System.err.println("InterruptedException: " + e.getMessage());
244
          System.exit(1);
245
        }
246
      }
247
    }
248
  }
249
  
250
  
251
  /**
252
   * Set the keepRunning flag. If set to false, the main program will end
253
   * the while loop that keeps harvester running every period number of hours.
254
   * The static method is intended to be called from the HarvesterServlet class
255
   * which creates a thread to run Harvester. When the thread is destroyed, the
256
   * thread's destroy() method calls Harvester.setKeepRunning(false).
257
   * 
258
   * @param keepRunning
259
   */
260
  static void setKeepRunning(boolean keepRunning) {
261
    Harvester.keepRunning = keepRunning;
262
  }
263

    
264
  
265
  /*
266
   * Object fields
267
   */
268

    
269
  /** Database connection */
270
  private Connection conn = null;
271
  
272
  /** Used during development to determine whether to connect to metacat 
273
   *  Sometimes it's useful to test parts of the code without actually
274
   *  connecting to Metacat.
275
   */
276
  private boolean connectToMetacat;
277

    
278
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
279
  private int detailLogID;
280
  
281
  /** Email address of the Harvester Administrator */
282
  String harvesterAdministrator;
283
  
284
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
285
  private int harvestLogID;
286
  
287
  /** End time of this harvest session */
288
  private Date harvestEndTime;
289
  
290
  /** List of HarvestLog objects. Stores log entries for report generation. */
291
  private ArrayList harvestLogList = new ArrayList();
292
  
293
  /** List of HarvestSiteSchedule objects */
294
  private ArrayList harvestSiteScheduleList = new ArrayList();
295
  
296
  /** Start time of this harvest session */
297
  private Date harvestStartTime;
298
  
299
  /** Number of days to save log records. Any that are older are purged. */
300
  int logPeriod;
301
  
302
  /** Metacat client object */
303
  Metacat metacat;
304
  
305
  /** SMTP server for sending mail messages */
306
  String smtpServer;
307
  
308
  /** The timestamp for this harvest run. Used for output only. */
309
  String timestamp;
310
  
311

    
312
  /*
313
   * Object methods
314
   */
315
   
316
  /**
317
   * Creates a new HarvestLog object and adds it to the harvestLogList.
318
   * 
319
   * @param  status          the status of the harvest operation
320
   * @param  message         the message text of the harvest operation
321
   * @param  harvestOperationCode  the harvest operation code
322
   * @param  siteScheduleID  the siteScheduleID for which this operation was
323
   *                         performed. 0 indicates that the operation did not
324
   *                         involve a particular harvest site.
325
   * @param  harvestDocument the associated HarvestDocument object. May be null.
326
   * @param  errorMessage    additional error message pertaining to document
327
   *                         error.
328
   */
329
  void addLogEntry(int    status,
330
                   String message,
331
                   String harvestOperationCode,
332
                   int    siteScheduleID,
333
                   HarvestDocument harvestDocument,
334
                   String errorMessage
335
                  ) {
336
    HarvestLog harvestLog;
337
    int harvestLogID = getHarvestLogID();
338
    int detailLogID;
339

    
340
    /* If there is no associated harvest document, call the basic constructor;
341
     * else call the extended constructor.
342
     */
343
    if (harvestDocument == null) {    
344
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
345
                                  status, message, harvestOperationCode, 
346
                                  siteScheduleID);
347
    }
348
    else {
349
      detailLogID = getDetailLogID();
350
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
351
                                  harvestStartTime, status, message,
352
                                  harvestOperationCode, siteScheduleID,
353
                                  harvestDocument, errorMessage);
354
    }
355
    
356
    harvestLogList.add(harvestLog);
357
  }
358
  
359
  
360
  public void closeConnection() {
361
    try {
362
      // Close the database connection
363
      System.out.println("Closing the database connection.");
364
      conn.close();
365
    }
366
    catch (SQLException e) {
367
      System.out.println("Database access failed " + e);
368
    }    
369
  }
370

    
371

    
372
  /**
373
   * Determines whether Harvester should attempt to connect to Metacat.
374
   * Used during development and testing.
375
   * 
376
   * @return     true if Harvester should connect, otherwise false
377
   */
378
  boolean connectToMetacat () {
379
    return connectToMetacat;
380
  }
381
  
382

    
383
  /**
384
   * Normalizes text prior to insertion into the HARVEST_LOG or
385
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
386
   * character with the double quote character. This prevents SQL errors
387
   * involving words that contain single quotes. Also removes \n and \r
388
   * characters from the text.
389
   * 
390
   * @param text  the original string
391
   * @return      a string containing the normalized text
392
   */
393
  public String dequoteText(String text) {
394
    char c;
395
    StringBuffer stringBuffer = new StringBuffer();
396
    
397
    for (int i = 0; i < text.length(); i++) {
398
      c = text.charAt(i);
399
      switch (c) {
400
        case '\'':
401
          stringBuffer.append('\"');
402
          break;
403
        case '\r':
404
        case '\n':
405
          break;
406
        default:
407
          stringBuffer.append(c);
408
          break;
409
      }
410
    }
411
    
412
    return stringBuffer.toString();
413
  }
414
  
415
  /**
416
   * Returns a connection to the database. Opens the connection if a connection
417
   * has not already been made previously.
418
   * 
419
   * @return  conn  the database Connection object
420
   */
421
  public Connection getConnection() {
422
    String dbDriver = "";
423
    String defaultDB = null;
424
    String password = null;
425
    String user = null;
426
    SQLWarning warn;
427
    
428
    if (conn == null) {
429
    	try {
430
			dbDriver = PropertyService.getProperty("database.driver");
431
			defaultDB = PropertyService.getProperty("database.connectionURI");
432
			password = PropertyService.getProperty("database.password");
433
			user = PropertyService.getProperty("database.user");
434
		} catch (PropertyNotFoundException pnfe) {
435
			System.out.println("Can't find property " + pnfe);
436
	        System.exit(1);
437
		}
438

    
439
      // Load the jdbc driver
440
      try {
441
        Class.forName(dbDriver);
442
      }
443
      catch (ClassNotFoundException e) {
444
        System.out.println("Can't load driver " + e);
445
        System.exit(1);
446
      } 
447

    
448
      // Make the database connection
449
      try {
450
        System.out.println("Getting connection to Harvester tables");
451
        conn = DriverManager.getConnection(defaultDB, user, password);
452

    
453
        // If a SQLWarning object is available, print its warning(s).
454
        // There may be multiple warnings chained.
455
        warn = conn.getWarnings();
456
      
457
        if (warn != null) {
458
          while (warn != null) {
459
            System.out.println("SQLState: " + warn.getSQLState());
460
            System.out.println("Message:  " + warn.getMessage());
461
            System.out.println("Vendor: " + warn.getErrorCode());
462
            System.out.println("");
463
            warn = warn.getNextWarning();
464
          }
465
        }
466
      }
467
      catch (SQLException e) {
468
        System.out.println("Database access failed " + e);
469
        System.exit(1);
470
      }
471
    }
472
    
473
    return conn;
474
  }
475

    
476

    
477
  /**
478
   * Gets the current value of the detailLogID for storage as a primary key in
479
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
480
   * 
481
   * @return  the current value of the detailLogID
482
   */
483
  public int getDetailLogID() {
484
    int currentValue = detailLogID;
485
    
486
    detailLogID++;
487
    return currentValue;
488
  }
489
  
490
  
491
  /**
492
   * Gets the current value of the harvestLogID for storage as a primary key in
493
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
494
   * 
495
   * @return  the current value of the detailLogID
496
   */
497
  public int getHarvestLogID() {
498
    int currentValue = harvestLogID;
499
    
500
    harvestLogID++;
501
    return currentValue;
502
  }
503
  
504

    
505
  /** 
506
   * Gets the maximum value of an integer field from a table.
507
   * 
508
   * @param tableName  the database table name
509
   * @param fieldName  the field name of the integer field in the table
510
   * @return  the maximum integer stored in the fieldName field of tableName
511
   */
512
  private int getMaxValue(String tableName, String fieldName) {
513
    int maxValue = 0;
514
    int fieldValue;
515
    String query = "SELECT " + fieldName + " FROM " + tableName;
516
    Statement stmt;
517
    
518
	try {
519
      stmt = conn.createStatement();
520
      ResultSet rs = stmt.executeQuery(query);
521
	
522
      while (rs.next()) {
523
        fieldValue = rs.getInt(fieldName);
524
        maxValue = Math.max(maxValue, fieldValue);
525
      }
526
      
527
      stmt.close();
528
    } 
529
    catch(SQLException ex) {
530
      System.out.println("SQLException: " + ex.getMessage());
531
    }
532
    
533
    return maxValue;
534
  }
535
  
536
  
537
  /** 
538
   * Gets the minimum value of an integer field from a table.
539
   * 
540
   * @param tableName  the database table name
541
   * @param fieldName  the field name of the integer field in the table
542
   * @return  the minimum integer stored in the fieldName field of tableName
543
   */
544
  private int getMinValue(String tableName, String fieldName) {
545
    int minValue = 0;
546
    int fieldValue;
547
    String query = "SELECT " + fieldName + " FROM " + tableName;
548
    Statement stmt;
549
    
550
    try {
551
      stmt = conn.createStatement();
552
      ResultSet rs = stmt.executeQuery(query);
553
	
554
      while (rs.next()) {
555
        fieldValue = rs.getInt(fieldName);
556

    
557
        if (minValue == 0) {
558
          minValue = fieldValue;
559
        }
560
        else {
561
          minValue = Math.min(minValue, fieldValue);
562
        }
563
      }
564
      
565
      stmt.close();
566
    } 
567
    catch(SQLException ex) {
568
      System.out.println("SQLException: " + ex.getMessage());
569
    }
570

    
571
    return minValue;
572
  }
573
  
574
  
575
  /**
576
   * For every Harvest site schedule in the database, harvest the
577
   * documents for that site if they are due to be harvested.
578
   * 
579
   * @throws SAXException
580
   * @throws IOException
581
   * @throws ParserConfigurationException
582
   */
583
  private void harvest() {
584
    HarvestSiteSchedule harvestSiteSchedule;
585

    
586
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
587
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
588
      
589
      if (Harvester.schemaLocation != null) {
590
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
591
      }
592
      
593
      harvestSiteSchedule.harvestDocumentList();
594
    }
595
  }
596
  
597
  
598
  /**
599
   * Initializes the detailLogID and harvestLogID values to their current
600
   * maximums + 1.
601
   */
602
  public void initLogIDs() {
603
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
604
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
605
  }
606
  
607

    
608
  /**
609
   * Prints the header of the harvest report.
610
   * 
611
   * @param out            the PrintStream object to print to
612
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
613
   *                       have a value of 0 if no particular site is involved,
614
   *                       which indicates that the report is being prepared
615
   *                       for the Harvester Administrator rather than for a
616
   *                       particular Site Contact.
617
   */
618
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
619
    HarvestLog harvestLog;
620
    int logSiteScheduleID;
621
    int nErrors = 0;
622
    String phrase;
623
    
624
    for (int i = 0; i < harvestLogList.size(); i++) {
625
      harvestLog = (HarvestLog) harvestLogList.get(i);
626
      logSiteScheduleID = harvestLog.getSiteScheduleID();
627
      
628
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
629
        if (harvestLog.isErrorEntry()) {
630
          nErrors++;
631
        }
632
      }      
633
    }
634

    
635
    out.println(marker);
636
    out.println(filler);
637
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
638
    out.println(filler);
639

    
640
    if (nErrors > 0) {
641
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
642
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
643
      out.println("* Please see the log entries below for additonal details.");
644
    }
645
    else {
646
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
647
    }
648
    
649
    out.println(filler);
650
    out.println(marker);
651
  }
652
    
653

    
654
  /**
655
   * Prints harvest log entries for this harvest run. Entries may be filtered
656
   * for a particular site, or all entries may be printed.
657
   * 
658
   * @param out            the PrintStream object to write to
659
   * @param maxCodeLevel   the maximum code level that should be printed,
660
   *                       e.g. "warning". Any log entries higher than this
661
   *                       level will not be printed.
662
   * @param siteScheduleID if greater than 0, indicates that the log
663
   *                       entry should only be printed for a particular site
664
   *                       as identified by its siteScheduleID. if 0, then
665
   *                       print output for all sites.
666
   */
667
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
668
                      ) {
669
    HarvestLog harvestLog;
670
    int logSiteScheduleID;
671
    int nErrors = 0;
672
    String phrase;
673
    
674
    out.println("");
675
    out.println(marker);
676
    out.println(filler);
677
    out.println("*                       LOG ENTRIES");
678
    out.println(filler);
679
    out.println(marker);
680

    
681
    for (int i = 0; i < harvestLogList.size(); i++) {
682
      harvestLog = (HarvestLog) harvestLogList.get(i);
683
      logSiteScheduleID = harvestLog.getSiteScheduleID();
684
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
685
        harvestLog.printOutput(out, maxCodeLevel);
686
      }
687
    }
688
  }
689
    
690

    
691
  /**
692
   * Prints the site schedule data for a given site.
693
   * 
694
   * @param out              the PrintStream to write to
695
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
696
   */
697
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
698
    HarvestSiteSchedule harvestSiteSchedule;
699

    
700
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
701
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
702
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
703
        harvestSiteSchedule.printOutput(out);
704
      }
705
    }
706
  }
707
  
708

    
709
  /**
710
   * Prunes old records from the HARVEST_LOG table. Records are removed if
711
   * their HARVEST_DATE is older than a given number of days, as stored in the
712
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
713
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
714
   */
715
  private void pruneHarvestLog() {
716
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
717
    Date dateLastLog;                    // Prune everything prior to this date
718
    String deleteString;
719
    String deleteStringDetailLog;
720
    long delta;
721
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
722
    int recordsDeleted;
723
    int recordsDeletedDetail = 0;
724
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
725
    String dateString;
726
    ResultSet rs;
727
    String selectString;
728
    Statement stmt;
729
    long timeLastLog = 0;
730
    SQLWarning warn;
731
     
732
    delta = logPeriod * millisecondsPerDay;
733
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
734
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
735
    deleteStringDetailLog = 
736
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
737
    timeLastLog = currentTime - delta;
738
    dateLastLog = new Date(timeLastLog);
739
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
740
    deleteString += dateString;
741
    selectString += dateString;
742

    
743
    try {
744
      System.out.println(
745
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
746

    
747
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
748
       * table.
749
       */
750
      stmt = conn.createStatement();                            
751
      rs = stmt.executeQuery(selectString);
752
      warn = rs.getWarnings();
753

    
754
      if (warn != null) {
755
        System.out.println("\n---Warning---\n");
756

    
757
        while (warn != null) {
758
          System.out.println("Message: " + warn.getMessage());
759
          System.out.println("SQLState: " + warn.getSQLState());
760
          System.out.print("Vendor error code: ");
761
          System.out.println(warn.getErrorCode());
762
          System.out.println("");
763
          warn = warn.getNextWarning();
764
        }
765
      } 
766

    
767
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
768
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
769
       * be pruned first because its records have a child relationship to those
770
       * in HARVEST_LOG.
771
       */
772
      while (rs.next()) {
773
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
774
        stmt = conn.createStatement();                            
775
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
776
                                            harvestLogID);
777
        recordsDeletedDetail += recordsDeleted;
778
        stmt.close();
779
      }
780
 
781
      /* Now prune entries from the HARVEST_LOG table using a single update.
782
       */
783
      stmt = conn.createStatement();                            
784
      recordsDeleted = stmt.executeUpdate(deleteString);
785
      stmt.close();
786

    
787
      System.out.println("  " + recordsDeletedDetail + 
788
                         " records deleted from HARVEST_DETAIL_LOG");
789
      System.out.println("  " + recordsDeleted + 
790
                         " records deleted from HARVEST_LOG");
791
    }
792
    catch (SQLException e) {
793
      System.out.println("SQLException: " + e.getMessage());
794
    }
795
  }
796
    
797

    
798
  /**
799
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
800
   * a HarvestSiteSchedule object for each row in the table.
801
   */
802
  private void readHarvestSiteSchedule() {
803
    HarvestSiteSchedule harvestSiteSchedule;
804
    ResultSet rs;
805
    SQLWarning warn;
806
    Statement stmt;
807

    
808
    String contactEmail;
809
    String dateLastHarvest;
810
    String dateNextHarvest;
811
    String documentListURL;
812
    String ldapDN;
813
    String ldapPwd;
814
    int siteScheduleID;
815
    String unit;
816
    int updateFrequency;
817
        
818
    try {
819
      // Read the HARVEST_SITE_SCHEDULE table
820
      stmt = conn.createStatement();
821
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
822
      warn = rs.getWarnings();
823

    
824
      if (warn != null) {
825
        System.out.println("\n---Warning---\n");
826

    
827
        while (warn != null) {
828
          System.out.println("Message: " + warn.getMessage());
829
          System.out.println("SQLState: " + warn.getSQLState());
830
          System.out.print("Vendor error code: ");
831
          System.out.println(warn.getErrorCode());
832
          System.out.println("");
833
          warn = warn.getNextWarning();
834
        }
835
      }
836
     
837
      while (rs.next()) {
838
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
839
        documentListURL = rs.getString("DOCUMENTLISTURL");
840
        ldapDN = rs.getString("LDAPDN");
841
        ldapPwd = rs.getString("LDAPPWD");
842
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
843
        dateLastHarvest = rs.getString("DATELASTHARVEST");
844
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
845
        unit = rs.getString("UNIT");
846
        contactEmail = rs.getString("CONTACT_EMAIL");
847
        
848
        warn = rs.getWarnings();
849

    
850
        if (warn != null) {
851
          System.out.println("\n---Warning---\n");
852
      
853
          while (warn != null) {
854
            System.out.println("Message: " + warn.getMessage());
855
            System.out.println("SQLState: " + warn.getSQLState());
856
            System.out.print("Vendor error code: ");
857
            System.out.println(warn.getErrorCode());
858
            System.out.println("");
859
            warn = warn.getNextWarning();
860
          }
861
        }
862
      
863
        harvestSiteSchedule = new HarvestSiteSchedule(this,
864
                                                      siteScheduleID,
865
                                                      documentListURL,
866
                                                      ldapDN,
867
                                                      ldapPwd,
868
                                                      dateNextHarvest,
869
                                                      dateLastHarvest,
870
                                                      updateFrequency,
871
                                                      unit,
872
                                                      contactEmail
873
                                                     );
874
        harvestSiteScheduleList.add(harvestSiteSchedule);
875
      }
876
      
877
      rs.close();
878
      stmt.close();
879
    }
880
    catch (SQLException e) {
881
      System.out.println("Database access failed " + e);
882
      System.exit(1);
883
    }
884
    
885
  }
886
    
887

    
888
  /**
889
   * Sends a report to the Harvester Administrator. The report prints each log
890
   * entry pertaining to this harvest run.
891
   *
892
   * @param maxCodeLevel  the maximum code level that should be printed,
893
   *                      e.g. "warning". Any log entries higher than this
894
   *                      level will not be printed.
895
   */
896
  void reportToAdministrator(String maxCodeLevel) {
897
    PrintStream body;
898
    String from = harvesterAdministrator;
899
    String[] fromArray;
900
    MailMessage msg;
901
    int siteScheduleID = 0;
902
    String subject = "Report from Metacat Harvester: " + timestamp;
903
    String to = harvesterAdministrator;
904
    
905
    if (!to.equals("")) {
906
      System.out.println("Sending report to Harvester Administrator at address "
907
                         + harvesterAdministrator);
908
      
909
      try {
910
        msg = new MailMessage(smtpServer);
911

    
912
        if (from.indexOf(',') > 0) {
913
          fromArray = from.split(",");
914
          
915
          for (int i = 0; i < fromArray.length; i++) {
916
            if (i == 0) {
917
              msg.from(fromArray[i]);
918
            }
919
            
920
            msg.to(fromArray[i]);            
921
          }
922
        }
923
        else if (from.indexOf(';') > 0) {
924
          fromArray = from.split(";");
925

    
926
          for (int i = 0; i < fromArray.length; i++) {
927
            if (i == 0) {
928
              msg.from(fromArray[i]);
929
            }
930
            
931
            msg.to(fromArray[i]);            
932
          }
933
        }
934
        else {
935
          msg.from(from);
936
          msg.to(to);
937
        }
938
        
939
        msg.setSubject(subject);
940
        body = msg.getPrintStream();
941
        printHarvestHeader(body, siteScheduleID);
942
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
943
        msg.sendAndClose();
944
      }
945
      catch (IOException e) {
946
        System.out.println("There was a problem sending email to " + to);
947
        System.out.println("IOException: " + e.getMessage());
948
      }
949
    }
950
  }
951
  
952

    
953
  /**
954
   * Sets the harvest start time for this harvest run.
955
   * 
956
   * @param date
957
   */
958
  public void setHarvestStartTime(Date date) {
959
    harvestStartTime = date;
960
  }
961
    
962

    
963
  /**
964
   * Shuts down Harvester. Performs cleanup operations such as logging out
965
   * of Metacat and disconnecting from the database.
966
   */
967
  private void shutdown() {
968
    String maxCodeLevel = "debug";  // Print all log entries from level 1
969
                                    // ("error") to level 5 ("debug")
970
    int siteScheduleID = 0;
971

    
972
    // Log shutdown operation
973
    System.out.println("Shutting Down Harvester");
974
    addLogEntry(0, "Shutting Down Harvester", "harvester.HarvesterShutdown", 0, null, "");
975
    pruneHarvestLog();
976
    closeConnection();
977
    // Print log to standard output and then email the Harvester administrator
978
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
979
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
980
  }
981
    
982

    
983
    /**
984
	 * Initializes Harvester at startup. Connects to the database and to Metacat.
985
	 * 
986
	 * @param nHarvests        the nth harvest
987
	 * @param maxHarvests      the maximum number of harvests that this process
988
	 *                         can run
989
	 */
990
	private void startup(int nHarvests, int maxHarvests) {
991
		Boolean ctm;
992
		Integer lp;
993
		String metacatURL;
994
		Date now = new Date();
995

    
996
		timestamp = now.toString();
997
		System.out.println(Harvester.marker);
998
		System.out.print(timestamp + ": Starting Next Harvest");
999
		if (maxHarvests > 0) {
1000
			System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
1001
		}
1002
		System.out.print("\n");
1003
		try {
1004
			ctm = Boolean.valueOf(PropertyService.getProperty("harvester.connectToMetacat"));
1005
			connectToMetacat = ctm.booleanValue();
1006
			harvesterAdministrator = PropertyService
1007
					.getProperty("harvester.administrator");
1008
			smtpServer = PropertyService.getProperty("harvester.smtpServer");
1009

    
1010
			lp = Integer.valueOf(PropertyService.getProperty("harvester.logPeriod"));
1011
			logPeriod = lp.intValue();
1012
		} catch (NumberFormatException e) {
1013
			System.err.println("NumberFormatException: Error parsing logPeriod "
1014
					+ logPeriod + e.getMessage());
1015
			System.err.println("Defaulting to logPeriod of 90 days");
1016
			logPeriod = 90;
1017
		} catch (PropertyNotFoundException pnfe) {
1018
			System.out.println("PropertyNotFoundException: Error getting property: "
1019
					+ pnfe.getMessage());
1020
			return;
1021
		}
1022

    
1023
		conn = getConnection();
1024
		initLogIDs();
1025
		setHarvestStartTime(now);
1026
		// Log startup operation
1027
		addLogEntry(0, "Starting Up Harvester", "harvester.HarvesterStartup", 0, null, "");
1028

    
1029
		if (connectToMetacat()) {
1030
			try {
1031
				metacatURL = SystemUtil.getServletURL();
1032
				System.out.println("Connecting to Metacat: " + metacatURL);
1033
				metacat = MetacatFactory.createMetacatConnection(metacatURL);
1034
			} catch (MetacatInaccessibleException e) {
1035
				System.out.println("Metacat connection failed." + e.getMessage());
1036
			} catch (Exception e) {
1037
				System.out.println("Metacat connection failed." + e.getMessage());
1038
			}
1039
		}
1040
	}
1041

    
1042
}
(6-6/11)