Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: daigle $'
7
 *     '$Date: 2008-07-15 10:11:14 -0700 (Tue, 15 Jul 2008) $'
8
 * '$Revision: 4125 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.IOException;
29
import java.io.PrintStream;
30
import java.sql.Connection;
31
import java.sql.DriverManager;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.sql.SQLWarning;
35
import java.sql.Statement;
36
import java.util.ArrayList;
37
import java.text.SimpleDateFormat;
38
import java.util.Date;
39

    
40

    
41
import edu.ucsb.nceas.metacat.client.Metacat;
42
import edu.ucsb.nceas.metacat.client.MetacatFactory;
43
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
44
import edu.ucsb.nceas.metacat.service.PropertyService;
45
import edu.ucsb.nceas.metacat.service.ServiceException;
46
import edu.ucsb.nceas.metacat.util.SystemUtil;
47
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
48

    
49
/**
50
 * Harvester is the main class for the Harvester application. The main
51
 * method creates a single Harvester object which drives the application.
52
 * 
53
 * @author    costa
54
 * 
55
 */
56
public class Harvester {
57

    
58
  /*
59
   * Class fields
60
   */
61
  private static final String CONFIG_DIR = "../../build/war/WEB-INF";
62
  private static final String CONFIG_DIR_TEST = "./build/war/WEB-INF";
63
//  private static final String CONFIG_NAME = "metacat.properties";
64
  public static final String filler = "*";
65
  private static boolean keepRunning = true;
66
  public static final String marker =
67
"*****************************************************************************";
68
//  public static PropertyService propertyService = null;
69
  private static String schemaLocation = null;
70
   
71

    
72
  /* 
73
   * Class methods
74
   */
75
   
76

    
77
  /**
   * Creates a new Harvester. The constructor itself performs no work; all
   * object fields keep the values given by their declarations.
   */
  public Harvester() {
    super();
  }
82
    
83

    
84
  /**
85
   * Loads Harvester options from a configuration file.
86
   */
87
  public static void loadProperties(boolean test) {
88
    String configDir = test ? CONFIG_DIR_TEST : CONFIG_DIR;    
89

    
90
    try {
91
    	PropertyService.getInstance(configDir);
92
    } 
93
    catch (ServiceException e) {
94
      System.out.println("Error in loading properties: " + e.getMessage());
95
    }
96
  }
97
  
98
  
99
    /**
100
	 * Harvester main method.
101
	 * 
102
	 * @param args
103
	 *            the command line arguments
104
	 * @throws SAXException
105
	 * @throws IOException
106
	 * @throws ParserConfigurationException
107
	 */
108
	public static void main(String[] args) {
109
		Integer delayDefault = new Integer(0); // Default number of hours delay
110
		int delay = delayDefault.intValue(); // Delay in hours before first
111
												// harvest
112
		Integer d; // Used for determining delay
113
		long delta; // endTime - startTime
114
		long endTime; // time that a harvest completes
115
		Harvester harvester; // object for a single harvest run
116
		Integer maxHarvestsDefault = new Integer(0); // Default max harvests
117
		int maxHarvests = maxHarvestsDefault.intValue(); // Max number of
118
															// harvests
119
		Integer mh; // used in determining max harvests
120
		int nHarvests = 0; // counts the number of harvest runs
121
		final long oneHour = (60 * 60 * 1000); // milliseconds in one hour
122
		Integer periodDefault = new Integer(24); // Default hours between
123
													// harvests
124
		int period = periodDefault.intValue(); // Hours between harvests
125
		Integer p; // Used in determining the period
126
		long startTime; // time that a harvest run starts
127
		boolean test = false; // Passed to loadOption()
128

    
129
		if (args.length > 0) {
130
			schemaLocation = args[0];
131
			System.err.println("schemaLocation: " + schemaLocation);
132

    
133
			try {
134
				Thread.sleep(10000);
135
			} catch (InterruptedException e) {
136
				e.printStackTrace();
137
			}
138
		}
139

    
140
		System.out.println(marker);
141
		System.out.println("Starting Harvester");
142
		Harvester.loadProperties(test);
143

    
144
		// Parse the delay property. Use default if necessary.
145
		try {
146
			d = Integer.valueOf(PropertyService.getProperty("delay"));
147
			delay = d.intValue();
148
		} catch (NumberFormatException e) {
149
			System.out.println("NumberFormatException: Error parsing delay: "
150
					+ e.getMessage());
151
			System.out.println("Defaulting to delay=" + delayDefault);
152
			delay = delayDefault.intValue();
153
		} catch (PropertyNotFoundException pnfe) {
154
			System.out.println("PropertyNotFoundException: Error finding delay: "
155
					+ pnfe.getMessage());
156
			System.out.println("Defaulting to delay=" + delayDefault);
157
			delay = delayDefault.intValue();
158
		}
159

    
160
		// Parse the maxHarvests property. Use default if necessary.
161
		try {
162
			mh = Integer.valueOf(PropertyService.getProperty("maxHarvests"));
163
			maxHarvests = mh.intValue();
164
		} catch (NumberFormatException e) {
165
			System.out.println("NumberFormatException: Error parsing maxHarvests: "
166
					+ e.getMessage());
167
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
168
			maxHarvests = maxHarvestsDefault.intValue();
169
		} catch (PropertyNotFoundException pnfe) {
170
			System.out.println("PropertyNotFoundException: Error finding maxHarvests: "
171
					+ pnfe.getMessage());
172
			System.out.println("Defaulting to maxHarvests=" + maxHarvestsDefault);
173
			maxHarvests = maxHarvestsDefault.intValue();
174
		}
175

    
176
		// Parse the period property. Use default if necessary.
177
		try {
178
			p = Integer.valueOf(PropertyService.getProperty("period"));
179
			period = p.intValue();
180
		} catch (NumberFormatException e) {
181
			System.out.println("NumberFormatException: Error parsing period: "
182
					+ e.getMessage());
183
			System.out.println("Defaulting to period=" + periodDefault);
184
			period = periodDefault.intValue();
185
		} catch (PropertyNotFoundException pnfe) {
186
			System.out.println("PropertyNotFoundException: Error finding period: "
187
					+ pnfe.getMessage());
188
			System.out.println("Defaulting to period=" + periodDefault);
189
			period = periodDefault.intValue();
190
		}
191

    
192
		// Sleep for delay number of hours prior to starting first harvest
193
		if (delay > 0) {
194
			try {
195
				System.out.print("First harvest will begin in " + delay);
196
				if (delay == 1) {
197
					System.out.println(" hour.");
198
				} else {
199
					System.out.println(" hours.");
200
				}
201
				Thread.sleep(delay * oneHour);
202
			} catch (InterruptedException e) {
203
				System.err.println("InterruptedException: " + e.getMessage());
204
				System.exit(1);
205
			}
206
		}
207

    
208
    // Repeat a new harvest once every period number of hours, until we reach
209
    // the maximum number of harvests, or indefinitely if maxHarvests <= 0.
210
    // Subtract delta from the time period so
211
    // that each harvest will start at a fixed interval.
212
    //
213
    while (keepRunning && ((nHarvests < maxHarvests) || (maxHarvests <= 0))) {
214
      nHarvests++;
215
      startTime = System.currentTimeMillis();
216
      harvester = new Harvester();                // New object for this
217
													// harvest
218
      harvester.startup(nHarvests, maxHarvests);  // Start up Harvester
219
      harvester.readHarvestSiteSchedule();        // Read the database table
220
      harvester.harvest();                        // Harvest the documents
221
      harvester.shutdown();                       // Shut down Harvester
222
      endTime = System.currentTimeMillis();
223
      delta = endTime - startTime;
224

    
225
      if ((nHarvests < maxHarvests) || (maxHarvests <= 0)) {
226
        try {
227
          System.out.println("Next harvest will begin in " + 
228
                             period + " hours.");
229
          Thread.sleep((period * oneHour) - delta);
230
        }
231
        catch (InterruptedException e) {
232
          System.err.println("InterruptedException: " + e.getMessage());
233
          System.exit(1);
234
        }
235
      }
236
    }
237
  }
238
  
239
  
240
  /**
241
   * Set the keepRunning flag. If set to false, the main program will end
242
   * the while loop that keeps harvester running every period number of hours.
243
   * The static method is intended to be called from the HarvesterServlet class
244
   * which creates a thread to run Harvester. When the thread is destroyed, the
245
   * thread's destroy() method calls Harvester.setKeepRunning(false).
246
   * 
247
   * @param keepRunning
248
   */
249
  static void setKeepRunning(boolean keepRunning) {
250
    Harvester.keepRunning = keepRunning;
251
  }
252

    
253
  
254
  /*
255
   * Object fields
256
   */
257

    
258
  /** Database connection */
259
  private Connection conn = null;
260
  
261
  /** Used during development to determine whether to connect to metacat 
262
   *  Sometimes it's useful to test parts of the code without actually
263
   *  connecting to Metacat.
264
   */
265
  private boolean connectToMetacat;
266

    
267
  /** Highest DETAIL_LOG_ID primary key in the HARVEST_DETAIL_LOG table */
268
  private int detailLogID;
269
  
270
  /** Email address of the Harvester Administrator */
271
  String harvesterAdministrator;
272
  
273
  /** Highest HARVEST_LOG_ID primary key in the HARVEST_LOG table */
274
  private int harvestLogID;
275
  
276
  /** End time of this harvest session */
277
  private Date harvestEndTime;
278
  
279
  /** List of HarvestLog objects. Stores log entries for report generation. */
280
  private ArrayList harvestLogList = new ArrayList();
281
  
282
  /** List of HarvestSiteSchedule objects */
283
  private ArrayList harvestSiteScheduleList = new ArrayList();
284
  
285
  /** Start time of this harvest session */
286
  private Date harvestStartTime;
287
  
288
  /** Number of days to save log records. Any that are older are purged. */
289
  int logPeriod;
290
  
291
  /** Metacat client object */
292
  Metacat metacat;
293
  
294
  /** SMTP server for sending mail messages */
295
  String smtpServer;
296
  
297
  /** The timestamp for this harvest run. Used for output only. */
298
  String timestamp;
299
  
300

    
301
  /*
302
   * Object methods
303
   */
304
   
305
  /**
306
   * Creates a new HarvestLog object and adds it to the harvestLogList.
307
   * 
308
   * @param  status          the status of the harvest operation
309
   * @param  message         the message text of the harvest operation
310
   * @param  harvestOperationCode  the harvest operation code
311
   * @param  siteScheduleID  the siteScheduleID for which this operation was
312
   *                         performed. 0 indicates that the operation did not
313
   *                         involve a particular harvest site.
314
   * @param  harvestDocument the associated HarvestDocument object. May be null.
315
   * @param  errorMessage    additional error message pertaining to document
316
   *                         error.
317
   */
318
  void addLogEntry(int    status,
319
                   String message,
320
                   String harvestOperationCode,
321
                   int    siteScheduleID,
322
                   HarvestDocument harvestDocument,
323
                   String errorMessage
324
                  ) {
325
    HarvestLog harvestLog;
326
    int harvestLogID = getHarvestLogID();
327
    int detailLogID;
328

    
329
    /* If there is no associated harvest document, call the basic constructor;
330
     * else call the extended constructor.
331
     */
332
    if (harvestDocument == null) {    
333
      harvestLog = new HarvestLog(this, conn, harvestLogID, harvestStartTime, 
334
                                  status, message, harvestOperationCode, 
335
                                  siteScheduleID);
336
    }
337
    else {
338
      detailLogID = getDetailLogID();
339
      harvestLog = new HarvestLog(this, conn, harvestLogID, detailLogID, 
340
                                  harvestStartTime, status, message,
341
                                  harvestOperationCode, siteScheduleID,
342
                                  harvestDocument, errorMessage);
343
    }
344
    
345
    harvestLogList.add(harvestLog);
346
  }
347
  
348
  
349
  public void closeConnection() {
350
    try {
351
      // Close the database connection
352
      System.out.println("Closing the database connection.");
353
      conn.close();
354
    }
355
    catch (SQLException e) {
356
      System.out.println("Database access failed " + e);
357
    }    
358
  }
359

    
360

    
361
  /**
362
   * Determines whether Harvester should attempt to connect to Metacat.
363
   * Used during development and testing.
364
   * 
365
   * @return     true if Harvester should connect, otherwise false
366
   */
367
  boolean connectToMetacat () {
368
    return connectToMetacat;
369
  }
370
  
371

    
372
  /**
373
   * Normalizes text prior to insertion into the HARVEST_LOG or
374
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
375
   * character with the double quote character. This prevents SQL errors
376
   * involving words that contain single quotes. Also removes \n and \r
377
   * characters from the text.
378
   * 
379
   * @param text  the original string
380
   * @return      a string containing the normalized text
381
   */
382
  public String dequoteText(String text) {
383
    char c;
384
    StringBuffer stringBuffer = new StringBuffer();
385
    
386
    for (int i = 0; i < text.length(); i++) {
387
      c = text.charAt(i);
388
      switch (c) {
389
        case '\'':
390
          stringBuffer.append('\"');
391
          break;
392
        case '\r':
393
        case '\n':
394
          break;
395
        default:
396
          stringBuffer.append(c);
397
          break;
398
      }
399
    }
400
    
401
    return stringBuffer.toString();
402
  }
403
  
404
  /**
405
   * Returns a connection to the database. Opens the connection if a connection
406
   * has not already been made previously.
407
   * 
408
   * @return  conn  the database Connection object
409
   */
410
  public Connection getConnection() {
411
    String dbDriver = "";
412
    String defaultDB = null;
413
    String password = null;
414
    String user = null;
415
    SQLWarning warn;
416
    
417
    if (conn == null) {
418
    	try {
419
			dbDriver = PropertyService.getProperty("database.driver");
420
			defaultDB = PropertyService.getProperty("database.connectionURI");
421
			password = PropertyService.getProperty("database.password");
422
			user = PropertyService.getProperty("database.user");
423
		} catch (PropertyNotFoundException pnfe) {
424
			System.out.println("Can't find property " + pnfe);
425
	        System.exit(1);
426
		}
427

    
428
      // Load the jdbc driver
429
      try {
430
        Class.forName(dbDriver);
431
      }
432
      catch (ClassNotFoundException e) {
433
        System.out.println("Can't load driver " + e);
434
        System.exit(1);
435
      } 
436

    
437
      // Make the database connection
438
      try {
439
        System.out.println("Getting connection to Harvester tables");
440
        conn = DriverManager.getConnection(defaultDB, user, password);
441

    
442
        // If a SQLWarning object is available, print its warning(s).
443
        // There may be multiple warnings chained.
444
        warn = conn.getWarnings();
445
      
446
        if (warn != null) {
447
          while (warn != null) {
448
            System.out.println("SQLState: " + warn.getSQLState());
449
            System.out.println("Message:  " + warn.getMessage());
450
            System.out.println("Vendor: " + warn.getErrorCode());
451
            System.out.println("");
452
            warn = warn.getNextWarning();
453
          }
454
        }
455
      }
456
      catch (SQLException e) {
457
        System.out.println("Database access failed " + e);
458
        System.exit(1);
459
      }
460
    }
461
    
462
    return conn;
463
  }
464

    
465

    
466
  /**
467
   * Gets the current value of the detailLogID for storage as a primary key in
468
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
469
   * 
470
   * @return  the current value of the detailLogID
471
   */
472
  public int getDetailLogID() {
473
    int currentValue = detailLogID;
474
    
475
    detailLogID++;
476
    return currentValue;
477
  }
478
  
479
  
480
  /**
481
   * Gets the current value of the harvestLogID for storage as a primary key in
482
   * the HARVEST_LOG_ID field of the HARVEST_LOG table.
483
   * 
484
   * @return  the current value of the detailLogID
485
   */
486
  public int getHarvestLogID() {
487
    int currentValue = harvestLogID;
488
    
489
    harvestLogID++;
490
    return currentValue;
491
  }
492
  
493

    
494
  /** 
495
   * Gets the maximum value of an integer field from a table.
496
   * 
497
   * @param tableName  the database table name
498
   * @param fieldName  the field name of the integer field in the table
499
   * @return  the maximum integer stored in the fieldName field of tableName
500
   */
501
  private int getMaxValue(String tableName, String fieldName) {
502
    int maxValue = 0;
503
    int fieldValue;
504
    String query = "SELECT " + fieldName + " FROM " + tableName;
505
    Statement stmt;
506
    
507
	try {
508
      stmt = conn.createStatement();
509
      ResultSet rs = stmt.executeQuery(query);
510
	
511
      while (rs.next()) {
512
        fieldValue = rs.getInt(fieldName);
513
        maxValue = Math.max(maxValue, fieldValue);
514
      }
515
      
516
      stmt.close();
517
    } 
518
    catch(SQLException ex) {
519
      System.out.println("SQLException: " + ex.getMessage());
520
    }
521
    
522
    return maxValue;
523
  }
524
  
525
  
526
  /** 
527
   * Gets the minimum value of an integer field from a table.
528
   * 
529
   * @param tableName  the database table name
530
   * @param fieldName  the field name of the integer field in the table
531
   * @return  the minimum integer stored in the fieldName field of tableName
532
   */
533
  private int getMinValue(String tableName, String fieldName) {
534
    int minValue = 0;
535
    int fieldValue;
536
    String query = "SELECT " + fieldName + " FROM " + tableName;
537
    Statement stmt;
538
    
539
    try {
540
      stmt = conn.createStatement();
541
      ResultSet rs = stmt.executeQuery(query);
542
	
543
      while (rs.next()) {
544
        fieldValue = rs.getInt(fieldName);
545

    
546
        if (minValue == 0) {
547
          minValue = fieldValue;
548
        }
549
        else {
550
          minValue = Math.min(minValue, fieldValue);
551
        }
552
      }
553
      
554
      stmt.close();
555
    } 
556
    catch(SQLException ex) {
557
      System.out.println("SQLException: " + ex.getMessage());
558
    }
559

    
560
    return minValue;
561
  }
562
  
563
  
564
  /**
565
   * For every Harvest site schedule in the database, harvest the
566
   * documents for that site if they are due to be harvested.
567
   * 
568
   * @throws SAXException
569
   * @throws IOException
570
   * @throws ParserConfigurationException
571
   */
572
  private void harvest() {
573
    HarvestSiteSchedule harvestSiteSchedule;
574

    
575
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
576
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
577
      
578
      if (Harvester.schemaLocation != null) {
579
        harvestSiteSchedule.setSchemaLocation(Harvester.schemaLocation);
580
      }
581
      
582
      harvestSiteSchedule.harvestDocumentList();
583
    }
584
  }
585
  
586
  
587
  /**
588
   * Initializes the detailLogID and harvestLogID values to their current
589
   * maximums + 1.
590
   */
591
  public void initLogIDs() {
592
    detailLogID = getMaxValue("HARVEST_DETAIL_LOG", "DETAIL_LOG_ID") + 1;
593
    harvestLogID = getMaxValue("HARVEST_LOG", "HARVEST_LOG_ID") + 1;
594
  }
595
  
596

    
597
  /**
598
   * Prints the header of the harvest report.
599
   * 
600
   * @param out            the PrintStream object to print to
601
   * @param siteScheduleID the siteScheduleId of the HarvestSiteSchedule. Will
602
   *                       have a value of 0 if no particular site is involved,
603
   *                       which indicates that the report is being prepared
604
   *                       for the Harvester Administrator rather than for a
605
   *                       particular Site Contact.
606
   */
607
  void printHarvestHeader(PrintStream out, int siteScheduleID) {
608
    HarvestLog harvestLog;
609
    int logSiteScheduleID;
610
    int nErrors = 0;
611
    String phrase;
612
    
613
    for (int i = 0; i < harvestLogList.size(); i++) {
614
      harvestLog = (HarvestLog) harvestLogList.get(i);
615
      logSiteScheduleID = harvestLog.getSiteScheduleID();
616
      
617
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
618
        if (harvestLog.isErrorEntry()) {
619
          nErrors++;
620
        }
621
      }      
622
    }
623

    
624
    out.println(marker);
625
    out.println(filler);
626
    out.println("* METACAT HARVESTER REPORT: " + timestamp);
627
    out.println(filler);
628

    
629
    if (nErrors > 0) {
630
      phrase = (nErrors == 1) ? " ERROR WAS " : " ERRORS WERE ";
631
      out.println("* A TOTAL OF " + nErrors + phrase + "DETECTED.");
632
      out.println("* Please see the log entries below for additonal details.");
633
    }
634
    else {
635
      out.println("* NO ERRORS WERE DETECTED DURING THIS HARVEST.");
636
    }
637
    
638
    out.println(filler);
639
    out.println(marker);
640
  }
641
    
642

    
643
  /**
644
   * Prints harvest log entries for this harvest run. Entries may be filtered
645
   * for a particular site, or all entries may be printed.
646
   * 
647
   * @param out            the PrintStream object to write to
648
   * @param maxCodeLevel   the maximum code level that should be printed,
649
   *                       e.g. "warning". Any log entries higher than this
650
   *                       level will not be printed.
651
   * @param siteScheduleID if greater than 0, indicates that the log
652
   *                       entry should only be printed for a particular site
653
   *                       as identified by its siteScheduleID. if 0, then
654
   *                       print output for all sites.
655
   */
656
  void printHarvestLog(PrintStream out, String maxCodeLevel, int siteScheduleID
657
                      ) {
658
    HarvestLog harvestLog;
659
    int logSiteScheduleID;
660
    int nErrors = 0;
661
    String phrase;
662
    
663
    out.println("");
664
    out.println(marker);
665
    out.println(filler);
666
    out.println("*                       LOG ENTRIES");
667
    out.println(filler);
668
    out.println(marker);
669

    
670
    for (int i = 0; i < harvestLogList.size(); i++) {
671
      harvestLog = (HarvestLog) harvestLogList.get(i);
672
      logSiteScheduleID = harvestLog.getSiteScheduleID();
673
      if ((siteScheduleID == 0) || (siteScheduleID == logSiteScheduleID)) {
674
        harvestLog.printOutput(out, maxCodeLevel);
675
      }
676
    }
677
  }
678
    
679

    
680
  /**
681
   * Prints the site schedule data for a given site.
682
   * 
683
   * @param out              the PrintStream to write to
684
   * @param siteScheduleID   the primary key in the HARVEST_SITE_SCHEDULE table
685
   */
686
  void printHarvestSiteSchedule(PrintStream out, int siteScheduleID) {
687
    HarvestSiteSchedule harvestSiteSchedule;
688

    
689
    for (int i = 0; i < harvestSiteScheduleList.size(); i++) {
690
      harvestSiteSchedule = (HarvestSiteSchedule)harvestSiteScheduleList.get(i);
691
      if (harvestSiteSchedule.siteScheduleID == siteScheduleID) {
692
        harvestSiteSchedule.printOutput(out);
693
      }
694
    }
695
  }
696
  
697

    
698
  /**
699
   * Prunes old records from the HARVEST_LOG table. Records are removed if
700
   * their HARVEST_DATE is older than a given number of days, as stored in the
701
   * logPeriod object field. First deletes records from the HARVEST_DETAIL_LOG
702
   * table that reference the to-be-pruned entries in the HARVEST_LOG table.
703
   */
704
  private void pruneHarvestLog() {
705
    long currentTime = harvestStartTime.getTime(); // time in milliseconds
706
    Date dateLastLog;                    // Prune everything prior to this date
707
    String deleteString;
708
    String deleteStringDetailLog;
709
    long delta;
710
    final long millisecondsPerDay = (1000 * 60 * 60 * 24);
711
    int recordsDeleted;
712
    int recordsDeletedDetail = 0;
713
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
714
    String dateString;
715
    ResultSet rs;
716
    String selectString;
717
    Statement stmt;
718
    long timeLastLog = 0;
719
    SQLWarning warn;
720
     
721
    delta = logPeriod * millisecondsPerDay;
722
    deleteString = "DELETE FROM HARVEST_LOG WHERE HARVEST_DATE < ";
723
    selectString="SELECT HARVEST_LOG_ID FROM HARVEST_LOG WHERE HARVEST_DATE < ";
724
    deleteStringDetailLog = 
725
                       "DELETE FROM HARVEST_DETAIL_LOG WHERE HARVEST_LOG_ID = ";
726
    timeLastLog = currentTime - delta;
727
    dateLastLog = new Date(timeLastLog);
728
    dateString = "'" + simpleDateFormat.format(dateLastLog) + "'";
729
    deleteString += dateString;
730
    selectString += dateString;
731

    
732
    try {
733
      System.out.println(
734
                "Pruning log entries from HARVEST_DETAIL_LOG and HARVEST_LOG:");
735

    
736
      /* Get the list of entries that need to be pruned from the HARVEST_LOG
737
       * table.
738
       */
739
      stmt = conn.createStatement();                            
740
      rs = stmt.executeQuery(selectString);
741
      warn = rs.getWarnings();
742

    
743
      if (warn != null) {
744
        System.out.println("\n---Warning---\n");
745

    
746
        while (warn != null) {
747
          System.out.println("Message: " + warn.getMessage());
748
          System.out.println("SQLState: " + warn.getSQLState());
749
          System.out.print("Vendor error code: ");
750
          System.out.println(warn.getErrorCode());
751
          System.out.println("");
752
          warn = warn.getNextWarning();
753
        }
754
      } 
755

    
756
      /* Delete any entries from the HARVEST_DETAIL_LOG which reference
757
       * HARVEST_LOG_IDs that are about to be pruned. HARVEST_DETAIL_LOG must
758
       * be pruned first because its records have a child relationship to those
759
       * in HARVEST_LOG.
760
       */
761
      while (rs.next()) {
762
        harvestLogID = rs.getInt("HARVEST_LOG_ID");
763
        stmt = conn.createStatement();                            
764
        recordsDeleted = stmt.executeUpdate(deleteStringDetailLog + 
765
                                            harvestLogID);
766
        recordsDeletedDetail += recordsDeleted;
767
        stmt.close();
768
      }
769
 
770
      /* Now prune entries from the HARVEST_LOG table using a single update.
771
       */
772
      stmt = conn.createStatement();                            
773
      recordsDeleted = stmt.executeUpdate(deleteString);
774
      stmt.close();
775

    
776
      System.out.println("  " + recordsDeletedDetail + 
777
                         " records deleted from HARVEST_DETAIL_LOG");
778
      System.out.println("  " + recordsDeleted + 
779
                         " records deleted from HARVEST_LOG");
780
    }
781
    catch (SQLException e) {
782
      System.out.println("SQLException: " + e.getMessage());
783
    }
784
  }
785
    
786

    
787
  /**
788
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
789
   * a HarvestSiteSchedule object for each row in the table.
790
   */
791
  private void readHarvestSiteSchedule() {
792
    HarvestSiteSchedule harvestSiteSchedule;
793
    ResultSet rs;
794
    SQLWarning warn;
795
    Statement stmt;
796

    
797
    String contactEmail;
798
    String dateLastHarvest;
799
    String dateNextHarvest;
800
    String documentListURL;
801
    String ldapDN;
802
    String ldapPwd;
803
    int siteScheduleID;
804
    String unit;
805
    int updateFrequency;
806
        
807
    try {
808
      // Read the HARVEST_SITE_SCHEDULE table
809
      stmt = conn.createStatement();
810
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
811
      warn = rs.getWarnings();
812

    
813
      if (warn != null) {
814
        System.out.println("\n---Warning---\n");
815

    
816
        while (warn != null) {
817
          System.out.println("Message: " + warn.getMessage());
818
          System.out.println("SQLState: " + warn.getSQLState());
819
          System.out.print("Vendor error code: ");
820
          System.out.println(warn.getErrorCode());
821
          System.out.println("");
822
          warn = warn.getNextWarning();
823
        }
824
      }
825
     
826
      while (rs.next()) {
827
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
828
        documentListURL = rs.getString("DOCUMENTLISTURL");
829
        ldapDN = rs.getString("LDAPDN");
830
        ldapPwd = rs.getString("LDAPPWD");
831
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
832
        dateLastHarvest = rs.getString("DATELASTHARVEST");
833
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
834
        unit = rs.getString("UNIT");
835
        contactEmail = rs.getString("CONTACT_EMAIL");
836
        
837
        warn = rs.getWarnings();
838

    
839
        if (warn != null) {
840
          System.out.println("\n---Warning---\n");
841
      
842
          while (warn != null) {
843
            System.out.println("Message: " + warn.getMessage());
844
            System.out.println("SQLState: " + warn.getSQLState());
845
            System.out.print("Vendor error code: ");
846
            System.out.println(warn.getErrorCode());
847
            System.out.println("");
848
            warn = warn.getNextWarning();
849
          }
850
        }
851
      
852
        harvestSiteSchedule = new HarvestSiteSchedule(this,
853
                                                      siteScheduleID,
854
                                                      documentListURL,
855
                                                      ldapDN,
856
                                                      ldapPwd,
857
                                                      dateNextHarvest,
858
                                                      dateLastHarvest,
859
                                                      updateFrequency,
860
                                                      unit,
861
                                                      contactEmail
862
                                                     );
863
        harvestSiteScheduleList.add(harvestSiteSchedule);
864
      }
865
      
866
      rs.close();
867
      stmt.close();
868
    }
869
    catch (SQLException e) {
870
      System.out.println("Database access failed " + e);
871
      System.exit(1);
872
    }
873
    
874
  }
875
    
876

    
877
  /**
878
   * Sends a report to the Harvester Administrator. The report prints each log
879
   * entry pertaining to this harvest run.
880
   *
881
   * @param maxCodeLevel  the maximum code level that should be printed,
882
   *                      e.g. "warning". Any log entries higher than this
883
   *                      level will not be printed.
884
   */
885
  void reportToAdministrator(String maxCodeLevel) {
886
    PrintStream body;
887
    String from = harvesterAdministrator;
888
    String[] fromArray;
889
    MailMessage msg;
890
    int siteScheduleID = 0;
891
    String subject = "Report from Metacat Harvester: " + timestamp;
892
    String to = harvesterAdministrator;
893
    
894
    if (!to.equals("")) {
895
      System.out.println("Sending report to Harvester Administrator at address "
896
                         + harvesterAdministrator);
897
      
898
      try {
899
        msg = new MailMessage(smtpServer);
900

    
901
        if (from.indexOf(',') > 0) {
902
          fromArray = from.split(",");
903
          
904
          for (int i = 0; i < fromArray.length; i++) {
905
            if (i == 0) {
906
              msg.from(fromArray[i]);
907
            }
908
            
909
            msg.to(fromArray[i]);            
910
          }
911
        }
912
        else if (from.indexOf(';') > 0) {
913
          fromArray = from.split(";");
914

    
915
          for (int i = 0; i < fromArray.length; i++) {
916
            if (i == 0) {
917
              msg.from(fromArray[i]);
918
            }
919
            
920
            msg.to(fromArray[i]);            
921
          }
922
        }
923
        else {
924
          msg.from(from);
925
          msg.to(to);
926
        }
927
        
928
        msg.setSubject(subject);
929
        body = msg.getPrintStream();
930
        printHarvestHeader(body, siteScheduleID);
931
        printHarvestLog(body, maxCodeLevel, siteScheduleID);
932
        msg.sendAndClose();
933
      }
934
      catch (IOException e) {
935
        System.out.println("There was a problem sending email to " + to);
936
        System.out.println("IOException: " + e.getMessage());
937
      }
938
    }
939
  }
940
  
941

    
942
  /**
943
   * Sets the harvest start time for this harvest run.
944
   * 
945
   * @param date
946
   */
947
  public void setHarvestStartTime(Date date) {
948
    harvestStartTime = date;
949
  }
950
    
951

    
952
  /**
953
   * Shuts down Harvester. Performs cleanup operations such as logging out
954
   * of Metacat and disconnecting from the database.
955
   */
956
  private void shutdown() {
957
    String maxCodeLevel = "debug";  // Print all log entries from level 1
958
                                    // ("error") to level 5 ("debug")
959
    int siteScheduleID = 0;
960

    
961
    // Log shutdown operation
962
    System.out.println("Shutting Down Harvester");
963
    addLogEntry(0, "Shutting Down Harvester", "HarvesterShutdown", 0, null, "");
964
    pruneHarvestLog();
965
    closeConnection();
966
    // Print log to standard output and then email the Harvester administrator
967
    printHarvestLog(System.out, maxCodeLevel, siteScheduleID);
968
    reportToAdministrator(maxCodeLevel);      // Send a copy to harvester admin
969
  }
970
    
971

    
972
    /**
973
	 * Initializes Harvester at startup. Connects to the database and to Metacat.
974
	 * 
975
	 * @param nHarvests        the nth harvest
976
	 * @param maxHarvests      the maximum number of harvests that this process
977
	 *                         can run
978
	 */
979
	private void startup(int nHarvests, int maxHarvests) {
980
		Boolean ctm;
981
		Integer lp;
982
		String metacatURL;
983
		Date now = new Date();
984

    
985
		timestamp = now.toString();
986
		System.out.println(Harvester.marker);
987
		System.out.print(timestamp + ": Starting Next Harvest");
988
		if (maxHarvests > 0) {
989
			System.out.print(" (" + nHarvests + "/" + maxHarvests + ")");
990
		}
991
		System.out.print("\n");
992
		try {
993
			ctm = Boolean.valueOf(PropertyService.getProperty("connectToMetacat"));
994
			connectToMetacat = ctm.booleanValue();
995
			harvesterAdministrator = PropertyService
996
					.getProperty("harvesterAdministrator");
997
			smtpServer = PropertyService.getProperty("smtpServer");
998

    
999
			lp = Integer.valueOf(PropertyService.getProperty("logPeriod"));
1000
			logPeriod = lp.intValue();
1001
		} catch (NumberFormatException e) {
1002
			System.err.println("NumberFormatException: Error parsing logPeriod "
1003
					+ logPeriod + e.getMessage());
1004
			System.err.println("Defaulting to logPeriod of 90 days");
1005
			logPeriod = 90;
1006
		} catch (PropertyNotFoundException pnfe) {
1007
			System.out.println("PropertyNotFoundException: Error getting property: "
1008
					+ pnfe.getMessage());
1009
			return;
1010
		}
1011

    
1012
		conn = getConnection();
1013
		initLogIDs();
1014
		setHarvestStartTime(now);
1015
		// Log startup operation
1016
		addLogEntry(0, "Starting Up Harvester", "HarvesterStartup", 0, null, "");
1017

    
1018
		if (connectToMetacat()) {
1019
			try {
1020
				metacatURL = SystemUtil.getServletURL();
1021
				System.out.println("Connecting to Metacat: " + metacatURL);
1022
				metacat = MetacatFactory.createMetacatConnection(metacatURL);
1023
			} catch (MetacatInaccessibleException e) {
1024
				System.out.println("Metacat connection failed." + e.getMessage());
1025
			} catch (Exception e) {
1026
				System.out.println("Metacat connection failed." + e.getMessage());
1027
			}
1028
		}
1029
	}
1030

    
1031
}
(6-6/11)