Project

General

Profile

1
/*
2
 * HarvestSiteSchedule.java
3
 *
4
 * Created on January 14, 2004, 4:47 PM
5
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import com.oreilly.servlet.MailMessage;
10
import java.io.FileNotFoundException;
11
import java.io.IOException;
12
import java.io.InputStream;
13
import java.io.InputStreamReader;
14
import java.io.PrintStream;
15
import java.io.Reader;
16
import java.net.MalformedURLException;
17
import java.net.URL;
18
import java.sql.Connection;
19
import java.sql.SQLException;
20
import java.sql.Statement;
21
import java.text.DateFormat;
22
import java.text.ParseException;
23
import java.text.SimpleDateFormat;
24
import java.util.ArrayList;
25
import java.util.Date;
26
import javax.xml.parsers.ParserConfigurationException;
27
import org.xml.sax.Attributes;
28
import org.xml.sax.ContentHandler;
29
import org.xml.sax.ErrorHandler;
30
import org.xml.sax.InputSource;
31
import org.xml.sax.SAXException;
32
import org.xml.sax.SAXParseException;
33
import org.xml.sax.XMLReader;
34
import org.xml.sax.helpers.DefaultHandler;
35
import org.xml.sax.helpers.XMLReaderFactory;
36

    
37
import edu.ucsb.nceas.metacat.client.Metacat;
38
import edu.ucsb.nceas.metacat.client.MetacatException;
39
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
40

    
41

    
42
/**
43
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
44
 * table, determining when and how to harvest the documents for a given site.
45
 * 
46
 * @author  costa
47
 */
48
class HarvestSiteSchedule {
49
    
50
  private String contactEmail;
51
  private String dateLastHarvest;
52
  private String dateNextHarvest;
53
  private long delta;
54
  private String documentListURL;
55
  private Harvester harvester;
56
  private ArrayList harvestDocumentList = new ArrayList();
57
  private String harvestSiteEndTime;
58
  private String harvestSiteStartTime;
59
  private String ldapDN;
60
  private String ldapPwd;
61
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
62
  int siteScheduleID;
63
  private String unit;
64
  private int updateFrequency;
65
    
66
  /**
67
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
68
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
69
   * 
70
   * @param harvester       the parent Harvester object
71
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
72
   * @param documentListURL the value of the DOCUMENTLISTURL field
73
   * @param ldapDN          the value of the LDAPDN field
74
   * @param ldapPwd    the value of the LDAPPASSWORD field
75
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
76
   * @param dateLastHarvest the value of the DATELASTHARVEST field
77
   * @param updateFrequency the value of the UPDATEFREQUENCY field
78
   * @param unit            the value of the UNIT field
79
   * @param contactEmail    the value of the CONTACT_EMAIL field
80
   */
81
  public HarvestSiteSchedule(
82
                              Harvester harvester,
83
                              int    siteScheduleID,
84
                              String documentListURL,
85
                              String ldapDN,
86
                              String ldapPwd,
87
                              String dateNextHarvest,
88
                              String dateLastHarvest,
89
                              int    updateFrequency,
90
                              String unit,
91
                              String contactEmail
92
                            )
93
  {
94
    this.harvester = harvester;
95
    this.siteScheduleID = siteScheduleID;
96
    this.documentListURL = documentListURL;
97
    this.ldapDN = ldapDN;
98
    this.ldapPwd = ldapPwd;
99
    this.dateNextHarvest = dateNextHarvest;
100
    this.dateLastHarvest = dateLastHarvest;
101
    this.updateFrequency = updateFrequency;
102
    this.unit = unit;
103
    this.contactEmail = contactEmail;
104
    
105
    // Calculate the value of delta, the number of milliseconds between the
106
    // last harvest date and the next harvest date.
107
    delta = updateFrequency * millisecondsPerDay;
108
    
109
    if (unit.equals("weeks")) {
110
      delta *= 7;
111
    }
112
    else if (unit.equals("months")) {
113
      delta *= 30;
114
    }
115
  }
116
  
117
  
118
  /**
119
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
120
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
121
   * Calculates the date of the next harvest based on today's date and the 
122
   * update frequency.
123
   */
124
  private void dbUpdateHarvestDates() {
125
		Connection conn;
126
    long currentTime;                    // Current time in milliseconds
127
    Date dateNextHarvest;                // Date of next harvest
128
    String lastHarvest;
129
    String nextHarvest;
130
    Date now = new Date();
131
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
132
		Statement stmt;
133
    long timeNextHarvest;
134
    
135
    conn = harvester.conn;
136
    now = new Date();
137
    currentTime = now.getTime();
138
    timeNextHarvest = currentTime + delta;
139
    dateNextHarvest = new Date(timeNextHarvest);
140
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
141
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
142
	
143
		try {
144
			stmt = conn.createStatement();							
145
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
146
                         nextHarvest +
147
                         " WHERE SITE_SCHEDULE_ID = " +
148
                         siteScheduleID);
149
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
150
                         lastHarvest +
151
                         " WHERE SITE_SCHEDULE_ID = " +
152
                         siteScheduleID);
153
			stmt.close();
154
		}
155
    catch(SQLException e) {
156
			System.out.println("SQLException: " + e.getMessage());
157
		}
158
  }
159
    
160

    
161
  /**
162
   * Boolean to determine whether this site is currently due for its next
163
   * harvest.
164
   * 
165
   * @retrun     true if due for harvest, otherwise false
166
   */
167
  private boolean dueForHarvest() {
168
    boolean dueForHarvest = false;
169
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
170
    Date now = new Date();
171
    Date dnh;                          // Date of next harvest
172
    long currentTime = now.getTime();  // Current time in milliseconds
173
    long timeNextHarvest = 0;
174
    
175
    try {
176
      dnh = dateFormat.parse(dateNextHarvest);
177
      timeNextHarvest = dnh.getTime();
178
      
179
      if (timeNextHarvest < currentTime) {
180
        dueForHarvest = true;
181
        System.out.println("Due for harvest: " + documentListURL);
182
      }
183
      else {
184
        System.out.println("Not due for harvest: " + documentListURL);
185
      }
186
    }
187
    catch (ParseException e) {
188
      System.out.println("Error parsing date: " + e.getMessage());
189
    }
190
    
191
    return dueForHarvest;
192
  }
193

    
194

    
195
  /**
196
   * Harvests each document in the site document list.
197
   * 
198
   * @throws SAXException
199
   * @throws IOException
200
   * @throws ParserConfigurationException
201
   */
202
  public void harvestDocumentList() {
203
    HarvestDocument harvestDocument;
204
    boolean success;
205
    
206
    if (dueForHarvest()) {
207
      try {
208
        success = parseDocumentList();
209

    
210
        /* If the document list was validated, then proceed with harvesting
211
         * the documents
212
         */
213
        if (success) {
214
          metacatLogin();
215
        
216
          for (int i = 0; i < harvestDocumentList.size(); i++) {
217
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
218
          
219
            if (harvestDocument != null) {
220
              harvestDocument.harvestDocument();
221
            }
222
          }
223

    
224
          metacatLogout();      
225
          dbUpdateHarvestDates();  // Update the schedule
226
        }
227
      }
228
      catch (ParserConfigurationException e) {
229
        System.out.println("ParserConfigurationException: " + e.getMessage());
230
      }
231
      
232
      reportToSite();
233
    }
234
  }
235

    
236

    
237
  /**
238
   * Login to Metacat using the ldapDN and ldapPwd
239
   */
240
  private void metacatLogin() {
241
    Metacat metacat = harvester.metacat;
242

    
243
    if (harvester.connectToMetacat()) {
244
      try {
245
        System.out.println("Logging in to Metacat: " + ldapDN);
246
        metacat.login(ldapDN, ldapPwd);
247
        //System.out.println("Metacat login response: " + response);
248
        //sessionId = metacat.getSessionId();
249
        //System.out.println("Session ID: " + sessionId);
250
      } 
251
      catch (MetacatInaccessibleException e) {
252
        System.out.println("Metacat login failed." + e.getMessage());
253
      } 
254
      catch (Exception e) {
255
        System.out.println("Metacat login failed." + e.getMessage());
256
      }
257
    }    
258
  }
259
  
260
  
261
  /**
262
   * Logout from Metacat
263
   */
264
  private void metacatLogout() {
265
    Metacat metacat = harvester.metacat;
266

    
267
    if (harvester.connectToMetacat()) {
268
      try {    
269
        // Log out from the Metacat session
270
        System.out.println("Logging out from Metacat");
271
        metacat.logout();
272
      }
273
      catch (MetacatInaccessibleException e) {
274
        System.out.println("Metacat inaccessible: " + e.getMessage());
275
      }
276
      catch (MetacatException e) {
277
        System.out.println("Metacat exception: " + e.getMessage());
278
      }
279
    }
280
  }
281
  
282

    
283
  /**
284
   * Parse the site document list to find out which documents to harvest.
285
   * 
286
   * @return  true if successful, otherwise false
287
   */
288
  private boolean parseDocumentList() 
289
          throws ParserConfigurationException {
290
    DocumentListHandler documentListHandler = new DocumentListHandler();
291
    InputStream inputStream;
292
    InputStreamReader inputStreamReader;
293
    String schemaLocation = ".";
294
    boolean success = false;
295
    URL url;
296

    
297
    try {
298
      url = new URL(documentListURL);
299
      inputStream = url.openStream();
300
      harvester.addLogEntry(0,
301
                            "Retrieved: " + documentListURL,
302
                            "GetDocListSuccess",
303
                            siteScheduleID,
304
                            null,
305
                            "");
306
      inputStreamReader = new InputStreamReader(inputStream);
307
      documentListHandler.runParser(inputStreamReader, schemaLocation);
308
      harvester.addLogEntry(0,
309
                            "Validated: " + documentListURL,
310
                            "ValidateDocListSuccess",
311
                            siteScheduleID,
312
                            null,
313
                            "");
314
      success = true;
315
    }
316
    catch (MalformedURLException e){
317
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
318
                            "GetDocListError", siteScheduleID, null, "");
319
    }
320
    catch (FileNotFoundException e) {
321
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
322
                            "GetDocListError", siteScheduleID, null, "");
323
    }
324
    catch (SAXException e) {
325
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
326
                            "ValidateDocListError", siteScheduleID, null, "");
327
    }
328
    catch (ClassNotFoundException e) {
329
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
330
                            "ValidateDocListError", siteScheduleID, null, "");
331
    }
332
    catch (IOException e) {
333
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
334
                            "GetDocListError", siteScheduleID, null, "");
335
    }
336
    
337
    return success;
338
  }
339

    
340

    
341
  /**
342
   * Prints the data that is stored in this HarvestSiteSchedule object.
343
   * 
344
   * @param out   the PrintStream to write to
345
   */
346
  void printOutput(PrintStream out) {
347
    out.println("* siteScheduleID:       " + siteScheduleID);
348
    out.println("* documentListURL:      " + documentListURL);
349
    out.println("* ldapDN:               " + ldapDN);
350
    out.println("* dateNextHarvest:      " + dateNextHarvest);
351
    out.println("* dateLastHarvest:      " + dateLastHarvest);
352
    out.println("* updateFrequency:      " + updateFrequency);
353
    out.println("* unit:                 " + unit);
354
    out.println("* contactEmail:         " + contactEmail);
355
  }
356
  
357

    
358
  /**
359
   * Sends a report to the site summarizing the results of the harvest
360
   * operation.
361
   */
362
  void reportToSite() {
363
    PrintStream body;
364
    String from = "Metacat Harvester";
365
    MailMessage msg;
366
    String subject = "Report from Metacat Harvester";
367
    String to = contactEmail;
368
    
369
    if (!to.equals("")) {
370
      System.out.println("Sending report to siteScheduleID=" + siteScheduleID +
371
                         " at address: " + contactEmail);
372
      
373
      try {
374
        msg = new MailMessage();
375
        msg.from(from);
376
        msg.to(to);
377
        msg.setSubject(subject);
378
        body = msg.getPrintStream();
379
        
380
      }
381
      catch (IOException e) {
382
        System.out.println("There was a problem sending email to " + to);
383
        System.out.println("IOException: " + e.getMessage());
384
      }
385
      
386
    }
387
  }
388
    
389

    
390
  /**
391
   * This inner class extends DefaultHandler. It parses the document list,
392
   * creating a new HarvestDocument object every time it finds a </document>
393
   * end tag.
394
   */
395
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
396
  
397
    public String scope;
398
    public int identifier;
399
    public String identifierString;
400
    public String documentType;
401
    public int revision;
402
    public String revisionString;
403
    public String documentURL;
404
    private String currentQname;
405
    public final static String DEFAULT_PARSER = 
406
           "org.apache.xerces.parsers.SAXParser";
407
    private boolean schemaValidate = true;
408
	
409

    
410
	  /**
411
     * This method is called for any plain text within an element.
412
     * It parses the value for any of the following elements:
413
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
414
     * 
415
     * @param ch          the character array holding the parsed text
416
     * @param start       the start index
417
     * @param length      the text length
418
     * 
419
     */
420
    public void characters (char ch[], int start, int length) {
421
      String s = new String(ch, start, length);
422
 
423
      if (length > 0) {           
424
        if (currentQname.equals("scope")) {
425
          scope += s;
426
        }
427
        else if (currentQname.equals("identifier")) {
428
          identifierString += s;
429
        }
430
        else if (currentQname.equals("revision")) {
431
          revisionString += s;
432
        }
433
        else if (currentQname.equals("documentType")) {
434
          documentType += s;
435
        }
436
        else if (currentQname.equals("documentURL")) {
437
          documentURL += s;
438
        }
439
      }
440
    }
441

    
442

    
443
    /** 
444
     * Handles an end-of-document event.
445
     */
446
    public void endDocument () {
447
      System.out.println("Finished parsing " + documentListURL);
448
    }
449

    
450

    
451
    /** 
452
     * Handles an end-of-element event. If the end tag is </Document>, then
453
     * creates a new HarvestDocument object and pushes it to the document
454
     * list.
455
     * 
456
     * @param uri
457
     * @param localname
458
     * @param qname
459
     */
460
    public void endElement(String uri, 
461
                           String localname,
462
                           String qname) {
463
      
464
      HarvestDocument harvestDocument;
465
      
466
      if (qname.equals("identifier")) {
467
        identifier = Integer.parseInt(identifierString);
468
      }
469
      else if (qname.equals("revision")) {
470
        revision = Integer.parseInt(revisionString);
471
      }
472
      else if (qname.equals("document")) {
473
        harvestDocument = new HarvestDocument(
474
                                              harvester,
475
                                              HarvestSiteSchedule.this,
476
                                              scope,
477
                                              identifier,
478
                                              revision,
479
                                              documentType,
480
                                              documentURL
481
                                             );
482
        harvestDocumentList.add(harvestDocument);
483
      }
484

    
485
      currentQname = "";
486
    }
487

    
488

    
489
    /**
490
     * Method for handling errors during a parse
491
     *
492
     * @param exception         The parsing error
493
     * @exception SAXException  Description of Exception
494
     */
495
     public void error(SAXParseException e) throws SAXParseException {
496
        System.out.println("SAXParseException: " + e.getMessage());
497
        throw e;
498
    }
499

    
500

    
501
    /**
502
     * Run the validating parser
503
     *
504
     * @param xml             the xml stream to be validated
505
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
506
     * @exception IOException thrown when test files can't be opened
507
     * @exception ClassNotFoundException thrown when SAX Parser class not found
508
     * @exception SAXException
509
     * @exception SAXParserException
510
     */
511
    public void runParser(Reader xml, String schemaLocation)
512
           throws IOException, ClassNotFoundException,
513
                  SAXException, SAXParseException {
514

    
515
      // Get an instance of the parser
516
      XMLReader parser;
517

    
518
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
519
      // Set Handlers in the parser
520
      parser.setContentHandler((ContentHandler)this);
521
      parser.setErrorHandler((ErrorHandler)this);
522
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
523
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
524
      parser.setFeature("http://xml.org/sax/features/validation", true);
525
      parser.setProperty(
526
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
527
              schemaLocation);
528

    
529
      if (schemaValidate) {
530
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
531
                          true);
532
      }
533
    
534
      // Parse the document
535
      parser.parse(new InputSource(xml));
536
    }
537
    /**
538
     * Handles a start-of-document event.
539
     */
540
    public void startDocument () {
541
      System.out.println("Started parsing " + documentListURL);
542
    }
543

    
544

    
545
    /** 
546
     * Handles a start-of-element event.
547
     * 
548
     * @param uri
549
     * @param localname
550
     * @param qname
551
     * @param attributes
552
     */
553
    public void startElement(String uri, 
554
                             String localname,
555
                             String qname,
556
                             Attributes attributes) {
557
      
558
      currentQname = qname;
559

    
560
      if (qname.equals("scope")) {
561
        scope = "";
562
      }
563
      else if (qname.equals("identifier")) {
564
        identifierString = "";
565
      }
566
      else if (qname.equals("revision")) {
567
        revisionString = "";
568
      }
569
      else if (qname.equals("documentType")) {
570
        documentType = "";
571
      }
572
      else if (qname.equals("documentURL")) {
573
        documentURL = "";
574
      }
575
    }
576
  }
577
}
(4-4/9)