Project

General

Profile

1
/*
2
 * HarvestSiteSchedule.java
3
 *
4
 * Created on January 14, 2004, 4:47 PM
5
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.FileNotFoundException;
10
import java.io.IOException;
11
import java.io.InputStream;
12
import java.io.InputStreamReader;
13
import java.io.Reader;
14
import java.net.MalformedURLException;
15
import java.net.URL;
16
import java.sql.Connection;
17
import java.sql.SQLException;
18
import java.sql.Statement;
19
import java.text.DateFormat;
20
import java.text.ParseException;
21
import java.text.SimpleDateFormat;
22
import java.util.ArrayList;
23
import java.util.Date;
24
import javax.xml.parsers.ParserConfigurationException;
25
import org.xml.sax.Attributes;
26
import org.xml.sax.ContentHandler;
27
import org.xml.sax.ErrorHandler;
28
import org.xml.sax.InputSource;
29
import org.xml.sax.SAXException;
30
import org.xml.sax.SAXParseException;
31
import org.xml.sax.XMLReader;
32
import org.xml.sax.helpers.DefaultHandler;
33
import org.xml.sax.helpers.XMLReaderFactory;
34

    
35
import edu.ucsb.nceas.metacat.client.Metacat;
36
import edu.ucsb.nceas.metacat.client.MetacatException;
37
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
38

    
39

    
40
/**
41
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
42
 * table, determining when and how to harvest the documents for a given site.
43
 * 
44
 * @author  costa
45
 */
46
class HarvestSiteSchedule {
47
    
48
  private String contactEmail;
49
  private String dateLastHarvest;
50
  private String dateNextHarvest;
51
  private long delta;
52
  private String documentListURL;
53
  private Harvester harvester;
54
  private ArrayList harvestDocumentList = new ArrayList();
55
  private String harvestSiteEndTime;
56
  private String harvestSiteStartTime;
57
  private String ldapDN;
58
  private String ldapPwd;
59
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
60
  int siteScheduleID;
61
  private String unit;
62
  private int updateFrequency;
63
    
64
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
   * <p>
   * Also computes <code>delta</code>, the number of milliseconds between one
   * harvest and the next: <code>updateFrequency</code> days, multiplied by 7
   * when the unit is "weeks" or by 30 when the unit is "months" (a month is
   * approximated as 30 days). Any other unit value, including
   * <code>null</code>, is treated as days.
   *
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;

    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. millisecondsPerDay is a
    // long, so the multiplication is carried out in long arithmetic.
    delta = updateFrequency * millisecondsPerDay;

    // Constant-first comparison: a null unit (e.g. a NULL database column)
    // must not abort construction; it simply falls through as "days".
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;
    }
  }
114
  
115
  
116
  /**
117
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
118
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
119
   * Calculates the date of the next harvest based on today's date and the 
120
   * update frequency.
121
   */
122
  private void dbUpdateHarvestDates() {
123
		Connection conn;
124
    long currentTime;                    // Current time in milliseconds
125
    Date dateNextHarvest;                // Date of next harvest
126
    String lastHarvest;
127
    String nextHarvest;
128
    Date now = new Date();
129
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
130
		Statement stmt;
131
    long timeNextHarvest;
132
    
133
    conn = harvester.conn;
134
    now = new Date();
135
    currentTime = now.getTime();
136
    timeNextHarvest = currentTime + delta;
137
    dateNextHarvest = new Date(timeNextHarvest);
138
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
139
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
140
	
141
		try {
142
			stmt = conn.createStatement();							
143
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
144
                         nextHarvest +
145
                         " WHERE SITE_SCHEDULE_ID = " +
146
                         siteScheduleID);
147
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
148
                         lastHarvest +
149
                         " WHERE SITE_SCHEDULE_ID = " +
150
                         siteScheduleID);
151
			stmt.close();
152
		}
153
    catch(SQLException e) {
154
			System.out.println("SQLException: " + e.getMessage());
155
		}
156
  }
157
    
158

    
159
  /**
160
   * Boolean to determine whether this site is currently due for its next
161
   * harvest.
162
   * 
163
   * @retrun     true if due for harvest, otherwise false
164
   */
165
  private boolean dueForHarvest() {
166
    boolean dueForHarvest = false;
167
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
168
    Date now = new Date();
169
    Date dnh;                          // Date of next harvest
170
    long currentTime = now.getTime();  // Current time in milliseconds
171
    long timeNextHarvest = 0;
172
    
173
    try {
174
      dnh = dateFormat.parse(dateNextHarvest);
175
      timeNextHarvest = dnh.getTime();
176
      
177
      if (timeNextHarvest < currentTime) {
178
        dueForHarvest = true;
179
        System.out.println("Due for harvest: " + documentListURL);
180
      }
181
      else {
182
        System.out.println("Not due for harvest: " + documentListURL);
183
      }
184
    }
185
    catch (ParseException e) {
186
      System.out.println("Error parsing date: " + e.getMessage());
187
    }
188
    
189
    return dueForHarvest;
190
  }
191

    
192

    
193
  /**
194
   * Harvests each document in the site document list.
195
   * 
196
   * @throws SAXException
197
   * @throws IOException
198
   * @throws ParserConfigurationException
199
   */
200
  public void harvestDocumentList() {
201
    HarvestDocument harvestDocument;
202
    boolean success;
203
    
204
    if (dueForHarvest()) {
205
      try {
206
        success = parseDocumentList();
207

    
208
        /* If the document list was validated, then proceed with harvesting
209
         * the documents
210
         */
211
        if (success) {
212
          metacatLogin();
213
        
214
          for (int i = 0; i < harvestDocumentList.size(); i++) {
215
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
216
          
217
            if (harvestDocument != null) {
218
              harvestDocument.harvestDocument();
219
            }
220
          }
221

    
222
          metacatLogout();      
223
          dbUpdateHarvestDates();  // Update the schedule
224
        }
225
      }
226
      catch (ParserConfigurationException e) {
227
        System.out.println("ParserConfigurationException: " + e.getMessage());
228
      }
229
      
230
      reportToSite();
231
    }
232
  }
233

    
234

    
235
  /**
236
   * Login to Metacat using the ldapDN and ldapPwd
237
   */
238
  private void metacatLogin() {
239
    Metacat metacat = harvester.metacat;
240

    
241
    if (harvester.connectToMetacat()) {
242
      try {
243
        System.out.println("Logging in to Metacat: " + ldapDN);
244
        metacat.login(ldapDN, ldapPwd);
245
        //System.out.println("Metacat login response: " + response);
246
        //sessionId = metacat.getSessionId();
247
        //System.out.println("Session ID: " + sessionId);
248
      } 
249
      catch (MetacatInaccessibleException e) {
250
        System.out.println("Metacat login failed." + e.getMessage());
251
      } 
252
      catch (Exception e) {
253
        System.out.println("Metacat login failed." + e.getMessage());
254
      }
255
    }    
256
  }
257
  
258
  
259
  /**
260
   * Logout from Metacat
261
   */
262
  private void metacatLogout() {
263
    Metacat metacat = harvester.metacat;
264

    
265
    if (harvester.connectToMetacat()) {
266
      try {    
267
        // Log out from the Metacat session
268
        System.out.println("Logging out from Metacat");
269
        metacat.logout();
270
      }
271
      catch (MetacatInaccessibleException e) {
272
        System.out.println("Metacat inaccessible: " + e.getMessage());
273
      }
274
      catch (MetacatException e) {
275
        System.out.println("Metacat exception: " + e.getMessage());
276
      }
277
    }
278
  }
279
  
280

    
281
  /**
282
   * Parse the site document list to find out which documents to harvest.
283
   * 
284
   * @return  true if successful, otherwise false
285
   */
286
  private boolean parseDocumentList() 
287
          throws ParserConfigurationException {
288
    DocumentListHandler documentListHandler = new DocumentListHandler();
289
    InputStream inputStream;
290
    InputStreamReader inputStreamReader;
291
    String schemaLocation = ".";
292
    boolean success = false;
293
    URL url;
294

    
295
    try {
296
      url = new URL(documentListURL);
297
      inputStream = url.openStream();
298
      harvester.addLogEntry(0,
299
                            "Retrieved: " + documentListURL,
300
                            "GetDocListSuccess",
301
                            siteScheduleID,
302
                            null,
303
                            "");
304
      inputStreamReader = new InputStreamReader(inputStream);
305
      documentListHandler.runParser(inputStreamReader, schemaLocation);
306
      harvester.addLogEntry(0,
307
                            "Validated: " + documentListURL,
308
                            "ValidateDocListSuccess",
309
                            siteScheduleID,
310
                            null,
311
                            "");
312
      success = true;
313
    }
314
    catch (MalformedURLException e){
315
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
316
                            "GetDocListError", siteScheduleID, null, "");
317
    }
318
    catch (FileNotFoundException e) {
319
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
320
                            "GetDocListError", siteScheduleID, null, "");
321
    }
322
    catch (SAXException e) {
323
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
324
                            "ValidateDocListError", siteScheduleID, null, "");
325
    }
326
    catch (ClassNotFoundException e) {
327
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
328
                            "ValidateDocListError", siteScheduleID, null, "");
329
    }
330
    catch (IOException e) {
331
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
332
                            "GetDocListError", siteScheduleID, null, "");
333
    }
334
    
335
    return success;
336
  }
337

    
338

    
339
  /**
340
   * Prints the data that is stored in this HarvestSiteSchedule object.
341
   */
342
  void printOutput() {
343
    System.out.println("* siteScheduleID:       " + siteScheduleID);
344
    System.out.println("* documentListURL:      " + documentListURL);
345
    System.out.println("* ldapDN:               " + ldapDN);
346
    System.out.println("* dateNextHarvest:      " + dateNextHarvest);
347
    System.out.println("* dateLastHarvest:      " + dateLastHarvest);
348
    System.out.println("* updateFrequency:      " + updateFrequency);
349
    System.out.println("* unit:                 " + unit);
350
    System.out.println("* contactEmail:         " + contactEmail);
351
  }
352
  
353

    
354
  /**
355
   * Sends a report to the site summarizing the results of the harvest
356
   * operation.
357
   */
358
  void reportToSite() {
359
    System.out.println("Sending report to site: " + contactEmail);
360
  }
361
    
362

    
363
  /**
364
   * This inner class extends DefaultHandler. It parses the document list,
365
   * creating a new HarvestDocument object every time it finds a </Document>
366
   * end tag.
367
   */
368
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
369
  
370
    public String scope;
371
    public int identifier;
372
    public String identifierString;
373
    public String documentType;
374
    public int revision;
375
    public String revisionString;
376
    public String documentURL;
377
    private String currentQname;
378
    public final static String DEFAULT_PARSER = 
379
           "org.apache.xerces.parsers.SAXParser";
380
    private boolean schemaValidate = true;
381
	
382

    
383
	  /**
384
     * This method is called for any plain text within an element.
385
     * It parses the value for any of the following elements:
386
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
387
     * 
388
     * @param ch          the character array holding the parsed text
389
     * @param start       the start index
390
     * @param length      the text length
391
     * 
392
     */
393
    public void characters (char ch[], int start, int length) {
394
      String s = new String(ch, start, length);
395
 
396
      if (length > 0) {           
397
        if (currentQname.equals("scope")) {
398
          scope += s;
399
        }
400
        else if (currentQname.equals("identifier")) {
401
          identifierString += s;
402
        }
403
        else if (currentQname.equals("revision")) {
404
          revisionString += s;
405
        }
406
        else if (currentQname.equals("documentType")) {
407
          documentType += s;
408
        }
409
        else if (currentQname.equals("documentURL")) {
410
          documentURL += s;
411
        }
412
      }
413
    }
414

    
415

    
416
    /** 
417
     * Handles an end-of-document event.
418
     */
419
    public void endDocument () {
420
      System.out.println("Finished parsing " + documentListURL);
421
    }
422

    
423

    
424
    /** 
425
     * Handles an end-of-element event. If the end tag is </Document>, then
426
     * creates a new HarvestDocument object and pushes it to the document
427
     * list.
428
     * 
429
     * @param uri
430
     * @param localname
431
     * @param qname
432
     */
433
    public void endElement(String uri, 
434
                           String localname,
435
                           String qname) {
436
      
437
      HarvestDocument harvestDocument;
438
      
439
      if (qname.equals("identifier")) {
440
        identifier = Integer.parseInt(identifierString);
441
      }
442
      else if (qname.equals("revision")) {
443
        revision = Integer.parseInt(revisionString);
444
      }
445
      else if (qname.equals("document")) {
446
        harvestDocument = new HarvestDocument(
447
                                              harvester,
448
                                              HarvestSiteSchedule.this,
449
                                              scope,
450
                                              identifier,
451
                                              revision,
452
                                              documentType,
453
                                              documentURL
454
                                             );
455
        harvestDocumentList.add(harvestDocument);
456
      }
457

    
458
      currentQname = "";
459
    }
460

    
461

    
462
    /**
463
     * Method for handling errors during a parse
464
     *
465
     * @param exception         The parsing error
466
     * @exception SAXException  Description of Exception
467
     */
468
     public void error(SAXParseException e) throws SAXParseException {
469
        System.out.println("SAXParseException: " + e.getMessage());
470
        throw e;
471
    }
472

    
473

    
474
    /**
475
     * Run the validating parser
476
     *
477
     * @param xml             the xml stream to be validated
478
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
479
     * @exception IOException thrown when test files can't be opened
480
     * @exception ClassNotFoundException thrown when SAX Parser class not found
481
     * @exception SAXException
482
     * @exception SAXParserException
483
     */
484
    public void runParser(Reader xml, String schemaLocation)
485
           throws IOException, ClassNotFoundException,
486
                  SAXException, SAXParseException {
487

    
488
      // Get an instance of the parser
489
      XMLReader parser;
490

    
491
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
492
      // Set Handlers in the parser
493
      parser.setContentHandler((ContentHandler)this);
494
      parser.setErrorHandler((ErrorHandler)this);
495
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
496
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
497
      parser.setFeature("http://xml.org/sax/features/validation", true);
498
      parser.setProperty(
499
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
500
              schemaLocation);
501

    
502
      if (schemaValidate) {
503
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
504
                          true);
505
      }
506
    
507
      // Parse the document
508
      parser.parse(new InputSource(xml));
509
    }
510
    /**
511
     * Handles a start-of-document event.
512
     */
513
    public void startDocument () {
514
      System.out.println("Started parsing " + documentListURL);
515
    }
516

    
517

    
518
    /** 
519
     * Handles a start-of-element event.
520
     * 
521
     * @param uri
522
     * @param localname
523
     * @param qname
524
     * @param attributes
525
     */
526
    public void startElement(String uri, 
527
                             String localname,
528
                             String qname,
529
                             Attributes attributes) {
530
      
531
      currentQname = qname;
532

    
533
      if (qname.equals("scope")) {
534
        scope = "";
535
      }
536
      else if (qname.equals("identifier")) {
537
        identifierString = "";
538
      }
539
      else if (qname.equals("revision")) {
540
        revisionString = "";
541
      }
542
      else if (qname.equals("documentType")) {
543
        documentType = "";
544
      }
545
      else if (qname.equals("documentURL")) {
546
        documentURL = "";
547
      }
548
    }
549
  }
550
}
(4-4/7)