/*
 * HarvestSiteSchedule.java
 *
 * Created on January 14, 2004, 4:47 PM
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.FileNotFoundException;
10
import java.io.IOException;
11
import java.io.InputStream;
12
import java.io.InputStreamReader;
13
import java.io.Reader;
14
import java.net.MalformedURLException;
15
import java.net.URL;
16
import java.sql.Connection;
17
import java.sql.SQLException;
18
import java.sql.Statement;
19
import java.text.DateFormat;
20
import java.text.ParseException;
21
import java.text.SimpleDateFormat;
22
import java.util.ArrayList;
23
import java.util.Date;
24
import javax.xml.parsers.ParserConfigurationException;
25
import org.xml.sax.Attributes;
26
import org.xml.sax.ContentHandler;
27
import org.xml.sax.ErrorHandler;
28
import org.xml.sax.InputSource;
29
import org.xml.sax.SAXException;
30
import org.xml.sax.SAXParseException;
31
import org.xml.sax.XMLReader;
32
import org.xml.sax.helpers.DefaultHandler;
33
import org.xml.sax.helpers.XMLReaderFactory;
34

    
35
import edu.ucsb.nceas.metacat.client.Metacat;
36
import edu.ucsb.nceas.metacat.client.MetacatException;
37
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
38

    
39

    
40
/**
41
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
42
 * table, determining when and how to harvest the documents for a given site.
43
 * 
44
 * @author  costa
45
 */
46
class HarvestSiteSchedule {
47
    
48
  private String contactEmail;
49
  private String dateLastHarvest;
50
  private String dateNextHarvest;
51
  private long delta;
52
  private String documentListURL;
53
  private Harvester harvester;
54
  private ArrayList harvestDocumentList = new ArrayList();
55
  private String harvestSiteEndTime;
56
  private String harvestSiteStartTime;
57
  private String ldapDN;
58
  private String ldapPwd;
59
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
60
  int siteScheduleID;
61
  private String unit;
62
  private int updateFrequency;
63
    
64
  /**
65
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
66
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
67
   * 
68
   * @param harvester       the parent Harvester object
69
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
70
   * @param documentListURL the value of the DOCUMENTLISTURL field
71
   * @param ldapDN          the value of the LDAPDN field
72
   * @param ldapPwd    the value of the LDAPPASSWORD field
73
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
74
   * @param dateLastHarvest the value of the DATELASTHARVEST field
75
   * @param updateFrequency the value of the UPDATEFREQUENCY field
76
   * @param unit            the value of the UNIT field
77
   * @param contactEmail    the value of the CONTACT_EMAIL field
78
   */
79
  public HarvestSiteSchedule(
80
                              Harvester harvester,
81
                              int    siteScheduleID,
82
                              String documentListURL,
83
                              String ldapDN,
84
                              String ldapPwd,
85
                              String dateNextHarvest,
86
                              String dateLastHarvest,
87
                              int    updateFrequency,
88
                              String unit,
89
                              String contactEmail
90
                            )
91
  {
92
    this.harvester = harvester;
93
    this.siteScheduleID = siteScheduleID;
94
    this.documentListURL = documentListURL;
95
    this.ldapDN = ldapDN;
96
    this.ldapPwd = ldapPwd;
97
    this.dateNextHarvest = dateNextHarvest;
98
    this.dateLastHarvest = dateLastHarvest;
99
    this.updateFrequency = updateFrequency;
100
    this.unit = unit;
101
    this.contactEmail = contactEmail;
102
    
103
    // Calculate the value of delta, the number of milliseconds between the
104
    // last harvest date and the next harvest date.
105
    delta = updateFrequency * millisecondsPerDay;
106
    
107
    if (unit.equals("weeks")) {
108
      delta *= 7;
109
    }
110
    else if (unit.equals("months")) {
111
      delta *= 30;
112
    }
113
  }
114
  
115
  
116
  /**
117
   * Updates the DATELASTHARVEST value of the HARVEST_SITE_SCHEDULE table
118
   * after a harvest operation has completed. Calculates the date of the next 
119
   * harvest based on today's date and the update frequency.
120
   */
121
  private void dbUpdateHarvestSiteSchedule() {
122
		Connection conn;
123
    long currentTime;                    // Current time in milliseconds
124
    Date dateNextHarvest;                // Date of next harvest
125
    String lastHarvest;
126
    String nextHarvest;
127
    Date now = new Date();
128
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
129
		Statement stmt;
130
    long timeNextHarvest;
131
    
132
    conn = harvester.conn;
133
    now = new Date();
134
    currentTime = now.getTime();
135
    timeNextHarvest = currentTime + delta;
136
    dateNextHarvest = new Date(timeNextHarvest);
137
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
138
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
139
	
140
		try {
141
			stmt = conn.createStatement();							
142
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
143
                         nextHarvest +
144
                         " WHERE SITE_SCHEDULE_ID = " +
145
                         siteScheduleID);
146
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
147
                         lastHarvest +
148
                         " WHERE SITE_SCHEDULE_ID = " +
149
                         siteScheduleID);
150
			stmt.close();
151
		}
152
    catch(SQLException e) {
153
			System.out.println("SQLException: " + e.getMessage());
154
		}
155
  }
156
    
157

    
158
  /**
159
   * Boolean to determine whether this site is currently due for its next
160
   * harvest.
161
   * 
162
   * @retrun     true if due for harvest, otherwise false
163
   */
164
  private boolean dueForHarvest() {
165
    boolean dueForHarvest = false;
166
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
167
    Date now = new Date();
168
    Date dnh;                          // Date of next harvest
169
    long currentTime = now.getTime();  // Current time in milliseconds
170
    long timeNextHarvest = 0;
171
    
172
    try {
173
      dnh = dateFormat.parse(dateNextHarvest);
174
      timeNextHarvest = dnh.getTime();
175
      
176
      if (timeNextHarvest < currentTime) {
177
        dueForHarvest = true;
178
        System.out.println("Due for harvest: " + documentListURL);
179
      }
180
      else {
181
        System.out.println("Not due for harvest: " + documentListURL);
182
      }
183
    }
184
    catch (ParseException e) {
185
      System.out.println("Error parsing date: " + e.getMessage());
186
    }
187
    
188
    return dueForHarvest;
189
  }
190

    
191

    
192
  /**
193
   * Harvests each document in the site document list.
194
   * 
195
   * @throws SAXException
196
   * @throws IOException
197
   * @throws ParserConfigurationException
198
   */
199
  public void harvestDocumentList() {
200
    HarvestDocument harvestDocument;
201
    boolean success;
202
    
203
    if (dueForHarvest()) {
204
      try {
205
        success = parseDocumentList();
206

    
207
        /* If the document list was validated, then proceed with harvesting
208
         * the documents
209
         */
210
        if (success) {
211
          metacatLogin();
212
        
213
          for (int i = 0; i < harvestDocumentList.size(); i++) {
214
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
215
          
216
            if (harvestDocument != null) {
217
              harvestDocument.harvestDocument();
218
            }
219
          }
220

    
221
          metacatLogout();      
222
          dbUpdateHarvestSiteSchedule();  // Update the schedule
223
        }
224
      }
225
      catch (ParserConfigurationException e) {
226
        System.out.println("ParserConfigurationException: " + e.getMessage());
227
      }
228
      
229
      reportToSite();
230
    }
231
  }
232

    
233

    
234
  /**
235
   * Login to Metacat using the ldapDN and ldapPwd
236
   */
237
  private void metacatLogin() {
238
    Metacat metacat = harvester.metacat;
239

    
240
    if (harvester.connectToMetacat()) {
241
      try {
242
        System.out.println("Logging in to Metacat: " + ldapDN);
243
        metacat.login(ldapDN, ldapPwd);
244
        //System.out.println("Metacat login response: " + response);
245
        //sessionId = metacat.getSessionId();
246
        //System.out.println("Session ID: " + sessionId);
247
      } 
248
      catch (MetacatInaccessibleException e) {
249
        System.out.println("Metacat login failed." + e.getMessage());
250
      } 
251
      catch (Exception e) {
252
        System.out.println("Metacat login failed." + e.getMessage());
253
      }
254
    }    
255
  }
256
  
257
  
258
  /**
259
   * Logout from Metacat
260
   */
261
  private void metacatLogout() {
262
    Metacat metacat = harvester.metacat;
263

    
264
    if (harvester.connectToMetacat()) {
265
      try {    
266
        // Log out from the Metacat session
267
        System.out.println("Logging out from Metacat");
268
        metacat.logout();
269
      }
270
      catch (MetacatInaccessibleException e) {
271
        System.out.println("Metacat inaccessible: " + e.getMessage());
272
      }
273
      catch (MetacatException e) {
274
        System.out.println("Metacat exception: " + e.getMessage());
275
      }
276
    }
277
  }
278
  
279

    
280
  /**
281
   * Parse the site document list to find out which documents to harvest.
282
   * 
283
   * @return  true if successful, otherwise false
284
   */
285
  private boolean parseDocumentList() 
286
          throws ParserConfigurationException {
287
    DocumentListHandler documentListHandler = new DocumentListHandler();
288
    InputStream inputStream;
289
    InputStreamReader inputStreamReader;
290
    String schemaLocation = ".";
291
    boolean success = false;
292
    URL url;
293

    
294
    try {
295
      url = new URL(documentListURL);
296
      inputStream = url.openStream();
297
      harvester.addLogEntry(0, "", "GetDocListSuccess", 
298
                            siteScheduleID, null, "");
299
      inputStreamReader = new InputStreamReader(inputStream);
300
      documentListHandler.runParser(inputStreamReader, schemaLocation);
301
      harvester.addLogEntry(0, "", "ValidateDocListSuccess", 
302
                            siteScheduleID, null, "");
303
      success = true;
304
    }
305
    catch (MalformedURLException e){
306
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
307
                            "GetDocListError", siteScheduleID, null, "");
308
    }
309
    catch (FileNotFoundException e) {
310
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
311
                            "GetDocListError", siteScheduleID, null, "");
312
    }
313
    catch (SAXException e) {
314
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
315
                            "ValidateDocListError", siteScheduleID, null, "");
316
    }
317
    catch (ClassNotFoundException e) {
318
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
319
                            "ValidateDocListError", siteScheduleID, null, "");
320
    }
321
    catch (IOException e) {
322
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
323
                            "GetDocListError", siteScheduleID, null, "");
324
    }
325
    
326
    return success;
327
  }
328

    
329

    
330
  /**
331
   * Prints the data that is stored in this HarvestSiteSchedule object.
332
   */
333
  void printOutput() {
334
    System.out.println("* siteScheduleID:       " + siteScheduleID);
335
    System.out.println("* documentListURL:      " + documentListURL);
336
    System.out.println("* ldapDN:               " + ldapDN);
337
    System.out.println("* dateNextHarvest:      " + dateNextHarvest);
338
    System.out.println("* dateLastHarvest:      " + dateLastHarvest);
339
    System.out.println("* updateFrequency:      " + updateFrequency);
340
    System.out.println("* unit:                 " + unit);
341
    System.out.println("* contactEmail:         " + contactEmail);
342
  }
343
  
344

    
345
  /**
346
   * Sends a report to the site summarizing the results of the harvest
347
   * operation.
348
   */
349
  void reportToSite() {
350
    System.out.println("Sending report to site: " + contactEmail);
351
  }
352
    
353

    
354
  /**
355
   * This inner class extends DefaultHandler. It parses the document list,
356
   * creating a new HarvestDocument object every time it finds a </Document>
357
   * end tag.
358
   */
359
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
360
  
361
    public String scope;
362
    public int identifier;
363
    public String identifierString;
364
    public String documentType;
365
    public int revision;
366
    public String revisionString;
367
    public String documentURL;
368
    private String currentQname;
369
    public final static String DEFAULT_PARSER = 
370
           "org.apache.xerces.parsers.SAXParser";
371
    private boolean schemaValidate = true;
372
	
373

    
374
	  /**
375
     * This method is called for any plain text within an element.
376
     * It parses the value for any of the following elements:
377
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
378
     * 
379
     * @param ch          the character array holding the parsed text
380
     * @param start       the start index
381
     * @param length      the text length
382
     * 
383
     */
384
    public void characters (char ch[], int start, int length) {
385
      String s = new String(ch, start, length);
386
 
387
      if (length > 0) {           
388
        if (currentQname.equals("scope")) {
389
          scope += s;
390
        }
391
        else if (currentQname.equals("identifier")) {
392
          identifierString += s;
393
        }
394
        else if (currentQname.equals("revision")) {
395
          revisionString += s;
396
        }
397
        else if (currentQname.equals("documentType")) {
398
          documentType += s;
399
        }
400
        else if (currentQname.equals("documentURL")) {
401
          documentURL += s;
402
        }
403
      }
404
    }
405

    
406

    
407
    /** 
408
     * Handles an end-of-document event.
409
     */
410
    public void endDocument () {
411
      System.out.println("Finished parsing " + documentListURL);
412
    }
413

    
414

    
415
    /** 
416
     * Handles an end-of-element event. If the end tag is </Document>, then
417
     * creates a new HarvestDocument object and pushes it to the document
418
     * list.
419
     * 
420
     * @param uri
421
     * @param localname
422
     * @param qname
423
     */
424
    public void endElement(String uri, 
425
                           String localname,
426
                           String qname) {
427
      
428
      HarvestDocument harvestDocument;
429
      
430
      if (qname.equals("identifier")) {
431
        identifier = Integer.parseInt(identifierString);
432
      }
433
      else if (qname.equals("revision")) {
434
        revision = Integer.parseInt(revisionString);
435
      }
436
      else if (qname.equals("document")) {
437
        harvestDocument = new HarvestDocument(
438
                                              harvester,
439
                                              HarvestSiteSchedule.this,
440
                                              scope,
441
                                              identifier,
442
                                              revision,
443
                                              documentType,
444
                                              documentURL
445
                                             );
446
        harvestDocumentList.add(harvestDocument);
447
      }
448

    
449
      currentQname = "";
450
    }
451

    
452

    
453
    /**
454
     * Method for handling errors during a parse
455
     *
456
     * @param exception         The parsing error
457
     * @exception SAXException  Description of Exception
458
     */
459
     public void error(SAXParseException e) throws SAXParseException {
460
        System.out.println("SAXParseException: " + e.getMessage());
461
        throw e;
462
    }
463

    
464

    
465
    /**
466
     * Run the validating parser
467
     *
468
     * @param xml             the xml stream to be validated
469
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
470
     * @exception IOException thrown when test files can't be opened
471
     * @exception ClassNotFoundException thrown when SAX Parser class not found
472
     * @exception SAXException
473
     * @exception SAXParserException
474
     */
475
    public void runParser(Reader xml, String schemaLocation)
476
           throws IOException, ClassNotFoundException,
477
                  SAXException, SAXParseException {
478

    
479
      // Get an instance of the parser
480
      XMLReader parser;
481

    
482
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
483
      // Set Handlers in the parser
484
      parser.setContentHandler((ContentHandler)this);
485
      parser.setErrorHandler((ErrorHandler)this);
486
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
487
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
488
      parser.setFeature("http://xml.org/sax/features/validation", true);
489
      parser.setProperty(
490
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
491
              schemaLocation);
492

    
493
      if (schemaValidate) {
494
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
495
                          true);
496
      }
497
    
498
      // Parse the document
499
      parser.parse(new InputSource(xml));
500
    }
501
    /**
502
     * Handles a start-of-document event.
503
     */
504
    public void startDocument () {
505
      System.out.println("Started parsing " + documentListURL);
506
    }
507

    
508

    
509
    /** 
510
     * Handles a start-of-element event.
511
     * 
512
     * @param uri
513
     * @param localname
514
     * @param qname
515
     * @param attributes
516
     */
517
    public void startElement(String uri, 
518
                             String localname,
519
                             String qname,
520
                             Attributes attributes) {
521
      
522
      currentQname = qname;
523

    
524
      if (qname.equals("scope")) {
525
        scope = "";
526
      }
527
      else if (qname.equals("identifier")) {
528
        identifierString = "";
529
      }
530
      else if (qname.equals("revision")) {
531
        revisionString = "";
532
      }
533
      else if (qname.equals("documentType")) {
534
        documentType = "";
535
      }
536
      else if (qname.equals("documentURL")) {
537
        documentURL = "";
538
      }
539
    }
540
  }
541
}
(4-4/7)