Project

General

Profile

1
/*
2
 * HarvestSiteSchedule.java
3
 *
4
 * Created on January 14, 2004, 4:47 PM
5
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.FileNotFoundException;
10
import java.io.IOException;
11
import java.io.InputStream;
12
import java.io.InputStreamReader;
13
import java.io.Reader;
14
import java.net.MalformedURLException;
15
import java.net.URL;
16
import java.sql.Connection;
17
import java.sql.SQLException;
18
import java.sql.Statement;
19
import java.text.DateFormat;
20
import java.text.ParseException;
21
import java.text.SimpleDateFormat;
22
import java.util.ArrayList;
23
import java.util.Date;
24
import javax.xml.parsers.ParserConfigurationException;
25
import org.xml.sax.Attributes;
26
import org.xml.sax.ContentHandler;
27
import org.xml.sax.ErrorHandler;
28
import org.xml.sax.InputSource;
29
import org.xml.sax.SAXException;
30
import org.xml.sax.SAXParseException;
31
import org.xml.sax.XMLReader;
32
import org.xml.sax.helpers.DefaultHandler;
33
import org.xml.sax.helpers.XMLReaderFactory;
34

    
35
import edu.ucsb.nceas.metacat.client.Metacat;
36
import edu.ucsb.nceas.metacat.client.MetacatException;
37
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
38

    
39

    
40
/**
41
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
42
 * table, determining when and how to harvest the documents for a given site.
43
 * 
44
 * @author  costa
45
 */
46
class HarvestSiteSchedule {
47
    
48
  private String contactEmail;
49
  private String dateLastHarvest;
50
  private String dateNextHarvest;
51
  private long delta;
52
  private String documentListURL;
53
  private Harvester harvester;
54
  private ArrayList harvestDocumentList = new ArrayList();
55
  private String harvestSiteEndTime;
56
  private String harvestSiteStartTime;
57
  private String ldapDN;
58
  private String ldapPwd;
59
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
60
  int siteScheduleID;
61
  private String unit;
62
  private int updateFrequency;
63
    
64
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table, and
   * derives delta, the number of milliseconds between two harvests.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field; "weeks" and "months"
   *                        scale the frequency, any other value (including
   *                        null) is treated as days
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;
    
    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. millisecondsPerDay is a
    // long, so the multiplication is carried out in 64-bit arithmetic.
    delta = updateFrequency * millisecondsPerDay;
    
    // Literal-first comparisons so that a null unit falls through to the
    // default (days) instead of throwing a NullPointerException.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      // A month is approximated as 30 days.
      delta *= 30;
    }
  }
114
  
115
  
116
  /**
117
   * Updates the DATELASTHARVEST value of the HARVEST_SITE_SCHEDULE table
118
   * after a harvest operation has completed. Calculates the date of the next 
119
   * harvest based on today's date and the update frequency.
120
   */
121
  private void dbUpdateHarvestSiteSchedule() {
122
		Connection conn;
123
    long currentTime;                    // Current time in milliseconds
124
    Date dateNextHarvest;                // Date of next harvest
125
    String lastHarvest;
126
    String nextHarvest;
127
    Date now = new Date();
128
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
129
		Statement stmt;
130
    long timeNextHarvest;
131
    
132
    conn = harvester.conn;
133
    now = new Date();
134
    currentTime = now.getTime();
135
    timeNextHarvest = currentTime + delta;
136
    dateNextHarvest = new Date(timeNextHarvest);
137
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
138
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
139
	
140
		try {
141
			stmt = conn.createStatement();							
142
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
143
                         nextHarvest +
144
                         " WHERE SITE_SCHEDULE_ID = " +
145
                         siteScheduleID);
146
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
147
                         lastHarvest +
148
                         " WHERE SITE_SCHEDULE_ID = " +
149
                         siteScheduleID);
150
			stmt.close();
151
		}
152
    catch(SQLException e) {
153
			System.out.println("SQLException: " + e.getMessage());
154
		}
155
  }
156
    
157

    
158
  /**
159
   * Boolean to determine whether this site is currently due for its next
160
   * harvest.
161
   * 
162
   * @retrun     true if due for harvest, otherwise false
163
   */
164
  private boolean dueForHarvest() {
165
    boolean dueForHarvest = false;
166
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
167
    Date now = new Date();
168
    Date dlh;                          // Date of last harvest
169
    Date dnh;                          // Date of next harvest
170
    long currentTime = now.getTime();  // Current time in milliseconds
171
    long timeNextHarvest = 0;
172
    
173
    try {
174
      dlh = dateFormat.parse(dateLastHarvest);
175
      timeNextHarvest = dlh.getTime() + delta;
176
      dnh = new Date(timeNextHarvest);
177
      
178
      if (timeNextHarvest < currentTime) {
179
        dueForHarvest = true;
180
        System.out.println("Due for harvest: " + documentListURL);
181
      }
182
      else {
183
        System.out.println("Not due for harvest: " + documentListURL);
184
      }
185
    }
186
    catch (ParseException e) {
187
      System.out.println("Error parsing date: " + e.getMessage());
188
    }
189
    
190
    return dueForHarvest;
191
  }
192

    
193

    
194
  /**
195
   * Harvests each document in the site document list.
196
   * 
197
   * @throws SAXException
198
   * @throws IOException
199
   * @throws ParserConfigurationException
200
   */
201
  public void harvestDocumentList() {
202
    HarvestDocument harvestDocument;
203
    
204
    if (dueForHarvest()) {
205
      try {
206
        parseDocumentList();
207
        metacatLogin();
208
        
209
        for (int i = 0; i < harvestDocumentList.size(); i++) {
210
          harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
211
          
212
          if (harvestDocument != null) {
213
            harvestDocument.harvestDocument();
214
          }
215
        }
216

    
217
        metacatLogout();      
218
        dbUpdateHarvestSiteSchedule();
219
      }
220
      catch (ParserConfigurationException e) {
221
        System.out.println("ParserConfigurationException: " + e.getMessage());
222
      }
223
      
224
      reportToSite();
225
    }
226
  }
227

    
228

    
229
  /**
230
   * Login to Metacat using the ldapDN and ldapPwd
231
   */
232
  private void metacatLogin() {
233
    Metacat metacat = harvester.metacat;
234

    
235
    if (harvester.connectToMetacat()) {
236
      try {
237
        System.out.println("Logging in to Metacat: " + ldapDN);
238
        metacat.login(ldapDN, ldapPwd);
239
        //System.out.println("Metacat login response: " + response);
240
        //sessionId = metacat.getSessionId();
241
        //System.out.println("Session ID: " + sessionId);
242
      } 
243
      catch (MetacatInaccessibleException e) {
244
        System.out.println("Metacat login failed." + e.getMessage());
245
      } 
246
      catch (Exception e) {
247
        System.out.println("Metacat login failed." + e.getMessage());
248
      }
249
    }    
250
  }
251
  
252
  
253
  /**
254
   * Logout from Metacat
255
   */
256
  private void metacatLogout() {
257
    Metacat metacat = harvester.metacat;
258

    
259
    if (harvester.connectToMetacat()) {
260
      try {    
261
        // Log out from the Metacat session
262
        System.out.println("Logging out from Metacat");
263
        metacat.logout();
264
      }
265
      catch (MetacatInaccessibleException e) {
266
        System.out.println("Metacat inaccessible: " + e.getMessage());
267
      }
268
      catch (MetacatException e) {
269
        System.out.println("Metacat exception: " + e.getMessage());
270
      }
271
    }
272
  }
273
  
274

    
275
  /**
276
   * Parse the site document list to find out which documents to harvest.
277
   */
278
  private void parseDocumentList() 
279
          throws ParserConfigurationException {
280
    DocumentListHandler documentListHandler = new DocumentListHandler();
281
    InputStream inputStream;
282
    InputStreamReader inputStreamReader;
283
    String schemaLocation = ".";
284
    URL url;
285

    
286
    try {
287
      url = new URL(documentListURL);
288
      inputStream = url.openStream();
289
      harvester.addLogEntry(0, "", "GetDocListSuccess", 
290
                            siteScheduleID, null, "");
291
      inputStreamReader = new InputStreamReader(inputStream);
292
      documentListHandler.runParser(inputStreamReader, schemaLocation);
293
      harvester.addLogEntry(0, "", "ValidateDocListSuccess", 
294
                            siteScheduleID, null, "");
295
    }
296
    catch (MalformedURLException e){
297
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
298
                            "GetDocListError", siteScheduleID, null, "");
299
    }
300
    catch (FileNotFoundException e) {
301
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
302
                            "GetDocListError", siteScheduleID, null, "");
303
    }
304
    catch (SAXException e) {
305
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
306
                            "ValidateDocListError", siteScheduleID, null, "");
307
    }
308
    catch (ClassNotFoundException e) {
309
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
310
                            "ValidateDocListError", siteScheduleID, null, "");
311
    }
312
    catch (IOException e) {
313
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
314
                            "GetDocListError", siteScheduleID, null, "");
315
    }
316
  }
317

    
318

    
319
  /**
320
   * Prints the data that is stored in this HarvestSiteSchedule object.
321
   */
322
  void printOutput() {
323
    System.out.println("siteScheduleID:       " + siteScheduleID);
324
    System.out.println("documentListURL:      " + documentListURL);
325
    System.out.println("ldapDN:               " + ldapDN);
326
    System.out.println("dateNextHarvest:      " + dateNextHarvest);
327
    System.out.println("dateLastHarvest:      " + dateLastHarvest);
328
    System.out.println("updateFrequency:      " + updateFrequency);
329
    System.out.println("unit:                 " + unit);
330
    System.out.println("contactEmail:         " + contactEmail);
331
  }
332
  
333

    
334
  /**
335
   * Sends a report to the site summarizing the results of the harvest
336
   * operation.
337
   */
338
  void reportToSite() {
339
    System.out.println("Sending report to site: " + contactEmail);
340
  }
341
    
342

    
343
  /**
344
   * This inner class extends DefaultHandler. It parses the document list,
345
   * creating a new HarvestDocument object every time it finds a </Document>
346
   * end tag.
347
   */
348
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
349
  
350
    public String scope;
351
    public int identifier;
352
    public int revision;
353
    public String documentType;
354
    public String documentURL;
355
    private String currentQname;
356
    public final static String DEFAULT_PARSER = 
357
           "org.apache.xerces.parsers.SAXParser";
358
    private boolean schemaValidate = true;
359
	
360

    
361
	  /**
362
     * This method is called for any plain text within an element.
363
     * It parses the value for any of the following elements:
364
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
365
     * 
366
     * @param ch          the character array holding the parsed text
367
     * @param start       the start index
368
     * @param length      the text length
369
     * 
370
     */
371
    public void characters (char ch[], int start, int length) {
372
      String s = new String(ch, start, length);
373
 
374
      if (length > 0) {           
375
        if (currentQname.equals("scope")) {
376
          scope = s;
377
        }
378
        else if (currentQname.equals("identifier")) {
379
          identifier = Integer.parseInt(s);
380
        }
381
        else if (currentQname.equals("revision")) {
382
          revision = Integer.parseInt(s);
383
        }
384
        else if (currentQname.equals("documentType")) {
385
          documentType = s;
386
        }
387
        else if (currentQname.equals("documentURL")) {
388
          documentURL = s;
389
        }
390
        
391
        currentQname = "";
392
      }
393
    }
394

    
395

    
396
    /** 
397
     * Handles an end-of-document event.
398
     */
399
    public void endDocument () {
400
      System.out.println("Finished parsing " + documentListURL);
401
    }
402

    
403

    
404
    /** 
405
     * Handles an end-of-element event. If the end tag is </Document>, then
406
     * creates a new HarvestDocument object and pushes it to the document
407
     * list.
408
     * 
409
     * @param uri
410
     * @param localname
411
     * @param qname
412
     */
413
    public void endElement(String uri, 
414
                           String localname,
415
                           String qname) {
416
      
417
      HarvestDocument harvestDocument;
418
      
419
      if (qname.equals("document")) {
420
        harvestDocument = new HarvestDocument(
421
                                              harvester,
422
                                              HarvestSiteSchedule.this,
423
                                              scope,
424
                                              identifier,
425
                                              revision,
426
                                              documentType,
427
                                              documentURL
428
                                             );
429
        harvestDocumentList.add(harvestDocument);
430
      }
431
    }
432

    
433

    
434
    /**
435
     * Method for handling errors during a parse
436
     *
437
     * @param exception         The parsing error
438
     * @exception SAXException  Description of Exception
439
     */
440
     public void error(SAXParseException e) throws SAXParseException {
441
        System.out.println("SAXParseException: " + e.getMessage());
442
        throw e;
443
    }
444

    
445

    
446
    /**
447
     * Run the validating parser
448
     *
449
     * @param xml             the xml stream to be validated
450
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
451
     * @exception IOException thrown when test files can't be opened
452
     * @exception ClassNotFoundException thrown when SAX Parser class not found
453
     * @exception SAXException
454
     * @exception SAXParserException
455
     */
456
    public void runParser(Reader xml, String schemaLocation)
457
           throws IOException, ClassNotFoundException,
458
                  SAXException, SAXParseException {
459

    
460
      // Get an instance of the parser
461
      XMLReader parser;
462

    
463
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
464
      // Set Handlers in the parser
465
      parser.setContentHandler((ContentHandler)this);
466
      parser.setErrorHandler((ErrorHandler)this);
467
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
468
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
469
      parser.setFeature("http://xml.org/sax/features/validation", true);
470
      parser.setProperty(
471
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
472
              schemaLocation);
473

    
474
      if (schemaValidate) {
475
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
476
                          true);
477
      }
478
    
479
      // Parse the document
480
      parser.parse(new InputSource(xml));
481
    }
482
    /**
483
     * Handles a start-of-document event.
484
     */
485
    public void startDocument () {
486
      System.out.println("Started parsing " + documentListURL);
487
    }
488

    
489

    
490
    /** 
491
     * Handles a start-of-element event.
492
     * 
493
     * @param uri
494
     * @param localname
495
     * @param qname
496
     * @param attributes
497
     */
498
    public void startElement(String uri, 
499
                             String localname,
500
                             String qname,
501
                             Attributes attributes) {
502
      
503
      currentQname = qname;
504
    }
505
  }
506
}
(4-4/7)