Project

General

Profile

1
/**
 *  '$RCSfile$'
 *  Copyright: 2004 University of New Mexico and the 
 *                  Regents of the University of California
 *
 *   '$Author: costa $'
 *     '$Date: 2004-04-14 13:48:01 -0700 (Wed, 14 Apr 2004) $'
 * '$Revision: 2126 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.FileNotFoundException;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.io.PrintStream;
33
import java.io.Reader;
34
import java.net.MalformedURLException;
35
import java.net.URL;
36
import java.sql.Connection;
37
import java.sql.SQLException;
38
import java.sql.Statement;
39
import java.text.DateFormat;
40
import java.text.ParseException;
41
import java.text.SimpleDateFormat;
42
import java.util.ArrayList;
43
import java.util.Date;
44
import javax.xml.parsers.ParserConfigurationException;
45
import org.xml.sax.Attributes;
46
import org.xml.sax.ContentHandler;
47
import org.xml.sax.ErrorHandler;
48
import org.xml.sax.InputSource;
49
import org.xml.sax.SAXException;
50
import org.xml.sax.SAXParseException;
51
import org.xml.sax.XMLReader;
52
import org.xml.sax.helpers.DefaultHandler;
53
import org.xml.sax.helpers.XMLReaderFactory;
54

    
55
import edu.ucsb.nceas.metacat.client.Metacat;
56
import edu.ucsb.nceas.metacat.client.MetacatException;
57
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
58

    
59

    
60
/**
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
 * table, determining when and how to harvest the documents for a given site.
 * 
 * @author  costa
 */
66
class HarvestSiteSchedule {
67
    
68
  private String contactEmail;
69
  private String dateLastHarvest;
70
  private String dateNextHarvest;
71
  private long delta;
72
  private String documentListURL;
73
  private Harvester harvester;
74
  private ArrayList harvestDocumentList = new ArrayList();
75
  private String harvestSiteEndTime;
76
  private String harvestSiteStartTime;
77
  private String ldapDN;
78
  private String ldapPwd;
79
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
80
  int siteScheduleID;
81
  private String unit;
82
  private int updateFrequency;
83
    
84
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table, and
   * derives delta, the harvest interval in milliseconds.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field; "weeks" scales the
   *                        interval by 7 and "months" by 30, while any other
   *                        value (including null) leaves it in days
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;
    
    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. The multiplication is done
    // in long arithmetic because millisecondsPerDay is a long.
    delta = updateFrequency * millisecondsPerDay;
    
    // Constant-first comparisons so that a null unit does not throw a
    // NullPointerException; it simply leaves the interval measured in days.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;     // a month is approximated as 30 days
    }
  }
134
  
135
  
136
  /**
137
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
138
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
139
   * Calculates the date of the next harvest based on today's date and the 
140
   * update frequency.
141
   */
142
  private void dbUpdateHarvestDates() {
143
		Connection conn;
144
    long currentTime;                    // Current time in milliseconds
145
    Date dateNextHarvest;                // Date of next harvest
146
    String lastHarvest;
147
    String nextHarvest;
148
    Date now = new Date();
149
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
150
		Statement stmt;
151
    long timeNextHarvest;
152
    
153
    conn = harvester.conn;
154
    now = new Date();
155
    currentTime = now.getTime();
156
    timeNextHarvest = currentTime + delta;
157
    dateNextHarvest = new Date(timeNextHarvest);
158
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
159
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
160
	
161
		try {
162
			stmt = conn.createStatement();							
163
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
164
                         nextHarvest +
165
                         " WHERE SITE_SCHEDULE_ID = " +
166
                         siteScheduleID);
167
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
168
                         lastHarvest +
169
                         " WHERE SITE_SCHEDULE_ID = " +
170
                         siteScheduleID);
171
			stmt.close();
172
		}
173
    catch(SQLException e) {
174
			System.out.println("SQLException: " + e.getMessage());
175
		}
176
  }
177
    
178

    
179
  /**
180
   * Boolean to determine whether this site is currently due for its next
181
   * harvest.
182
   * 
183
   * @retrun     true if due for harvest, otherwise false
184
   */
185
  private boolean dueForHarvest() {
186
    boolean dueForHarvest = false;
187
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
188
    Date now = new Date();
189
    Date dnh;                          // Date of next harvest
190
    long currentTime = now.getTime();  // Current time in milliseconds
191
    long timeNextHarvest = 0;
192
    
193
    try {
194
      dnh = dateFormat.parse(dateNextHarvest);
195
      timeNextHarvest = dnh.getTime();
196
      
197
      if (timeNextHarvest < currentTime) {
198
        dueForHarvest = true;
199
        System.out.println("Due for harvest: " + documentListURL);
200
      }
201
      else {
202
        System.out.println("Not due for harvest: " + documentListURL);
203
      }
204
    }
205
    catch (ParseException e) {
206
      System.out.println("Error parsing date: " + e.getMessage());
207
    }
208
    
209
    return dueForHarvest;
210
  }
211

    
212

    
213
  /**
214
   * Harvests each document in the site document list.
215
   * 
216
   * @throws SAXException
217
   * @throws IOException
218
   * @throws ParserConfigurationException
219
   */
220
  public void harvestDocumentList() {
221
    HarvestDocument harvestDocument;
222
    boolean success;
223
    
224
    if (dueForHarvest()) {
225
      try {
226
        success = parseDocumentList();
227

    
228
        /* If the document list was validated, then proceed with harvesting
229
         * the documents
230
         */
231
        if (success) {
232
          metacatLogin();
233
        
234
          for (int i = 0; i < harvestDocumentList.size(); i++) {
235
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
236
          
237
            if (harvestDocument != null) {
238
              harvestDocument.harvestDocument();
239
            }
240
          }
241

    
242
          metacatLogout();      
243
          dbUpdateHarvestDates();  // Update the schedule
244
        }
245
      }
246
      catch (ParserConfigurationException e) {
247
        System.out.println("ParserConfigurationException: " + e.getMessage());
248
      }
249
      
250
      reportToSite();
251
    }
252
  }
253

    
254

    
255
  /**
256
   * Login to Metacat using the ldapDN and ldapPwd
257
   */
258
  private void metacatLogin() {
259
    Metacat metacat = harvester.metacat;
260

    
261
    if (harvester.connectToMetacat()) {
262
      try {
263
        System.out.println("Logging in to Metacat: " + ldapDN);
264
        metacat.login(ldapDN, ldapPwd);
265
        //System.out.println("Metacat login response: " + response);
266
        //sessionId = metacat.getSessionId();
267
        //System.out.println("Session ID: " + sessionId);
268
      } 
269
      catch (MetacatInaccessibleException e) {
270
        System.out.println("Metacat login failed." + e.getMessage());
271
      } 
272
      catch (Exception e) {
273
        System.out.println("Metacat login failed." + e.getMessage());
274
      }
275
    }    
276
  }
277
  
278
  
279
  /**
280
   * Logout from Metacat
281
   */
282
  private void metacatLogout() {
283
    Metacat metacat = harvester.metacat;
284

    
285
    if (harvester.connectToMetacat()) {
286
      try {    
287
        // Log out from the Metacat session
288
        System.out.println("Logging out from Metacat");
289
        metacat.logout();
290
      }
291
      catch (MetacatInaccessibleException e) {
292
        System.out.println("Metacat inaccessible: " + e.getMessage());
293
      }
294
      catch (MetacatException e) {
295
        System.out.println("Metacat exception: " + e.getMessage());
296
      }
297
    }
298
  }
299
  
300

    
301
  /**
302
   * Parse the site document list to find out which documents to harvest.
303
   * 
304
   * @return  true if successful, otherwise false
305
   */
306
  private boolean parseDocumentList() 
307
          throws ParserConfigurationException {
308
    DocumentListHandler documentListHandler = new DocumentListHandler();
309
    InputStream inputStream;
310
    InputStreamReader inputStreamReader;
311
    String schemaLocation = 
312
    "eml://ecoinformatics.org/harvestList ../../lib/harvester/harvestList.xsd";
313
    boolean success = false;
314
    URL url;
315

    
316
    try {
317
      url = new URL(documentListURL);
318
      inputStream = url.openStream();
319
      harvester.addLogEntry(0,
320
                            "Retrieved: " + documentListURL,
321
                            "GetDocListSuccess",
322
                            siteScheduleID,
323
                            null,
324
                            "");
325
      inputStreamReader = new InputStreamReader(inputStream);
326
      documentListHandler.runParser(inputStreamReader, schemaLocation);
327
      harvester.addLogEntry(0,
328
                            "Validated: " + documentListURL,
329
                            "ValidateDocListSuccess",
330
                            siteScheduleID,
331
                            null,
332
                            "");
333
      success = true;
334
    }
335
    catch (MalformedURLException e){
336
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
337
                            "GetDocListError", siteScheduleID, null, "");
338
    }
339
    catch (FileNotFoundException e) {
340
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
341
                            "GetDocListError", siteScheduleID, null, "");
342
    }
343
    catch (SAXException e) {
344
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
345
                            "ValidateDocListError", siteScheduleID, null, "");
346
    }
347
    catch (ClassNotFoundException e) {
348
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
349
                            "ValidateDocListError", siteScheduleID, null, "");
350
    }
351
    catch (IOException e) {
352
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
353
                            "GetDocListError", siteScheduleID, null, "");
354
    }
355
    
356
    return success;
357
  }
358

    
359

    
360
  /**
361
   * Prints the data that is stored in this HarvestSiteSchedule object.
362
   * 
363
   * @param out   the PrintStream to write to
364
   */
365
  void printOutput(PrintStream out) {
366
    out.println("* siteScheduleID:       " + siteScheduleID);
367
    out.println("* documentListURL:      " + documentListURL);
368
    out.println("* ldapDN:               " + ldapDN);
369
    out.println("* dateNextHarvest:      " + dateNextHarvest);
370
    out.println("* dateLastHarvest:      " + dateLastHarvest);
371
    out.println("* updateFrequency:      " + updateFrequency);
372
    out.println("* unit:                 " + unit);
373
    out.println("* contactEmail:         " + contactEmail);
374
  }
375
  
376

    
377
  /**
378
   * Sends a report to the site summarizing the results of the harvest at
379
   * that site.
380
   */
381
  void reportToSite() {
382
    PrintStream body;
383
    String from = harvester.harvesterAdministrator;
384
    String maxCodeLevel = "info";
385
    MailMessage msg;
386
    int nErrors = 0;
387
    String subject = "Report from Metacat Harvester: " + harvester.timestamp;
388
    String to = contactEmail;
389
    
390
    if (!to.equals("")) {
391
      System.out.println("Sending report to siteScheduleID=" + siteScheduleID +
392
                         " at address: " + contactEmail);
393
      try {
394
        msg = new MailMessage(harvester.smtpServer);
395
        msg.from(from);
396
        msg.to(to);
397
        msg.setSubject(subject);
398
        body = msg.getPrintStream();
399
        harvester.printHarvestLog(body, maxCodeLevel, siteScheduleID);
400
        msg.sendAndClose();        
401
      }
402
      catch (IOException e) {
403
        System.out.println("There was a problem sending email to " + to);
404
        System.out.println("IOException: " + e.getMessage());
405
      }
406
    }
407
  }
408
    
409

    
410
  /**
411
   * This inner class extends DefaultHandler. It parses the document list,
412
   * creating a new HarvestDocument object every time it finds a </Document>
413
   * end tag.
414
   */
415
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
416
  
417
    public String scope;
418
    public int identifier;
419
    public String identifierString;
420
    public String documentType;
421
    public int revision;
422
    public String revisionString;
423
    public String documentURL;
424
    private String currentQname;
425
    public final static String DEFAULT_PARSER = 
426
           "org.apache.xerces.parsers.SAXParser";
427
    private boolean schemaValidate = true;
428
	
429

    
430
	  /**
431
     * This method is called for any plain text within an element.
432
     * It parses the value for any of the following elements:
433
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
434
     * 
435
     * @param ch          the character array holding the parsed text
436
     * @param start       the start index
437
     * @param length      the text length
438
     * 
439
     */
440
    public void characters (char ch[], int start, int length) {
441
      String s = new String(ch, start, length);
442
 
443
      if (length > 0) {           
444
        if (currentQname.equals("scope")) {
445
          scope += s;
446
        }
447
        else if (currentQname.equals("identifier")) {
448
          identifierString += s;
449
        }
450
        else if (currentQname.equals("revision")) {
451
          revisionString += s;
452
        }
453
        else if (currentQname.equals("documentType")) {
454
          documentType += s;
455
        }
456
        else if (currentQname.equals("documentURL")) {
457
          documentURL += s;
458
        }
459
      }
460
    }
461

    
462

    
463
    /** 
464
     * Handles an end-of-document event.
465
     */
466
    public void endDocument () {
467
      System.out.println("Finished parsing " + documentListURL);
468
    }
469

    
470

    
471
    /** 
472
     * Handles an end-of-element event. If the end tag is </Document>, then
473
     * creates a new HarvestDocument object and pushes it to the document
474
     * list.
475
     * 
476
     * @param uri
477
     * @param localname
478
     * @param qname
479
     */
480
    public void endElement(String uri, 
481
                           String localname,
482
                           String qname) {
483
      
484
      HarvestDocument harvestDocument;
485
      
486
      if (qname.equals("identifier")) {
487
        identifier = Integer.parseInt(identifierString);
488
      }
489
      else if (qname.equals("revision")) {
490
        revision = Integer.parseInt(revisionString);
491
      }
492
      else if (qname.equals("document")) {
493
        harvestDocument = new HarvestDocument(
494
                                              harvester,
495
                                              HarvestSiteSchedule.this,
496
                                              scope,
497
                                              identifier,
498
                                              revision,
499
                                              documentType,
500
                                              documentURL
501
                                             );
502
        harvestDocumentList.add(harvestDocument);
503
      }
504

    
505
      currentQname = "";
506
    }
507

    
508

    
509
    /**
510
     * Method for handling errors during a parse
511
     *
512
     * @param exception         The parsing error
513
     * @exception SAXException  Description of Exception
514
     */
515
     public void error(SAXParseException e) throws SAXParseException {
516
        System.out.println("SAXParseException: " + e.getMessage());
517
        throw e;
518
    }
519

    
520

    
521
    /**
522
     * Run the validating parser
523
     *
524
     * @param xml             the xml stream to be validated
525
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
526
     * @exception IOException thrown when test files can't be opened
527
     * @exception ClassNotFoundException thrown when SAX Parser class not found
528
     * @exception SAXException
529
     * @exception SAXParserException
530
     */
531
    public void runParser(Reader xml, String schemaLocation)
532
           throws IOException, ClassNotFoundException,
533
                  SAXException, SAXParseException {
534

    
535
      // Get an instance of the parser
536
      XMLReader parser;
537

    
538
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
539
      // Set Handlers in the parser
540
      parser.setContentHandler((ContentHandler)this);
541
      parser.setErrorHandler((ErrorHandler)this);
542
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
543
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
544
      parser.setFeature("http://xml.org/sax/features/validation", true);
545
      parser.setProperty(
546
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
547
              schemaLocation);
548

    
549
      if (schemaValidate) {
550
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
551
                          true);
552
      }
553
    
554
      // Parse the document
555
      parser.parse(new InputSource(xml));
556
    }
557
    /**
558
     * Handles a start-of-document event.
559
     */
560
    public void startDocument () {
561
      System.out.println("Started parsing " + documentListURL);
562
    }
563

    
564

    
565
    /** 
566
     * Handles a start-of-element event.
567
     * 
568
     * @param uri
569
     * @param localname
570
     * @param qname
571
     * @param attributes
572
     */
573
    public void startElement(String uri, 
574
                             String localname,
575
                             String qname,
576
                             Attributes attributes) {
577
      
578
      currentQname = qname;
579

    
580
      if (qname.equals("scope")) {
581
        scope = "";
582
      }
583
      else if (qname.equals("identifier")) {
584
        identifierString = "";
585
      }
586
      else if (qname.equals("revision")) {
587
        revisionString = "";
588
      }
589
      else if (qname.equals("documentType")) {
590
        documentType = "";
591
      }
592
      else if (qname.equals("documentURL")) {
593
        documentURL = "";
594
      }
595
    }
596
  }
597
}
(4-4/9)