Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: jones $'
7
 *     '$Date: 2004-04-01 16:41:58 -0800 (Thu, 01 Apr 2004) $'
8
 * '$Revision: 2094 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.FileNotFoundException;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.io.PrintStream;
33
import java.io.Reader;
34
import java.net.MalformedURLException;
35
import java.net.URL;
36
import java.sql.Connection;
37
import java.sql.SQLException;
38
import java.sql.Statement;
39
import java.text.DateFormat;
40
import java.text.ParseException;
41
import java.text.SimpleDateFormat;
42
import java.util.ArrayList;
43
import java.util.Date;
44
import javax.xml.parsers.ParserConfigurationException;
45
import org.xml.sax.Attributes;
46
import org.xml.sax.ContentHandler;
47
import org.xml.sax.ErrorHandler;
48
import org.xml.sax.InputSource;
49
import org.xml.sax.SAXException;
50
import org.xml.sax.SAXParseException;
51
import org.xml.sax.XMLReader;
52
import org.xml.sax.helpers.DefaultHandler;
53
import org.xml.sax.helpers.XMLReaderFactory;
54

    
55
import edu.ucsb.nceas.metacat.client.Metacat;
56
import edu.ucsb.nceas.metacat.client.MetacatException;
57
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
58

    
59

    
60
/**
61
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
62
 * table, determining when and how to harvest the documents for a given site.
63
 * 
64
 * @author  costa
65
 */
66
class HarvestSiteSchedule {
67
    
68
  private String contactEmail;
69
  private String dateLastHarvest;
70
  private String dateNextHarvest;
71
  private long delta;
72
  private String documentListURL;
73
  private Harvester harvester;
74
  private ArrayList harvestDocumentList = new ArrayList();
75
  private String harvestSiteEndTime;
76
  private String harvestSiteStartTime;
77
  private String ldapDN;
78
  private String ldapPwd;
79
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
80
  int siteScheduleID;
81
  private String unit;
82
  private int updateFrequency;
83
    
84
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
   * Also computes delta, the interval in milliseconds between harvests:
   * updateFrequency is counted in days, multiplied by 7 when unit is
   * "weeks" or by 30 when unit is "months". Any other unit value,
   * including null, leaves the interval expressed in days.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;

    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date.
    delta = updateFrequency * millisecondsPerDay;

    // Literal-first comparisons so that a null unit does not throw a
    // NullPointerException while the object is being constructed.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;            // a month is approximated as 30 days
    }
  }
134
  
135
  
136
  /**
137
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
138
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
139
   * Calculates the date of the next harvest based on today's date and the 
140
   * update frequency.
141
   */
142
  private void dbUpdateHarvestDates() {
143
		Connection conn;
144
    long currentTime;                    // Current time in milliseconds
145
    Date dateNextHarvest;                // Date of next harvest
146
    String lastHarvest;
147
    String nextHarvest;
148
    Date now = new Date();
149
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
150
		Statement stmt;
151
    long timeNextHarvest;
152
    
153
    conn = harvester.conn;
154
    now = new Date();
155
    currentTime = now.getTime();
156
    timeNextHarvest = currentTime + delta;
157
    dateNextHarvest = new Date(timeNextHarvest);
158
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
159
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
160
	
161
		try {
162
			stmt = conn.createStatement();							
163
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
164
                         nextHarvest +
165
                         " WHERE SITE_SCHEDULE_ID = " +
166
                         siteScheduleID);
167
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
168
                         lastHarvest +
169
                         " WHERE SITE_SCHEDULE_ID = " +
170
                         siteScheduleID);
171
			stmt.close();
172
		}
173
    catch(SQLException e) {
174
			System.out.println("SQLException: " + e.getMessage());
175
		}
176
  }
177
    
178

    
179
  /**
180
   * Boolean to determine whether this site is currently due for its next
181
   * harvest.
182
   * 
183
   * @retrun     true if due for harvest, otherwise false
184
   */
185
  private boolean dueForHarvest() {
186
    boolean dueForHarvest = false;
187
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
188
    Date now = new Date();
189
    Date dnh;                          // Date of next harvest
190
    long currentTime = now.getTime();  // Current time in milliseconds
191
    long timeNextHarvest = 0;
192
    
193
    try {
194
      dnh = dateFormat.parse(dateNextHarvest);
195
      timeNextHarvest = dnh.getTime();
196
      
197
      if (timeNextHarvest < currentTime) {
198
        dueForHarvest = true;
199
        System.out.println("Due for harvest: " + documentListURL);
200
      }
201
      else {
202
        System.out.println("Not due for harvest: " + documentListURL);
203
      }
204
    }
205
    catch (ParseException e) {
206
      System.out.println("Error parsing date: " + e.getMessage());
207
    }
208
    
209
    return dueForHarvest;
210
  }
211

    
212

    
213
  /**
214
   * Harvests each document in the site document list.
215
   * 
216
   * @throws SAXException
217
   * @throws IOException
218
   * @throws ParserConfigurationException
219
   */
220
  public void harvestDocumentList() {
221
    HarvestDocument harvestDocument;
222
    boolean success;
223
    
224
    if (dueForHarvest()) {
225
      try {
226
        success = parseDocumentList();
227

    
228
        /* If the document list was validated, then proceed with harvesting
229
         * the documents
230
         */
231
        if (success) {
232
          metacatLogin();
233
        
234
          for (int i = 0; i < harvestDocumentList.size(); i++) {
235
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
236
          
237
            if (harvestDocument != null) {
238
              harvestDocument.harvestDocument();
239
            }
240
          }
241

    
242
          metacatLogout();      
243
          dbUpdateHarvestDates();  // Update the schedule
244
        }
245
      }
246
      catch (ParserConfigurationException e) {
247
        System.out.println("ParserConfigurationException: " + e.getMessage());
248
      }
249
      
250
      reportToSite();
251
    }
252
  }
253

    
254

    
255
  /**
256
   * Login to Metacat using the ldapDN and ldapPwd
257
   */
258
  private void metacatLogin() {
259
    Metacat metacat = harvester.metacat;
260

    
261
    if (harvester.connectToMetacat()) {
262
      try {
263
        System.out.println("Logging in to Metacat: " + ldapDN);
264
        metacat.login(ldapDN, ldapPwd);
265
        //System.out.println("Metacat login response: " + response);
266
        //sessionId = metacat.getSessionId();
267
        //System.out.println("Session ID: " + sessionId);
268
      } 
269
      catch (MetacatInaccessibleException e) {
270
        System.out.println("Metacat login failed." + e.getMessage());
271
      } 
272
      catch (Exception e) {
273
        System.out.println("Metacat login failed." + e.getMessage());
274
      }
275
    }    
276
  }
277
  
278
  
279
  /**
280
   * Logout from Metacat
281
   */
282
  private void metacatLogout() {
283
    Metacat metacat = harvester.metacat;
284

    
285
    if (harvester.connectToMetacat()) {
286
      try {    
287
        // Log out from the Metacat session
288
        System.out.println("Logging out from Metacat");
289
        metacat.logout();
290
      }
291
      catch (MetacatInaccessibleException e) {
292
        System.out.println("Metacat inaccessible: " + e.getMessage());
293
      }
294
      catch (MetacatException e) {
295
        System.out.println("Metacat exception: " + e.getMessage());
296
      }
297
    }
298
  }
299
  
300

    
301
  /**
302
   * Parse the site document list to find out which documents to harvest.
303
   * 
304
   * @return  true if successful, otherwise false
305
   */
306
  private boolean parseDocumentList() 
307
          throws ParserConfigurationException {
308
    DocumentListHandler documentListHandler = new DocumentListHandler();
309
    InputStream inputStream;
310
    InputStreamReader inputStreamReader;
311
    String schemaLocation = ".";
312
    boolean success = false;
313
    URL url;
314

    
315
    try {
316
      url = new URL(documentListURL);
317
      inputStream = url.openStream();
318
      harvester.addLogEntry(0,
319
                            "Retrieved: " + documentListURL,
320
                            "GetDocListSuccess",
321
                            siteScheduleID,
322
                            null,
323
                            "");
324
      inputStreamReader = new InputStreamReader(inputStream);
325
      documentListHandler.runParser(inputStreamReader, schemaLocation);
326
      harvester.addLogEntry(0,
327
                            "Validated: " + documentListURL,
328
                            "ValidateDocListSuccess",
329
                            siteScheduleID,
330
                            null,
331
                            "");
332
      success = true;
333
    }
334
    catch (MalformedURLException e){
335
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
336
                            "GetDocListError", siteScheduleID, null, "");
337
    }
338
    catch (FileNotFoundException e) {
339
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
340
                            "GetDocListError", siteScheduleID, null, "");
341
    }
342
    catch (SAXException e) {
343
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
344
                            "ValidateDocListError", siteScheduleID, null, "");
345
    }
346
    catch (ClassNotFoundException e) {
347
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
348
                            "ValidateDocListError", siteScheduleID, null, "");
349
    }
350
    catch (IOException e) {
351
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
352
                            "GetDocListError", siteScheduleID, null, "");
353
    }
354
    
355
    return success;
356
  }
357

    
358

    
359
  /**
360
   * Prints the data that is stored in this HarvestSiteSchedule object.
361
   * 
362
   * @param out   the PrintStream to write to
363
   */
364
  void printOutput(PrintStream out) {
365
    out.println("* siteScheduleID:       " + siteScheduleID);
366
    out.println("* documentListURL:      " + documentListURL);
367
    out.println("* ldapDN:               " + ldapDN);
368
    out.println("* dateNextHarvest:      " + dateNextHarvest);
369
    out.println("* dateLastHarvest:      " + dateLastHarvest);
370
    out.println("* updateFrequency:      " + updateFrequency);
371
    out.println("* unit:                 " + unit);
372
    out.println("* contactEmail:         " + contactEmail);
373
  }
374
  
375

    
376
  /**
377
   * Sends a report to the site summarizing the results of the harvest
378
   * operation.
379
   */
380
  void reportToSite() {
381
    PrintStream body;
382
    String from = "Metacat Harvester";
383
    MailMessage msg;
384
    String subject = "Report from Metacat Harvester";
385
    String to = contactEmail;
386
    
387
    if (!to.equals("")) {
388
      System.out.println("Sending report to siteScheduleID=" + siteScheduleID +
389
                         " at address: " + contactEmail);
390
      
391
      try {
392
        msg = new MailMessage();
393
        msg.from(from);
394
        msg.to(to);
395
        msg.setSubject(subject);
396
        body = msg.getPrintStream();
397
        
398
      }
399
      catch (IOException e) {
400
        System.out.println("There was a problem sending email to " + to);
401
        System.out.println("IOException: " + e.getMessage());
402
      }
403
      
404
    }
405
  }
406
    
407

    
408
  /**
409
   * This inner class extends DefaultHandler. It parses the document list,
410
   * creating a new HarvestDocument object every time it finds a </document>
411
   * end tag.
412
   */
413
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
414
  
415
    public String scope;
416
    public int identifier;
417
    public String identifierString;
418
    public String documentType;
419
    public int revision;
420
    public String revisionString;
421
    public String documentURL;
422
    private String currentQname;
423
    public final static String DEFAULT_PARSER = 
424
           "org.apache.xerces.parsers.SAXParser";
425
    private boolean schemaValidate = true;
426
	
427

    
428
	  /**
429
     * This method is called for any plain text within an element.
430
     * It parses the value for any of the following elements:
431
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
432
     * 
433
     * @param ch          the character array holding the parsed text
434
     * @param start       the start index
435
     * @param length      the text length
436
     * 
437
     */
438
    public void characters (char ch[], int start, int length) {
439
      String s = new String(ch, start, length);
440
 
441
      if (length > 0) {           
442
        if (currentQname.equals("scope")) {
443
          scope += s;
444
        }
445
        else if (currentQname.equals("identifier")) {
446
          identifierString += s;
447
        }
448
        else if (currentQname.equals("revision")) {
449
          revisionString += s;
450
        }
451
        else if (currentQname.equals("documentType")) {
452
          documentType += s;
453
        }
454
        else if (currentQname.equals("documentURL")) {
455
          documentURL += s;
456
        }
457
      }
458
    }
459

    
460

    
461
    /** 
462
     * Handles an end-of-document event.
463
     */
464
    public void endDocument () {
465
      System.out.println("Finished parsing " + documentListURL);
466
    }
467

    
468

    
469
    /** 
470
     * Handles an end-of-element event. If the end tag is </Document>, then
471
     * creates a new HarvestDocument object and pushes it to the document
472
     * list.
473
     * 
474
     * @param uri
475
     * @param localname
476
     * @param qname
477
     */
478
    public void endElement(String uri, 
479
                           String localname,
480
                           String qname) {
481
      
482
      HarvestDocument harvestDocument;
483
      
484
      if (qname.equals("identifier")) {
485
        identifier = Integer.parseInt(identifierString);
486
      }
487
      else if (qname.equals("revision")) {
488
        revision = Integer.parseInt(revisionString);
489
      }
490
      else if (qname.equals("document")) {
491
        harvestDocument = new HarvestDocument(
492
                                              harvester,
493
                                              HarvestSiteSchedule.this,
494
                                              scope,
495
                                              identifier,
496
                                              revision,
497
                                              documentType,
498
                                              documentURL
499
                                             );
500
        harvestDocumentList.add(harvestDocument);
501
      }
502

    
503
      currentQname = "";
504
    }
505

    
506

    
507
    /**
508
     * Method for handling errors during a parse
509
     *
510
     * @param exception         The parsing error
511
     * @exception SAXException  Description of Exception
512
     */
513
     public void error(SAXParseException e) throws SAXParseException {
514
        System.out.println("SAXParseException: " + e.getMessage());
515
        throw e;
516
    }
517

    
518

    
519
    /**
520
     * Run the validating parser
521
     *
522
     * @param xml             the xml stream to be validated
523
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
524
     * @exception IOException thrown when test files can't be opened
525
     * @exception ClassNotFoundException thrown when SAX Parser class not found
526
     * @exception SAXException
527
     * @exception SAXParserException
528
     */
529
    public void runParser(Reader xml, String schemaLocation)
530
           throws IOException, ClassNotFoundException,
531
                  SAXException, SAXParseException {
532

    
533
      // Get an instance of the parser
534
      XMLReader parser;
535

    
536
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
537
      // Set Handlers in the parser
538
      parser.setContentHandler((ContentHandler)this);
539
      parser.setErrorHandler((ErrorHandler)this);
540
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
541
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
542
      parser.setFeature("http://xml.org/sax/features/validation", true);
543
      parser.setProperty(
544
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
545
              schemaLocation);
546

    
547
      if (schemaValidate) {
548
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
549
                          true);
550
      }
551
    
552
      // Parse the document
553
      parser.parse(new InputSource(xml));
554
    }
555
    /**
556
     * Handles a start-of-document event.
557
     */
558
    public void startDocument () {
559
      System.out.println("Started parsing " + documentListURL);
560
    }
561

    
562

    
563
    /** 
564
     * Handles a start-of-element event.
565
     * 
566
     * @param uri
567
     * @param localname
568
     * @param qname
569
     * @param attributes
570
     */
571
    public void startElement(String uri, 
572
                             String localname,
573
                             String qname,
574
                             Attributes attributes) {
575
      
576
      currentQname = qname;
577

    
578
      if (qname.equals("scope")) {
579
        scope = "";
580
      }
581
      else if (qname.equals("identifier")) {
582
        identifierString = "";
583
      }
584
      else if (qname.equals("revision")) {
585
        revisionString = "";
586
      }
587
      else if (qname.equals("documentType")) {
588
        documentType = "";
589
      }
590
      else if (qname.equals("documentURL")) {
591
        documentURL = "";
592
      }
593
    }
594
  }
595
}
(4-4/9)