Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: leinfelder $'
7
 *     '$Date: 2010-07-30 16:11:29 -0700 (Fri, 30 Jul 2010) $'
8
 * '$Revision: 5463 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.FileNotFoundException;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.io.PrintStream;
33
import java.io.Reader;
34
import java.net.MalformedURLException;
35
import java.net.URL;
36
import java.sql.Connection;
37
import java.sql.SQLException;
38
import java.sql.Statement;
39
import java.text.DateFormat;
40
import java.text.ParseException;
41
import java.text.SimpleDateFormat;
42
import java.util.ArrayList;
43
import java.util.Date;
44
import javax.xml.parsers.ParserConfigurationException;
45
import org.xml.sax.Attributes;
46
import org.xml.sax.ContentHandler;
47
import org.xml.sax.ErrorHandler;
48
import org.xml.sax.InputSource;
49
import org.xml.sax.SAXException;
50
import org.xml.sax.SAXParseException;
51
import org.xml.sax.XMLReader;
52
import org.xml.sax.helpers.DefaultHandler;
53
import org.xml.sax.helpers.XMLReaderFactory;
54

    
55
import edu.ucsb.nceas.metacat.client.Metacat;
56
import edu.ucsb.nceas.metacat.client.MetacatException;
57
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
58

    
59

    
60
/**
61
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
62
 * table, determining when and how to harvest the documents for a given site.
63
 * 
64
 * @author  costa
65
 */
66
public class HarvestSiteSchedule {
67
    
68
  private String contactEmail;
69
  private String dateLastHarvest;
70
  private String dateNextHarvest;
71
  private long delta;
72
  private String documentListURL;
73
  private Harvester harvester;
74
  private ArrayList harvestDocumentList = new ArrayList();
75
  private String harvestSiteEndTime;
76
  private String harvestSiteStartTime;
77
  private String ldapDN;
78
  private String ldapPwd;
79
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
80
  private String schemaLocation = 
81
    "eml://ecoinformatics.org/harvestList ../../lib/harvester/harvestList.xsd";
82
  int siteScheduleID;
83
  private String unit;
84
  private int updateFrequency;
85
    
86
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table, and
   * derives the harvest interval (delta) from the update frequency and unit.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field; "weeks" and "months"
   *                        scale the interval, any other value (including
   *                        null) leaves the frequency counted in days
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;
    
    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. The multiplication is
    // carried out in long arithmetic because millisecondsPerDay is a long.
    delta = updateFrequency * millisecondsPerDay;
    
    // Constant-first comparisons so that a null UNIT value falls through to
    // the default (days) instead of throwing a NullPointerException.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;    // a month is approximated as 30 days
    }
  }
136
  
137
  
138
  /**
139
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
140
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
141
   * Calculates the date of the next harvest based on today's date and the 
142
   * update frequency.
143
   */
144
  private void dbUpdateHarvestDates() {
145
    Connection conn;
146
    long currentTime;                    // Current time in milliseconds
147
    Date dateNextHarvest;                // Date of next harvest
148
    String lastHarvest;
149
    String nextHarvest;
150
    Date now = new Date();
151
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
152
    Statement stmt;
153
    long timeNextHarvest;
154
    
155
    conn = harvester.getConnection();
156
    now = new Date();
157
    currentTime = now.getTime();
158
    timeNextHarvest = currentTime + delta;
159
    dateNextHarvest = new Date(timeNextHarvest);
160
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
161
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
162
	
163
	try {
164
      stmt = conn.createStatement();
165
      stmt.executeUpdate(
166
                         "UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
167
                         nextHarvest +
168
                         " WHERE SITE_SCHEDULE_ID = " +
169
                         siteScheduleID);
170
      stmt.executeUpdate(
171
                         "UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
172
                         lastHarvest +
173
                         " WHERE SITE_SCHEDULE_ID = " +
174
                         siteScheduleID);
175
      stmt.close();
176
    }
177
    catch(SQLException e) {
178
      System.out.println("SQLException: " + e.getMessage());
179
    }
180
  }
181
    
182

    
183
  /**
184
   * Boolean to determine whether this site is currently due for its next
185
   * harvest.
186
   * 
187
   * @retrun     true if due for harvest, otherwise false
188
   */
189
  public boolean dueForHarvest() {
190
    boolean dueForHarvest = false;
191
//    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
192
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
193
    Date now = new Date();
194
    Date dnh;                          // Date of next harvest
195
    long currentTime = now.getTime();  // Current time in milliseconds
196
    long timeNextHarvest = 0;
197
    
198
    try {
199
      dnh = dateFormat.parse(dateNextHarvest);
200
      timeNextHarvest = dnh.getTime();
201
      
202
      if (timeNextHarvest < currentTime) {
203
        dueForHarvest = true;
204
        System.out.println("Due for harvest: " + documentListURL);
205
      }
206
      else {
207
        System.out.println("Not due for harvest: " + documentListURL);
208
      }
209
    }
210
    catch (ParseException e) {
211
      System.out.println("Error parsing date: " + e.getMessage());
212
    }
213
    
214
    return dueForHarvest;
215
  }
216
  
217

    
218
  /**
219
   * Accessor method for the schemaLocation field.
220
   * 
221
   * @return schemaLocation  the schema location string
222
   */
223
  public String getSchemaLocation() {
224
    return schemaLocation;
225
  }
226

    
227

    
228
  /**
229
   * Harvests each document in the site document list.
230
   * 
231
   * @throws SAXException
232
   * @throws IOException
233
   * @throws ParserConfigurationException
234
   */
235
  public void harvestDocumentList() {
236
    HarvestDocument harvestDocument;
237
    boolean success;
238
    
239
    if (dueForHarvest()) {
240
      try {
241
        success = parseHarvestList();
242

    
243
        /* If the document list was validated, then proceed with harvesting
244
         * the documents
245
         */
246
        if (success) {
247
          metacatLogin();
248
        
249
          for (int i = 0; i < harvestDocumentList.size(); i++) {
250
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
251
          
252
            if (harvestDocument != null) {
253
              harvestDocument.harvestDocument();
254
            }
255
          }
256

    
257
          metacatLogout();      
258
          dbUpdateHarvestDates();  // Update the schedule
259
        }
260
      }
261
      catch (ParserConfigurationException e) {
262
        System.out.println("ParserConfigurationException: " + e.getMessage());
263
      }
264
      
265
      reportToSiteContact();
266
    }
267
  }
268

    
269

    
270
  /**
271
   * Login to Metacat using the ldapDN and ldapPwd
272
   */
273
  public void metacatLogin() {
274
    Metacat metacat = harvester.metacat;
275
    String response;
276

    
277
    if (harvester.connectToMetacat()) {
278
      try {
279
        System.out.println("Logging in to Metacat: " + ldapDN);
280
        response = metacat.login(ldapDN, ldapPwd);
281
        //System.out.println("Metacat login response: " + response);
282
      } 
283
      catch (MetacatInaccessibleException e) {
284
        System.out.println("Metacat login failed." + e.getMessage());
285
      } 
286
      catch (Exception e) {
287
        System.out.println("Metacat login failed." + e.getMessage());
288
      }
289
    }    
290
  }
291
  
292
  
293
  /**
294
   * Logout from Metacat
295
   */
296
  private void metacatLogout() {
297
    Metacat metacat = harvester.metacat;
298

    
299
    if (harvester.connectToMetacat()) {
300
      try {    
301
        // Log out from the Metacat session
302
        System.out.println("Logging out from Metacat");
303
        metacat.logout();
304
      }
305
      catch (MetacatInaccessibleException e) {
306
        System.out.println("Metacat inaccessible: " + e.getMessage());
307
      }
308
      catch (MetacatException e) {
309
        System.out.println("Metacat exception: " + e.getMessage());
310
      }
311
    }
312
  }
313
  
314

    
315
  /**
316
   * Parses the site harvest list XML file to find out which documents to 
317
   * harvest.
318
   * 
319
   * @return  true if successful, otherwise false
320
   */
321
  public boolean parseHarvestList() 
322
          throws ParserConfigurationException {
323
    DocumentListHandler documentListHandler = new DocumentListHandler();
324
    InputStream inputStream;
325
    InputStreamReader inputStreamReader;
326
    String schemaLocation = getSchemaLocation();
327
    boolean success = false;
328
    URL url;
329

    
330
    try {
331
      url = new URL(documentListURL);
332
      inputStream = url.openStream();
333
      harvester.addLogEntry(0,
334
                            "Retrieved: " + documentListURL,
335
                            "harvester.GetHarvestListSuccess",
336
                            siteScheduleID,
337
                            null,
338
                            "");
339
      inputStreamReader = new InputStreamReader(inputStream);
340
//      char[] harvestListChars = new char[1024];
341
//      inputStreamReader.read(harvestListChars, 0, 1024);
342
//      System.out.println("documentListURL: " + documentListURL);
343
//      String encoding = inputStreamReader.getEncoding();
344
//      System.out.println("encoding: " + encoding);
345
//      String harvestListStr = new String(harvestListChars);
346
//      System.out.println("harvestListStr:\n" + harvestListStr);
347
      documentListHandler.runParser(inputStreamReader, schemaLocation);
348
      harvester.addLogEntry(0,
349
                            "Validated: " + documentListURL,
350
                            "harvester.ValidateHarvestListSuccess",
351
                            siteScheduleID,
352
                            null,
353
                            "");
354
      success = true;
355
    }
356
    catch (MalformedURLException e){
357
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
358
                            "harvester.GetHarvestListError", siteScheduleID, null, "");
359
    }
360
    catch (FileNotFoundException e) {
361
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
362
                            "harvester.GetHarvestListError", siteScheduleID, null, "");
363
    }
364
    catch (SAXException e) {
365
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
366
                          "harvester.ValidateHarvestListError", siteScheduleID, null, "");
367
    }
368
    catch (ClassNotFoundException e) {
369
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
370
                          "harvester.ValidateHarvestListError", siteScheduleID, null, "");
371
    }
372
    catch (IOException e) {
373
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
374
                            "harvester.GetHarvestListError", siteScheduleID, null, "");
375
    }
376
    
377
    return success;
378
  }
379

    
380

    
381
  /**
382
   * Prints the data that is stored in this HarvestSiteSchedule object.
383
   * 
384
   * @param out   the PrintStream to write to
385
   */
386
  public void printOutput(PrintStream out) {
387
    out.println("* siteScheduleID:       " + siteScheduleID);
388
    out.println("* documentListURL:      " + documentListURL);
389
    out.println("* ldapDN:               " + ldapDN);
390
    out.println("* dateNextHarvest:      " + dateNextHarvest);
391
    out.println("* dateLastHarvest:      " + dateLastHarvest);
392
    out.println("* updateFrequency:      " + updateFrequency);
393
    out.println("* unit:                 " + unit);
394
    out.println("* contactEmail:         " + contactEmail);
395
  }
396
  
397
  /**
398
   * Reports a summary of the site harvest. Includes the following:
399
   *   A list of documents that were successfully inserted.
400
   *   A list of documents that were successfully updated.
401
   *   A list of documents that could not be accessed at the site.
402
   *   A list of documents that could not be uploaded to Metacat.
403
   *   A list of documents that were already found in Metacat.
404
   *   
405
   * @param out  the PrintStream to write to
406
   */
407
  void printSiteSummary(PrintStream out) {
408
    HarvestDocument harvestDocument;
409
    int nAccessError = 0;
410
    int nInserted = 0;
411
    int nMetacatHasIt = 0;
412
    int nUpdated = 0;
413
    int nUploadError = 0;
414
    
415
    for (int i = 0; i < harvestDocumentList.size(); i++) {
416
      harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
417
          
418
      if (harvestDocument != null) {
419
        if (harvestDocument.accessError)  { nAccessError++; }
420
        if (harvestDocument.inserted)     { nInserted++; }
421
        if (harvestDocument.metacatHasIt) { nMetacatHasIt++; }
422
        if (harvestDocument.updated)      { nUpdated++; }
423
        if (harvestDocument.uploadError)  { nUploadError++; }
424
      }
425
    }
426
    
427
    if (nInserted > 0) {
428
      printSiteSummaryHeader(out);
429
      out.println("* The following document(s) were successfully inserted:");
430
      for (int i = 0; i < harvestDocumentList.size(); i++) {
431
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
432
        if (harvestDocument != null) {
433
          if (harvestDocument.inserted)  {
434
            harvestDocument.prettyPrint(out);
435
          }
436
        }
437
      }
438
      printSiteSummaryTrailer(out);
439
    }
440

    
441
    if (nUpdated > 0) {
442
      printSiteSummaryHeader(out);
443
      out.println("* The following document(s) were successfully updated:");
444
      for (int i = 0; i < harvestDocumentList.size(); i++) {
445
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
446
        if (harvestDocument != null) {
447
          if (harvestDocument.updated)  {
448
            harvestDocument.prettyPrint(out);
449
          }
450
        }
451
      }
452
      printSiteSummaryTrailer(out);
453
    }
454

    
455
    if (nAccessError > 0) {
456
      printSiteSummaryHeader(out);
457
      out.println("* The following document(s) could not be accessed");
458
      out.println("* at the site. Please check the URL to ensure that it is");
459
      out.println("* accessible at the site.");
460
      for (int i = 0; i < harvestDocumentList.size(); i++) {
461
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
462
        if (harvestDocument != null) {
463
          if (harvestDocument.accessError)  {
464
            harvestDocument.prettyPrint(out);
465
          }
466
        }
467
      }
468
      printSiteSummaryTrailer(out);
469
    }
470

    
471
    if (nUploadError > 0) {
472
      printSiteSummaryHeader(out);
473
      out.println("* The following document(s) could not be uploaded to");
474
      out.println("* Metacat because an error of some kind occurred.");
475
      out.println("* (See log entries below for additional details.) :");
476
      for (int i = 0; i < harvestDocumentList.size(); i++) {
477
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
478
        if (harvestDocument != null) {
479
          if (harvestDocument.uploadError)  {
480
            harvestDocument.prettyPrint(out);
481
          }
482
        }
483
      }
484
      printSiteSummaryTrailer(out);
485
    }
486

    
487
    if (nMetacatHasIt > 0) {
488
      printSiteSummaryHeader(out);
489
      out.println("* The following document(s) were already found in Metacat:");
490

    
491
      for (int i = 0; i < harvestDocumentList.size(); i++) {
492
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
493
        if (harvestDocument != null) {
494
          if (harvestDocument.metacatHasIt)  {
495
            harvestDocument.prettyPrint(out);
496
          }
497
        }
498
      }
499
      printSiteSummaryTrailer(out);
500
    }
501

    
502
  }
503
  
504

    
505
  /**
506
   * Prints the header lines of a site summary entry.
507
   * 
508
   * @param out    the PrintStream to write to
509
   */
510
  void printSiteSummaryHeader(PrintStream out) {
511
    final String filler = Harvester.filler;
512
    final String marker = Harvester.marker;
513

    
514
    out.println("");
515
    out.println(marker);
516
    out.println(filler);
517
  }
518
  
519

    
520
  /**
521
   * Prints the trailing lines of a site summary entry.
522
   * 
523
   * @param out    the PrintStream to write to
524
   */
525
  void printSiteSummaryTrailer(PrintStream out) {
526
    final String filler = Harvester.filler;
527
    final String marker = Harvester.marker;
528

    
529
    out.println(filler);
530
    out.println(marker);
531
  }
532
  
533

    
534
  /**
535
   * Sends a report to the Site Contact summarizing the results of the harvest 
536
   * at that site.
537
   */
538
  void reportToSiteContact() {
539
    PrintStream body;
540
    String from = harvester.harvesterAdministrator;
541
    String[] fromArray;
542
    String maxCodeLevel = "notice";
543
    MailMessage msg;
544
    int nErrors = 0;
545
    String subject = "Report from Metacat Harvester: " + harvester.timestamp;
546
    String to = contactEmail;
547
    String[] toArray;
548
    
549
    if (!to.equals("")) {
550
      System.out.println("Sending report to siteScheduleID=" + siteScheduleID +
551
                         " at address: " + contactEmail);
552
      try {
553
        msg = new MailMessage(harvester.smtpServer);
554
        
555
        if (from.indexOf(',') > 0) {
556
          fromArray = from.split(",");
557
          
558
          for (int i = 0; i < fromArray.length; i++) {
559
            if (i == 0) {
560
              msg.from(fromArray[i]);
561
            }
562
            
563
            msg.cc(fromArray[i]);
564
            
565
          }
566
        }
567
        else if (from.indexOf(';') > 0) {
568
          fromArray = from.split(";");
569

    
570
          for (int i = 0; i < fromArray.length; i++) {
571
            if (i == 0) {
572
              msg.from(fromArray[i]);
573
            }
574
            
575
            msg.cc(fromArray[i]);
576
            
577
          }
578
        }
579
        else {
580
          msg.from(from);
581
          msg.cc(from);
582
        }
583
        
584
        if (to.indexOf(',') > 0) {
585
          toArray = to.split(",");
586
          
587
          for (int i = 0; i < toArray.length; i++) {
588
            msg.to(toArray[i]);
589
          }
590
        }
591
        else if (to.indexOf(';') > 0) {
592
          toArray = to.split(";");
593
          
594
          for (int i = 0; i < toArray.length; i++) {
595
            msg.to(toArray[i]);
596
          }
597
        }
598
        else {
599
          msg.to(to);
600
        }
601
        
602
        msg.setSubject(subject);
603
        body = msg.getPrintStream();
604
        harvester.printHarvestHeader(body, siteScheduleID);
605
        printSiteSummary(body);
606
        harvester.printHarvestLog(body, maxCodeLevel, siteScheduleID);
607
        msg.sendAndClose();        
608
      }
609
      catch (IOException e) {
610
        System.out.println("There was a problem sending email to " + to);
611
        System.out.println("IOException: " + e.getMessage());
612
      }
613
    }
614
  }
615
    
616

    
617
  /**
618
   * Accessor method for setting the value of the schemaLocation field.
619
   * 
620
   * @param schemaLocation  the new value of the schemaLocation field
621
   */
622
  public void setSchemaLocation(String schemaLocation) {
623
    this.schemaLocation = schemaLocation;
624
  }
625

    
626

    
627
  /**
628
   * This inner class extends DefaultHandler. It parses the document list,
629
   * creating a new HarvestDocument object every time it finds a </Document>
630
   * end tag.
631
   */
632
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
633
  
634
    public String scope;
635
    public int identifier;
636
    public String identifierString;
637
    public String documentType;
638
    public int revision;
639
    public String revisionString;
640
    public String documentURL;
641
    private String currentQname;
642
    public final static String DEFAULT_PARSER = 
643
           "org.apache.xerces.parsers.SAXParser";
644
    private boolean schemaValidate = true;
645
	
646

    
647
	  /**
648
     * This method is called for any plain text within an element.
649
     * It parses the value for any of the following elements:
650
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
651
     * 
652
     * @param ch          the character array holding the parsed text
653
     * @param start       the start index
654
     * @param length      the text length
655
     * 
656
     */
657
    public void characters (char ch[], int start, int length) {
658
      String s = new String(ch, start, length);
659
 
660
      if (length > 0) {           
661
        if (currentQname.equals("scope")) {
662
          scope += s;
663
        }
664
        else if (currentQname.equals("identifier")) {
665
          identifierString += s;
666
        }
667
        else if (currentQname.equals("revision")) {
668
          revisionString += s;
669
        }
670
        else if (currentQname.equals("documentType")) {
671
          documentType += s;
672
        }
673
        else if (currentQname.equals("documentURL")) {
674
          documentURL += s;
675
        }
676
      }
677
    }
678

    
679

    
680
    /** 
681
     * Handles an end-of-document event.
682
     */
683
    public void endDocument () {
684
      System.out.println("Finished parsing " + documentListURL);
685
    }
686

    
687

    
688
    /** 
689
     * Handles an end-of-element event. If the end tag is </Document>, then
690
     * creates a new HarvestDocument object and pushes it to the document
691
     * list.
692
     * 
693
     * @param uri
694
     * @param localname
695
     * @param qname
696
     */
697
    public void endElement(String uri, 
698
                           String localname,
699
                           String qname) {
700
      
701
      HarvestDocument harvestDocument;
702
      
703
      if (qname.equals("identifier")) {
704
        identifier = Integer.parseInt(identifierString);
705
      }
706
      else if (qname.equals("revision")) {
707
        revision = Integer.parseInt(revisionString);
708
      }
709
      else if (qname.equals("document")) {
710
        harvestDocument = new HarvestDocument(
711
                                              harvester,
712
                                              HarvestSiteSchedule.this,
713
                                              scope,
714
                                              identifier,
715
                                              revision,
716
                                              documentType,
717
                                              documentURL
718
                                             );
719
        harvestDocumentList.add(harvestDocument);
720
      }
721

    
722
      currentQname = "";
723
    }
724

    
725

    
726
    /**
727
     * Method for handling errors during a parse
728
     *
729
     * @param exception         The parsing error
730
     * @exception SAXException  Description of Exception
731
     */
732
     public void error(SAXParseException e) throws SAXParseException {
733
        System.out.println("SAXParseException: " + e.getMessage());
734
        throw e;
735
    }
736

    
737

    
738
    /**
739
     * Run the validating parser
740
     *
741
     * @param xml             the xml stream to be validated
742
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
743
     * @exception IOException thrown when test files can't be opened
744
     * @exception ClassNotFoundException thrown when SAX Parser class not found
745
     * @exception SAXException
746
     * @exception SAXParserException
747
     */
748
    public void runParser(Reader xml, String schemaLocation)
749
           throws IOException, ClassNotFoundException,
750
                  SAXException, SAXParseException {
751

    
752
      // Get an instance of the parser
753
      XMLReader parser;
754

    
755
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
756
      // Set Handlers in the parser
757
      parser.setContentHandler((ContentHandler)this);
758
      parser.setErrorHandler((ErrorHandler)this);
759
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
760
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
761
      parser.setFeature("http://xml.org/sax/features/validation", true);
762
      parser.setProperty(
763
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
764
              schemaLocation);
765

    
766
      if (schemaValidate) {
767
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
768
                          true);
769
      }
770
    
771
      // Parse the document
772
      parser.parse(new InputSource(xml));
773
    }
774
    /**
775
     * Handles a start-of-document event.
776
     */
777
    public void startDocument () {
778
      System.out.println("Started parsing " + documentListURL);
779
    }
780

    
781

    
782
    /** 
783
     * Handles a start-of-element event.
784
     * 
785
     * @param uri
786
     * @param localname
787
     * @param qname
788
     * @param attributes
789
     */
790
    public void startElement(String uri, 
791
                             String localname,
792
                             String qname,
793
                             Attributes attributes) {
794
      
795
      currentQname = qname;
796

    
797
      if (qname.equals("scope")) {
798
        scope = "";
799
      }
800
      else if (qname.equals("identifier")) {
801
        identifierString = "";
802
      }
803
      else if (qname.equals("revision")) {
804
        revisionString = "";
805
      }
806
      else if (qname.equals("documentType")) {
807
        documentType = "";
808
      }
809
      else if (qname.equals("documentURL")) {
810
        documentURL = "";
811
      }
812
    }
813
  }
814
}
(5-5/11)