Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2005-11-18 13:18:15 -0800 (Fri, 18 Nov 2005) $'
8
 * '$Revision: 2765 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.FileNotFoundException;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.io.PrintStream;
33
import java.io.Reader;
34
import java.net.MalformedURLException;
35
import java.net.URL;
36
import java.sql.Connection;
37
import java.sql.SQLException;
38
import java.sql.Statement;
39
import java.text.DateFormat;
40
import java.text.ParseException;
41
import java.text.SimpleDateFormat;
42
import java.util.ArrayList;
43
import java.util.Date;
44
import javax.xml.parsers.ParserConfigurationException;
45
import org.xml.sax.Attributes;
46
import org.xml.sax.ContentHandler;
47
import org.xml.sax.ErrorHandler;
48
import org.xml.sax.InputSource;
49
import org.xml.sax.SAXException;
50
import org.xml.sax.SAXParseException;
51
import org.xml.sax.XMLReader;
52
import org.xml.sax.helpers.DefaultHandler;
53
import org.xml.sax.helpers.XMLReaderFactory;
54

    
55
import edu.ucsb.nceas.metacat.client.Metacat;
56
import edu.ucsb.nceas.metacat.client.MetacatException;
57
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
58

    
59

    
60
/**
61
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
62
 * table, determining when and how to harvest the documents for a given site.
63
 * 
64
 * @author  costa
65
 */
66
public class HarvestSiteSchedule {
67
    
68
  // Value of the CONTACT_EMAIL field; recipient(s) of the site harvest report.
  private String contactEmail;
69
  // Value of the DATELASTHARVEST field, kept as the string that was read.
  private String dateLastHarvest;
70
  // Value of the DATENEXTHARVEST field; parsed as "yyyy-MM-dd" by
  // dueForHarvest().
  private String dateNextHarvest;
71
  // Number of milliseconds between the last harvest date and the next
  // harvest date; computed in the constructor from updateFrequency and unit.
  private long delta;
72
  // Value of the DOCUMENTLISTURL field; URL of the site's harvest list.
  private String documentListURL;
73
  // The parent Harvester object (connection, logging, mail settings).
  private Harvester harvester;
74
  // HarvestDocument objects added by DocumentListHandler.endElement().
  private ArrayList harvestDocumentList = new ArrayList();
75
  // NOTE(review): not referenced anywhere in the visible code of this file.
  private String harvestSiteEndTime;
76
  // NOTE(review): not referenced anywhere in the visible code of this file.
  private String harvestSiteStartTime;
77
  // Value of the LDAPDN field; used as the Metacat login name.
  private String ldapDN;
78
  // Value of the LDAPPASSWORD field; used as the Metacat login password.
  private String ldapPwd;
79
  // Milliseconds in one day; the base unit used when computing delta.
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
80
  // Passed to the parser's external-schemaLocation property by runParser().
  private String schemaLocation = 
81
    "eml://ecoinformatics.org/harvestList ../../lib/harvester/harvestList.xsd";
82
  // Value of the SITE_SCHEDULE_ID field (package-private access).
  int siteScheduleID;
83
  // Value of the UNIT field; "weeks" and "months" scale delta, any other
  // value leaves delta in days.
  private String unit;
84
  // Value of the UPDATEFREQUENCY field; number of units between harvests.
  private int updateFrequency;
85
    
86
  /**
   * Creates a new instance of HarvestSiteSchedule, initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
   * Also computes delta, the number of milliseconds between two harvests.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field; "weeks" and "months"
   *                        are recognized, any other value (including null)
   *                        is treated as days
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;

    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. Start from days.
    delta = updateFrequency * millisecondsPerDay;

    // Literal-first comparisons so that a null unit falls back to days
    // instead of throwing a NullPointerException.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      // A month is approximated as 30 days.
      delta *= 30;
    }
  }
136
  
137
  
138
  /**
139
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
140
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
141
   * Calculates the date of the next harvest based on today's date and the 
142
   * update frequency.
143
   */
144
  private void dbUpdateHarvestDates() {
145
    Connection conn;
146
    long currentTime;                    // Current time in milliseconds
147
    Date dateNextHarvest;                // Date of next harvest
148
    String lastHarvest;
149
    String nextHarvest;
150
    Date now = new Date();
151
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
152
    Statement stmt;
153
    long timeNextHarvest;
154
    
155
    conn = harvester.getConnection();
156
    now = new Date();
157
    currentTime = now.getTime();
158
    timeNextHarvest = currentTime + delta;
159
    dateNextHarvest = new Date(timeNextHarvest);
160
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
161
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
162
	
163
	try {
164
      stmt = conn.createStatement();
165
      stmt.executeUpdate(
166
                         "UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
167
                         nextHarvest +
168
                         " WHERE SITE_SCHEDULE_ID = " +
169
                         siteScheduleID);
170
      stmt.executeUpdate(
171
                         "UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
172
                         lastHarvest +
173
                         " WHERE SITE_SCHEDULE_ID = " +
174
                         siteScheduleID);
175
      stmt.close();
176
    }
177
    catch(SQLException e) {
178
      System.out.println("SQLException: " + e.getMessage());
179
    }
180
  }
181
    
182

    
183
  /**
184
   * Boolean to determine whether this site is currently due for its next
185
   * harvest.
186
   * 
187
   * @retrun     true if due for harvest, otherwise false
188
   */
189
  public boolean dueForHarvest() {
190
    boolean dueForHarvest = false;
191
//    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
192
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
193
    Date now = new Date();
194
    Date dnh;                          // Date of next harvest
195
    long currentTime = now.getTime();  // Current time in milliseconds
196
    long timeNextHarvest = 0;
197
    
198
    try {
199
      dnh = dateFormat.parse(dateNextHarvest);
200
      timeNextHarvest = dnh.getTime();
201
      
202
      if (timeNextHarvest < currentTime) {
203
        dueForHarvest = true;
204
        System.out.println("Due for harvest: " + documentListURL);
205
      }
206
      else {
207
        System.out.println("Not due for harvest: " + documentListURL);
208
      }
209
    }
210
    catch (ParseException e) {
211
      System.out.println("Error parsing date: " + e.getMessage());
212
    }
213
    
214
    return dueForHarvest;
215
  }
216
  
217

    
218
  /**
219
   * Accessor method for the schemaLocation field.
220
   * 
221
   * @return schemaLocation  the schema location string
222
   */
223
  public String getSchemaLocation() {
224
    return schemaLocation;
225
  }
226

    
227

    
228
  /**
229
   * Harvests each document in the site document list.
230
   * 
231
   * @throws SAXException
232
   * @throws IOException
233
   * @throws ParserConfigurationException
234
   */
235
  public void harvestDocumentList() {
236
    HarvestDocument harvestDocument;
237
    boolean success;
238
    
239
    if (dueForHarvest()) {
240
      try {
241
        success = parseHarvestList();
242

    
243
        /* If the document list was validated, then proceed with harvesting
244
         * the documents
245
         */
246
        if (success) {
247
          metacatLogin();
248
        
249
          for (int i = 0; i < harvestDocumentList.size(); i++) {
250
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
251
          
252
            if (harvestDocument != null) {
253
              harvestDocument.harvestDocument();
254
            }
255
          }
256

    
257
          metacatLogout();      
258
          dbUpdateHarvestDates();  // Update the schedule
259
        }
260
      }
261
      catch (ParserConfigurationException e) {
262
        System.out.println("ParserConfigurationException: " + e.getMessage());
263
      }
264
      
265
      reportToSiteContact();
266
    }
267
  }
268

    
269

    
270
  /**
271
   * Login to Metacat using the ldapDN and ldapPwd
272
   */
273
  public void metacatLogin() {
274
    Metacat metacat = harvester.metacat;
275
    String response;
276

    
277
    if (harvester.connectToMetacat()) {
278
      try {
279
        System.out.println("Logging in to Metacat: " + ldapDN);
280
        response = metacat.login(ldapDN, ldapPwd);
281
        //System.out.println("Metacat login response: " + response);
282
      } 
283
      catch (MetacatInaccessibleException e) {
284
        System.out.println("Metacat login failed." + e.getMessage());
285
      } 
286
      catch (Exception e) {
287
        System.out.println("Metacat login failed." + e.getMessage());
288
      }
289
    }    
290
  }
291
  
292
  
293
  /**
294
   * Logout from Metacat
295
   */
296
  private void metacatLogout() {
297
    Metacat metacat = harvester.metacat;
298

    
299
    if (harvester.connectToMetacat()) {
300
      try {    
301
        // Log out from the Metacat session
302
        System.out.println("Logging out from Metacat");
303
        metacat.logout();
304
      }
305
      catch (MetacatInaccessibleException e) {
306
        System.out.println("Metacat inaccessible: " + e.getMessage());
307
      }
308
      catch (MetacatException e) {
309
        System.out.println("Metacat exception: " + e.getMessage());
310
      }
311
    }
312
  }
313
  
314

    
315
  /**
316
   * Parses the site harvest list XML file to find out which documents to 
317
   * harvest.
318
   * 
319
   * @return  true if successful, otherwise false
320
   */
321
  public boolean parseHarvestList() 
322
          throws ParserConfigurationException {
323
    DocumentListHandler documentListHandler = new DocumentListHandler();
324
    InputStream inputStream;
325
    InputStreamReader inputStreamReader;
326
    String schemaLocation = getSchemaLocation();
327
    boolean success = false;
328
    URL url;
329

    
330
    try {
331
      url = new URL(documentListURL);
332
      inputStream = url.openStream();
333
      harvester.addLogEntry(0,
334
                            "Retrieved: " + documentListURL,
335
                            "GetHarvestListSuccess",
336
                            siteScheduleID,
337
                            null,
338
                            "");
339
      inputStreamReader = new InputStreamReader(inputStream);
340
      documentListHandler.runParser(inputStreamReader, schemaLocation);
341
      harvester.addLogEntry(0,
342
                            "Validated: " + documentListURL,
343
                            "ValidateHarvestListSuccess",
344
                            siteScheduleID,
345
                            null,
346
                            "");
347
      success = true;
348
    }
349
    catch (MalformedURLException e){
350
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
351
                            "GetHarvestListError", siteScheduleID, null, "");
352
    }
353
    catch (FileNotFoundException e) {
354
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
355
                            "GetHarvestListError", siteScheduleID, null, "");
356
    }
357
    catch (SAXException e) {
358
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
359
                          "ValidateHarvestListError", siteScheduleID, null, "");
360
    }
361
    catch (ClassNotFoundException e) {
362
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
363
                          "ValidateHarvestListError", siteScheduleID, null, "");
364
    }
365
    catch (IOException e) {
366
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
367
                            "GetHarvestListError", siteScheduleID, null, "");
368
    }
369
    
370
    return success;
371
  }
372

    
373

    
374
  /**
375
   * Prints the data that is stored in this HarvestSiteSchedule object.
376
   * 
377
   * @param out   the PrintStream to write to
378
   */
379
  public void printOutput(PrintStream out) {
380
    out.println("* siteScheduleID:       " + siteScheduleID);
381
    out.println("* documentListURL:      " + documentListURL);
382
    out.println("* ldapDN:               " + ldapDN);
383
    out.println("* dateNextHarvest:      " + dateNextHarvest);
384
    out.println("* dateLastHarvest:      " + dateLastHarvest);
385
    out.println("* updateFrequency:      " + updateFrequency);
386
    out.println("* unit:                 " + unit);
387
    out.println("* contactEmail:         " + contactEmail);
388
  }
389
  
390
  /**
391
   * Reports a summary of the site harvest. Includes the following:
392
   *   A list of documents that were successfully inserted.
393
   *   A list of documents that were successfully updated.
394
   *   A list of documents that could not be accessed at the site.
395
   *   A list of documents that could not be uploaded to Metacat.
396
   *   A list of documents that were already found in Metacat.
397
   *   
398
   * @param out  the PrintStream to write to
399
   */
400
  void printSiteSummary(PrintStream out) {
401
    HarvestDocument harvestDocument;
402
    int nAccessError = 0;
403
    int nInserted = 0;
404
    int nMetacatHasIt = 0;
405
    int nUpdated = 0;
406
    int nUploadError = 0;
407
    
408
    for (int i = 0; i < harvestDocumentList.size(); i++) {
409
      harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
410
          
411
      if (harvestDocument != null) {
412
        if (harvestDocument.accessError)  { nAccessError++; }
413
        if (harvestDocument.inserted)     { nInserted++; }
414
        if (harvestDocument.metacatHasIt) { nMetacatHasIt++; }
415
        if (harvestDocument.updated)      { nUpdated++; }
416
        if (harvestDocument.uploadError)  { nUploadError++; }
417
      }
418
    }
419
    
420
    if (nInserted > 0) {
421
      printSiteSummaryHeader(out);
422
      out.println("* The following document(s) were successfully inserted:");
423
      for (int i = 0; i < harvestDocumentList.size(); i++) {
424
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
425
        if (harvestDocument != null) {
426
          if (harvestDocument.inserted)  {
427
            harvestDocument.prettyPrint(out);
428
          }
429
        }
430
      }
431
      printSiteSummaryTrailer(out);
432
    }
433

    
434
    if (nUpdated > 0) {
435
      printSiteSummaryHeader(out);
436
      out.println("* The following document(s) were successfully updated:");
437
      for (int i = 0; i < harvestDocumentList.size(); i++) {
438
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
439
        if (harvestDocument != null) {
440
          if (harvestDocument.updated)  {
441
            harvestDocument.prettyPrint(out);
442
          }
443
        }
444
      }
445
      printSiteSummaryTrailer(out);
446
    }
447

    
448
    if (nAccessError > 0) {
449
      printSiteSummaryHeader(out);
450
      out.println("* The following document(s) could not be accessed");
451
      out.println("* at the site. Please check the URL to ensure that it is");
452
      out.println("* accessible at the site.");
453
      for (int i = 0; i < harvestDocumentList.size(); i++) {
454
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
455
        if (harvestDocument != null) {
456
          if (harvestDocument.accessError)  {
457
            harvestDocument.prettyPrint(out);
458
          }
459
        }
460
      }
461
      printSiteSummaryTrailer(out);
462
    }
463

    
464
    if (nUploadError > 0) {
465
      printSiteSummaryHeader(out);
466
      out.println("* The following document(s) could not be uploaded to");
467
      out.println("* Metacat because an error of some kind occurred.");
468
      out.println("* See log entries below for additional details.) :");
469
      for (int i = 0; i < harvestDocumentList.size(); i++) {
470
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
471
        if (harvestDocument != null) {
472
          if (harvestDocument.uploadError)  {
473
            harvestDocument.prettyPrint(out);
474
          }
475
        }
476
      }
477
      printSiteSummaryTrailer(out);
478
    }
479

    
480
    if (nMetacatHasIt > 0) {
481
      printSiteSummaryHeader(out);
482
      out.println("* The following document(s) were already found in Metacat.");
483
      out.println("* See log entries below for additional details.) :");
484
      for (int i = 0; i < harvestDocumentList.size(); i++) {
485
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
486
        if (harvestDocument != null) {
487
          if (harvestDocument.metacatHasIt)  {
488
            harvestDocument.prettyPrint(out);
489
          }
490
        }
491
      }
492
      printSiteSummaryTrailer(out);
493
    }
494

    
495
  }
496
  
497

    
498
  /**
499
   * Prints the header lines of a site summary entry.
500
   * 
501
   * @param out    the PrintStream to write to
502
   */
503
  void printSiteSummaryHeader(PrintStream out) {
504
    final String filler = Harvester.filler;
505
    final String marker = Harvester.marker;
506

    
507
    out.println("");
508
    out.println(marker);
509
    out.println(filler);
510
  }
511
  
512

    
513
  /**
514
   * Prints the trailing lines of a site summary entry.
515
   * 
516
   * @param out    the PrintStream to write to
517
   */
518
  void printSiteSummaryTrailer(PrintStream out) {
519
    final String filler = Harvester.filler;
520
    final String marker = Harvester.marker;
521

    
522
    out.println(filler);
523
    out.println(marker);
524
  }
525
  
526

    
527
  /**
528
   * Sends a report to the Site Contact summarizing the results of the harvest 
529
   * at that site.
530
   */
531
  void reportToSiteContact() {
532
    PrintStream body;
533
    String from = harvester.harvesterAdministrator;
534
    String[] fromArray;
535
    String maxCodeLevel = "notice";
536
    MailMessage msg;
537
    int nErrors = 0;
538
    String subject = "Report from Metacat Harvester: " + harvester.timestamp;
539
    String to = contactEmail;
540
    String[] toArray;
541
    
542
    if (!to.equals("")) {
543
      System.out.println("Sending report to siteScheduleID=" + siteScheduleID +
544
                         " at address: " + contactEmail);
545
      try {
546
        msg = new MailMessage(harvester.smtpServer);
547
        
548
        if (from.indexOf(',') > 0) {
549
          fromArray = from.split(",");
550
          
551
          for (int i = 0; i < fromArray.length; i++) {
552
            if (i == 0) {
553
              msg.from(fromArray[i]);
554
            }
555
            
556
            msg.cc(fromArray[i]);
557
            
558
          }
559
        }
560
        else if (from.indexOf(';') > 0) {
561
          fromArray = from.split(";");
562

    
563
          for (int i = 0; i < fromArray.length; i++) {
564
            if (i == 0) {
565
              msg.from(fromArray[i]);
566
            }
567
            
568
            msg.cc(fromArray[i]);
569
            
570
          }
571
        }
572
        else {
573
          msg.from(from);
574
          msg.cc(from);
575
        }
576
        
577
        if (to.indexOf(',') > 0) {
578
          toArray = to.split(",");
579
          
580
          for (int i = 0; i < toArray.length; i++) {
581
            msg.to(toArray[i]);
582
          }
583
        }
584
        else if (to.indexOf(';') > 0) {
585
          toArray = to.split(";");
586
          
587
          for (int i = 0; i < toArray.length; i++) {
588
            msg.to(toArray[i]);
589
          }
590
        }
591
        else {
592
          msg.to(to);
593
        }
594
        
595
        msg.setSubject(subject);
596
        body = msg.getPrintStream();
597
        harvester.printHarvestHeader(body, siteScheduleID);
598
        printSiteSummary(body);
599
        harvester.printHarvestLog(body, maxCodeLevel, siteScheduleID);
600
        msg.sendAndClose();        
601
      }
602
      catch (IOException e) {
603
        System.out.println("There was a problem sending email to " + to);
604
        System.out.println("IOException: " + e.getMessage());
605
      }
606
    }
607
  }
608
    
609

    
610
  /**
611
   * Accessor method for setting the value of the schemaLocation field.
612
   * 
613
   * @param schemaLocation  the new value of the schemaLocation field
614
   */
615
  public void setSchemaLocation(String schemaLocation) {
616
    this.schemaLocation = schemaLocation;
617
  }
618

    
619

    
620
  /**
621
   * This inner class extends DefaultHandler. It parses the document list,
622
   * creating a new HarvestDocument object every time it finds a </document>
623
   * end tag.
624
   */
625
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
626
  
627
    // Text of the current <scope> element; reset in startElement().
    public String scope;
628
    // Integer value of identifierString; set in endElement().
    public int identifier;
629
    // Text of the current <identifier> element; reset in startElement().
    public String identifierString;
630
    // Text of the current <documentType> element; reset in startElement().
    public String documentType;
631
    // Integer value of revisionString; set in endElement().
    public int revision;
632
    // Text of the current <revision> element; reset in startElement().
    public String revisionString;
633
    // Text of the current <documentURL> element; reset in startElement().
    public String documentURL;
634
    // Qualified name of the element being read; "" after each end tag.
    private String currentQname;
635
    // Class name handed to XMLReaderFactory.createXMLReader() in runParser().
    public final static String DEFAULT_PARSER = 
636
           "org.apache.xerces.parsers.SAXParser";
637
    // When true, runParser() turns on the Xerces schema validation feature.
    private boolean schemaValidate = true;
638
	
639

    
640
	  /**
641
     * This method is called for any plain text within an element.
642
     * It parses the value for any of the following elements:
643
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
644
     * 
645
     * @param ch          the character array holding the parsed text
646
     * @param start       the start index
647
     * @param length      the text length
648
     * 
649
     */
650
    public void characters (char ch[], int start, int length) {
651
      String s = new String(ch, start, length);
652
 
653
      if (length > 0) {           
654
        if (currentQname.equals("scope")) {
655
          scope += s;
656
        }
657
        else if (currentQname.equals("identifier")) {
658
          identifierString += s;
659
        }
660
        else if (currentQname.equals("revision")) {
661
          revisionString += s;
662
        }
663
        else if (currentQname.equals("documentType")) {
664
          documentType += s;
665
        }
666
        else if (currentQname.equals("documentURL")) {
667
          documentURL += s;
668
        }
669
      }
670
    }
671

    
672

    
673
    /** 
674
     * Handles an end-of-document event.
675
     */
676
    public void endDocument () {
677
      System.out.println("Finished parsing " + documentListURL);
678
    }
679

    
680

    
681
    /** 
682
     * Handles an end-of-element event. If the end tag is </Document>, then
683
     * creates a new HarvestDocument object and pushes it to the document
684
     * list.
685
     * 
686
     * @param uri
687
     * @param localname
688
     * @param qname
689
     */
690
    public void endElement(String uri, 
691
                           String localname,
692
                           String qname) {
693
      
694
      HarvestDocument harvestDocument;
695
      
696
      if (qname.equals("identifier")) {
697
        identifier = Integer.parseInt(identifierString);
698
      }
699
      else if (qname.equals("revision")) {
700
        revision = Integer.parseInt(revisionString);
701
      }
702
      else if (qname.equals("document")) {
703
        harvestDocument = new HarvestDocument(
704
                                              harvester,
705
                                              HarvestSiteSchedule.this,
706
                                              scope,
707
                                              identifier,
708
                                              revision,
709
                                              documentType,
710
                                              documentURL
711
                                             );
712
        harvestDocumentList.add(harvestDocument);
713
      }
714

    
715
      currentQname = "";
716
    }
717

    
718

    
719
    /**
720
     * Method for handling errors during a parse
721
     *
722
     * @param exception         The parsing error
723
     * @exception SAXException  Description of Exception
724
     */
725
     public void error(SAXParseException e) throws SAXParseException {
726
        System.out.println("SAXParseException: " + e.getMessage());
727
        throw e;
728
    }
729

    
730

    
731
    /**
732
     * Run the validating parser
733
     *
734
     * @param xml             the xml stream to be validated
735
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
736
     * @exception IOException thrown when test files can't be opened
737
     * @exception ClassNotFoundException thrown when SAX Parser class not found
738
     * @exception SAXException
739
     * @exception SAXParserException
740
     */
741
    public void runParser(Reader xml, String schemaLocation)
742
           throws IOException, ClassNotFoundException,
743
                  SAXException, SAXParseException {
744

    
745
      // Get an instance of the parser
746
      XMLReader parser;
747

    
748
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
749
      // Set Handlers in the parser
750
      parser.setContentHandler((ContentHandler)this);
751
      parser.setErrorHandler((ErrorHandler)this);
752
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
753
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
754
      parser.setFeature("http://xml.org/sax/features/validation", true);
755
      parser.setProperty(
756
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
757
              schemaLocation);
758

    
759
      if (schemaValidate) {
760
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
761
                          true);
762
      }
763
    
764
      // Parse the document
765
      parser.parse(new InputSource(xml));
766
    }
767
    /**
768
     * Handles a start-of-document event.
769
     */
770
    public void startDocument () {
771
      System.out.println("Started parsing " + documentListURL);
772
    }
773

    
774

    
775
    /** 
776
     * Handles a start-of-element event.
777
     * 
778
     * @param uri
779
     * @param localname
780
     * @param qname
781
     * @param attributes
782
     */
783
    public void startElement(String uri, 
784
                             String localname,
785
                             String qname,
786
                             Attributes attributes) {
787
      
788
      currentQname = qname;
789

    
790
      if (qname.equals("scope")) {
791
        scope = "";
792
      }
793
      else if (qname.equals("identifier")) {
794
        identifierString = "";
795
      }
796
      else if (qname.equals("revision")) {
797
        revisionString = "";
798
      }
799
      else if (qname.equals("documentType")) {
800
        documentType = "";
801
      }
802
      else if (qname.equals("documentURL")) {
803
        documentURL = "";
804
      }
805
    }
806
  }
807
}
(5-5/11)