Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2004-11-09 15:34:25 -0800 (Tue, 09 Nov 2004) $'
8
 * '$Revision: 2330 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import com.oreilly.servlet.MailMessage;
28
import java.io.FileNotFoundException;
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.io.InputStreamReader;
32
import java.io.PrintStream;
33
import java.io.Reader;
34
import java.net.MalformedURLException;
35
import java.net.URL;
36
import java.sql.Connection;
37
import java.sql.SQLException;
38
import java.sql.Statement;
39
import java.text.DateFormat;
40
import java.text.ParseException;
41
import java.text.SimpleDateFormat;
42
import java.util.ArrayList;
43
import java.util.Date;
44
import javax.xml.parsers.ParserConfigurationException;
45
import org.xml.sax.Attributes;
46
import org.xml.sax.ContentHandler;
47
import org.xml.sax.ErrorHandler;
48
import org.xml.sax.InputSource;
49
import org.xml.sax.SAXException;
50
import org.xml.sax.SAXParseException;
51
import org.xml.sax.XMLReader;
52
import org.xml.sax.helpers.DefaultHandler;
53
import org.xml.sax.helpers.XMLReaderFactory;
54

    
55
import edu.ucsb.nceas.metacat.client.Metacat;
56
import edu.ucsb.nceas.metacat.client.MetacatException;
57
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
58

    
59

    
60
/**
61
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
62
 * table, determining when and how to harvest the documents for a given site.
63
 * 
64
 * @author  costa
65
 */
66
public class HarvestSiteSchedule {
67
    
68
  private String contactEmail;
69
  private String dateLastHarvest;
70
  private String dateNextHarvest;
71
  private long delta;
72
  private String documentListURL;
73
  private Harvester harvester;
74
  private ArrayList harvestDocumentList = new ArrayList();
75
  private String harvestSiteEndTime;
76
  private String harvestSiteStartTime;
77
  private String ldapDN;
78
  private String ldapPwd;
79
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
80
  private String schemaLocation = 
81
    "eml://ecoinformatics.org/harvestList ../../lib/harvester/harvestList.xsd";
82
  int siteScheduleID;
83
  private String unit;
84
  private int updateFrequency;
85
    
86
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
   * Also computes delta, the number of milliseconds between two harvests,
   * from the update frequency and its unit.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPwd         the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field; "weeks" and "months"
   *                        are recognized, any other value (including null)
   *                        is treated as days
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPwd,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPwd = ldapPwd;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;
    
    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. The frequency is first
    // interpreted as a number of days, then scaled for weeks or months.
    delta = updateFrequency * millisecondsPerDay;
    
    // Constant-first comparison so that a null unit falls through to the
    // "days" interpretation instead of throwing a NullPointerException.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;      // a month is approximated as 30 days
    }
  }
136
  
137
  
138
  /**
139
   * Updates the DATELASTHARVEST and DATENEXTHARVEST values of the 
140
   * HARVEST_SITE_SCHEDULE table after a harvest operation has completed.
141
   * Calculates the date of the next harvest based on today's date and the 
142
   * update frequency.
143
   */
144
  private void dbUpdateHarvestDates() {
145
		Connection conn;
146
    long currentTime;                    // Current time in milliseconds
147
    Date dateNextHarvest;                // Date of next harvest
148
    String lastHarvest;
149
    String nextHarvest;
150
    Date now = new Date();
151
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
152
		Statement stmt;
153
    long timeNextHarvest;
154
    
155
    conn = harvester.getConnection();
156
    now = new Date();
157
    currentTime = now.getTime();
158
    timeNextHarvest = currentTime + delta;
159
    dateNextHarvest = new Date(timeNextHarvest);
160
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
161
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
162
	
163
		try {
164
			stmt = conn.createStatement();							
165
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " +
166
                         nextHarvest +
167
                         " WHERE SITE_SCHEDULE_ID = " +
168
                         siteScheduleID);
169
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " +
170
                         lastHarvest +
171
                         " WHERE SITE_SCHEDULE_ID = " +
172
                         siteScheduleID);
173
			stmt.close();
174
		}
175
    catch(SQLException e) {
176
			System.out.println("SQLException: " + e.getMessage());
177
		}
178
  }
179
    
180

    
181
  /**
182
   * Boolean to determine whether this site is currently due for its next
183
   * harvest.
184
   * 
185
   * @retrun     true if due for harvest, otherwise false
186
   */
187
  public boolean dueForHarvest() {
188
    boolean dueForHarvest = false;
189
//    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
190
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
191
    Date now = new Date();
192
    Date dnh;                          // Date of next harvest
193
    long currentTime = now.getTime();  // Current time in milliseconds
194
    long timeNextHarvest = 0;
195
    
196
    try {
197
      dnh = dateFormat.parse(dateNextHarvest);
198
      timeNextHarvest = dnh.getTime();
199
      
200
      if (timeNextHarvest < currentTime) {
201
        dueForHarvest = true;
202
        System.out.println("Due for harvest: " + documentListURL);
203
      }
204
      else {
205
        System.out.println("Not due for harvest: " + documentListURL);
206
      }
207
    }
208
    catch (ParseException e) {
209
      System.out.println("Error parsing date: " + e.getMessage());
210
    }
211
    
212
    return dueForHarvest;
213
  }
214
  
215

    
216
  /**
217
   * Accessor method for the schemaLocation field.
218
   * 
219
   * @return schemaLocation  the schema location string
220
   */
221
  public String getSchemaLocation() {
222
    return schemaLocation;
223
  }
224

    
225

    
226
  /**
227
   * Harvests each document in the site document list.
228
   * 
229
   * @throws SAXException
230
   * @throws IOException
231
   * @throws ParserConfigurationException
232
   */
233
  public void harvestDocumentList() {
234
    HarvestDocument harvestDocument;
235
    boolean success;
236
    
237
    if (dueForHarvest()) {
238
      try {
239
        success = parseHarvestList();
240

    
241
        /* If the document list was validated, then proceed with harvesting
242
         * the documents
243
         */
244
        if (success) {
245
          metacatLogin();
246
        
247
          for (int i = 0; i < harvestDocumentList.size(); i++) {
248
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
249
          
250
            if (harvestDocument != null) {
251
              harvestDocument.harvestDocument();
252
            }
253
          }
254

    
255
          metacatLogout();      
256
          dbUpdateHarvestDates();  // Update the schedule
257
        }
258
      }
259
      catch (ParserConfigurationException e) {
260
        System.out.println("ParserConfigurationException: " + e.getMessage());
261
      }
262
      
263
      reportToSiteContact();
264
    }
265
  }
266

    
267

    
268
  /**
269
   * Login to Metacat using the ldapDN and ldapPwd
270
   */
271
  public void metacatLogin() {
272
    Metacat metacat = harvester.metacat;
273
    String response;
274

    
275
    if (harvester.connectToMetacat()) {
276
      try {
277
        System.out.println("Logging in to Metacat: " + ldapDN);
278
        response = metacat.login(ldapDN, ldapPwd);
279
        //System.out.println("Metacat login response: " + response);
280
      } 
281
      catch (MetacatInaccessibleException e) {
282
        System.out.println("Metacat login failed." + e.getMessage());
283
      } 
284
      catch (Exception e) {
285
        System.out.println("Metacat login failed." + e.getMessage());
286
      }
287
    }    
288
  }
289
  
290
  
291
  /**
292
   * Logout from Metacat
293
   */
294
  private void metacatLogout() {
295
    Metacat metacat = harvester.metacat;
296

    
297
    if (harvester.connectToMetacat()) {
298
      try {    
299
        // Log out from the Metacat session
300
        System.out.println("Logging out from Metacat");
301
        metacat.logout();
302
      }
303
      catch (MetacatInaccessibleException e) {
304
        System.out.println("Metacat inaccessible: " + e.getMessage());
305
      }
306
      catch (MetacatException e) {
307
        System.out.println("Metacat exception: " + e.getMessage());
308
      }
309
    }
310
  }
311
  
312

    
313
  /**
314
   * Parses the site harvest list XML file to find out which documents to 
315
   * harvest.
316
   * 
317
   * @return  true if successful, otherwise false
318
   */
319
  public boolean parseHarvestList() 
320
          throws ParserConfigurationException {
321
    DocumentListHandler documentListHandler = new DocumentListHandler();
322
    InputStream inputStream;
323
    InputStreamReader inputStreamReader;
324
    String schemaLocation = getSchemaLocation();
325
    boolean success = false;
326
    URL url;
327

    
328
    try {
329
      url = new URL(documentListURL);
330
      inputStream = url.openStream();
331
      harvester.addLogEntry(0,
332
                            "Retrieved: " + documentListURL,
333
                            "GetHarvestListSuccess",
334
                            siteScheduleID,
335
                            null,
336
                            "");
337
      inputStreamReader = new InputStreamReader(inputStream);
338
      documentListHandler.runParser(inputStreamReader, schemaLocation);
339
      harvester.addLogEntry(0,
340
                            "Validated: " + documentListURL,
341
                            "ValidateHarvestListSuccess",
342
                            siteScheduleID,
343
                            null,
344
                            "");
345
      success = true;
346
    }
347
    catch (MalformedURLException e){
348
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
349
                            "GetHarvestListError", siteScheduleID, null, "");
350
    }
351
    catch (FileNotFoundException e) {
352
      harvester.addLogEntry(1, "FileNotFoundException: " + e.getMessage(), 
353
                            "GetHarvestListError", siteScheduleID, null, "");
354
    }
355
    catch (SAXException e) {
356
      harvester.addLogEntry(1, "SAXException: " + e.getMessage(), 
357
                            "ValidateHarvestListError", siteScheduleID, null, "");
358
    }
359
    catch (ClassNotFoundException e) {
360
      harvester.addLogEntry(1, "ClassNotFoundException: " + e.getMessage(),
361
                            "ValidateHarvestListError", siteScheduleID, null, "");
362
    }
363
    catch (IOException e) {
364
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
365
                            "GetHarvestListError", siteScheduleID, null, "");
366
    }
367
    
368
    return success;
369
  }
370

    
371

    
372
  /**
373
   * Prints the data that is stored in this HarvestSiteSchedule object.
374
   * 
375
   * @param out   the PrintStream to write to
376
   */
377
  public void printOutput(PrintStream out) {
378
    out.println("* siteScheduleID:       " + siteScheduleID);
379
    out.println("* documentListURL:      " + documentListURL);
380
    out.println("* ldapDN:               " + ldapDN);
381
    out.println("* dateNextHarvest:      " + dateNextHarvest);
382
    out.println("* dateLastHarvest:      " + dateLastHarvest);
383
    out.println("* updateFrequency:      " + updateFrequency);
384
    out.println("* unit:                 " + unit);
385
    out.println("* contactEmail:         " + contactEmail);
386
  }
387
  
388
  /**
389
   * Reports a summary of the site harvest. Includes the following:
390
   *   A list of documents that were successfully inserted.
391
   *   A list of documents that were successfully updated.
392
   *   A list of documents that could not be accessed at the site.
393
   *   A list of documents that could not be uploaded to Metacat.
394
   *   A list of documents that were already found in Metacat.
395
   *   
396
   * @param out  the PrintStream to write to
397
   */
398
  void printSiteSummary(PrintStream out) {
399
    HarvestDocument harvestDocument;
400
    int nAccessError = 0;
401
    int nInserted = 0;
402
    int nMetacatHasIt = 0;
403
    int nUpdated = 0;
404
    int nUploadError = 0;
405
    
406
    for (int i = 0; i < harvestDocumentList.size(); i++) {
407
      harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
408
          
409
      if (harvestDocument != null) {
410
        if (harvestDocument.accessError)  { nAccessError++; }
411
        if (harvestDocument.inserted)     { nInserted++; }
412
        if (harvestDocument.metacatHasIt) { nMetacatHasIt++; }
413
        if (harvestDocument.updated)      { nUpdated++; }
414
        if (harvestDocument.uploadError)  { nUploadError++; }
415
      }
416
    }
417
    
418
    if (nInserted > 0) {
419
      printSiteSummaryHeader(out);
420
      out.println("* The following document(s) were successfully inserted:");
421
      for (int i = 0; i < harvestDocumentList.size(); i++) {
422
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
423
        if (harvestDocument != null) {
424
          if (harvestDocument.inserted)  {
425
            harvestDocument.prettyPrint(out);
426
          }
427
        }
428
      }
429
      printSiteSummaryTrailer(out);
430
    }
431

    
432
    if (nUpdated > 0) {
433
      printSiteSummaryHeader(out);
434
      out.println("* The following document(s) were successfully updated:");
435
      for (int i = 0; i < harvestDocumentList.size(); i++) {
436
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
437
        if (harvestDocument != null) {
438
          if (harvestDocument.updated)  {
439
            harvestDocument.prettyPrint(out);
440
          }
441
        }
442
      }
443
      printSiteSummaryTrailer(out);
444
    }
445

    
446
    if (nAccessError > 0) {
447
      printSiteSummaryHeader(out);
448
      out.println("* The following document(s) could not be accessed");
449
      out.println("* at the site. Please check the URL to ensure that it is");
450
      out.println("* accessible at the site.");
451
      for (int i = 0; i < harvestDocumentList.size(); i++) {
452
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
453
        if (harvestDocument != null) {
454
          if (harvestDocument.accessError)  {
455
            harvestDocument.prettyPrint(out);
456
          }
457
        }
458
      }
459
      printSiteSummaryTrailer(out);
460
    }
461

    
462
    if (nUploadError > 0) {
463
      printSiteSummaryHeader(out);
464
      out.println("* The following document(s) could not be uploaded to");
465
      out.println("* Metacat because an error of some kind occurred.");
466
      out.println("* See log entries below for additional details.) :");
467
      for (int i = 0; i < harvestDocumentList.size(); i++) {
468
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);          
469
        if (harvestDocument != null) {
470
          if (harvestDocument.uploadError)  {
471
            harvestDocument.prettyPrint(out);
472
          }
473
        }
474
      }
475
      printSiteSummaryTrailer(out);
476
    }
477

    
478
    if (nMetacatHasIt > 0) {
479
      printSiteSummaryHeader(out);
480
      out.println("* The following document(s) were already found in Metacat.");
481
      out.println("* See log entries below for additional details.) :");
482
      for (int i = 0; i < harvestDocumentList.size(); i++) {
483
        harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
484
        if (harvestDocument != null) {
485
          if (harvestDocument.metacatHasIt)  {
486
            harvestDocument.prettyPrint(out);
487
          }
488
        }
489
      }
490
      printSiteSummaryTrailer(out);
491
    }
492

    
493
  }
494
  
495

    
496
  /**
497
   * Prints the header lines of a site summary entry.
498
   * 
499
   * @param out    the PrintStream to write to
500
   */
501
  void printSiteSummaryHeader(PrintStream out) {
502
    final String filler = Harvester.filler;
503
    final String marker = Harvester.marker;
504

    
505
    out.println("");
506
    out.println(marker);
507
    out.println(filler);
508
  }
509
  
510

    
511
  /**
512
   * Prints the trailing lines of a site summary entry.
513
   * 
514
   * @param out    the PrintStream to write to
515
   */
516
  void printSiteSummaryTrailer(PrintStream out) {
517
    final String filler = Harvester.filler;
518
    final String marker = Harvester.marker;
519

    
520
    out.println(filler);
521
    out.println(marker);
522
  }
523
  
524

    
525
  /**
526
   * Sends a report to the Site Contact summarizing the results of the harvest 
527
   * at that site.
528
   */
529
  void reportToSiteContact() {
530
    PrintStream body;
531
    String from = harvester.harvesterAdministrator;
532
    String[] fromArray;
533
    String maxCodeLevel = "info";
534
    MailMessage msg;
535
    int nErrors = 0;
536
    String subject = "Report from Metacat Harvester: " + harvester.timestamp;
537
    String to = contactEmail;
538
    String[] toArray;
539
    
540
    if (!to.equals("")) {
541
      System.out.println("Sending report to siteScheduleID=" + siteScheduleID +
542
                         " at address: " + contactEmail);
543
      try {
544
        msg = new MailMessage(harvester.smtpServer);
545
        
546
        if (from.indexOf(',') > 0) {
547
          fromArray = from.split(",");
548
          
549
          for (int i = 0; i < fromArray.length; i++) {
550
            if (i == 0) {
551
              msg.from(fromArray[i]);
552
            }
553
            
554
            msg.cc(fromArray[i]);
555
            
556
          }
557
        }
558
        else if (from.indexOf(';') > 0) {
559
          fromArray = from.split(";");
560

    
561
          for (int i = 0; i < fromArray.length; i++) {
562
            if (i == 0) {
563
              msg.from(fromArray[i]);
564
            }
565
            
566
            msg.cc(fromArray[i]);
567
            
568
          }
569
        }
570
        else {
571
          msg.from(from);
572
          msg.cc(from);
573
        }
574
        
575
        if (to.indexOf(',') > 0) {
576
          toArray = to.split(",");
577
          
578
          for (int i = 0; i < toArray.length; i++) {
579
            msg.to(toArray[i]);
580
          }
581
        }
582
        else if (to.indexOf(';') > 0) {
583
          toArray = to.split(";");
584
          
585
          for (int i = 0; i < toArray.length; i++) {
586
            msg.to(toArray[i]);
587
          }
588
        }
589
        else {
590
          msg.to(to);
591
        }
592
        
593
        msg.setSubject(subject);
594
        body = msg.getPrintStream();
595
        harvester.printHarvestHeader(body, siteScheduleID);
596
        printSiteSummary(body);
597
        harvester.printHarvestLog(body, maxCodeLevel, siteScheduleID);
598
        msg.sendAndClose();        
599
      }
600
      catch (IOException e) {
601
        System.out.println("There was a problem sending email to " + to);
602
        System.out.println("IOException: " + e.getMessage());
603
      }
604
    }
605
  }
606
    
607

    
608
  /**
609
   * Accessor method for setting the value of the schemaLocation field.
610
   * 
611
   * @param schemaLocation  the new value of the schemaLocation field
612
   */
613
  public void setSchemaLocation(String schemaLocation) {
614
    this.schemaLocation = schemaLocation;
615
  }
616

    
617

    
618
  /**
619
   * This inner class extends DefaultHandler. It parses the document list,
620
   * creating a new HarvestDocument object every time it finds a </Document>
621
   * end tag.
622
   */
623
  class DocumentListHandler extends DefaultHandler implements ErrorHandler {
624
  
625
    public String scope;
626
    public int identifier;
627
    public String identifierString;
628
    public String documentType;
629
    public int revision;
630
    public String revisionString;
631
    public String documentURL;
632
    private String currentQname;
633
    public final static String DEFAULT_PARSER = 
634
           "org.apache.xerces.parsers.SAXParser";
635
    private boolean schemaValidate = true;
636
	
637

    
638
	  /**
639
     * This method is called for any plain text within an element.
640
     * It parses the value for any of the following elements:
641
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
642
     * 
643
     * @param ch          the character array holding the parsed text
644
     * @param start       the start index
645
     * @param length      the text length
646
     * 
647
     */
648
    public void characters (char ch[], int start, int length) {
649
      String s = new String(ch, start, length);
650
 
651
      if (length > 0) {           
652
        if (currentQname.equals("scope")) {
653
          scope += s;
654
        }
655
        else if (currentQname.equals("identifier")) {
656
          identifierString += s;
657
        }
658
        else if (currentQname.equals("revision")) {
659
          revisionString += s;
660
        }
661
        else if (currentQname.equals("documentType")) {
662
          documentType += s;
663
        }
664
        else if (currentQname.equals("documentURL")) {
665
          documentURL += s;
666
        }
667
      }
668
    }
669

    
670

    
671
    /** 
672
     * Handles an end-of-document event.
673
     */
674
    public void endDocument () {
675
      System.out.println("Finished parsing " + documentListURL);
676
    }
677

    
678

    
679
    /** 
680
     * Handles an end-of-element event. If the end tag is </Document>, then
681
     * creates a new HarvestDocument object and pushes it to the document
682
     * list.
683
     * 
684
     * @param uri
685
     * @param localname
686
     * @param qname
687
     */
688
    public void endElement(String uri, 
689
                           String localname,
690
                           String qname) {
691
      
692
      HarvestDocument harvestDocument;
693
      
694
      if (qname.equals("identifier")) {
695
        identifier = Integer.parseInt(identifierString);
696
      }
697
      else if (qname.equals("revision")) {
698
        revision = Integer.parseInt(revisionString);
699
      }
700
      else if (qname.equals("document")) {
701
        harvestDocument = new HarvestDocument(
702
                                              harvester,
703
                                              HarvestSiteSchedule.this,
704
                                              scope,
705
                                              identifier,
706
                                              revision,
707
                                              documentType,
708
                                              documentURL
709
                                             );
710
        harvestDocumentList.add(harvestDocument);
711
      }
712

    
713
      currentQname = "";
714
    }
715

    
716

    
717
    /**
718
     * Method for handling errors during a parse
719
     *
720
     * @param exception         The parsing error
721
     * @exception SAXException  Description of Exception
722
     */
723
     public void error(SAXParseException e) throws SAXParseException {
724
        System.out.println("SAXParseException: " + e.getMessage());
725
        throw e;
726
    }
727

    
728

    
729
    /**
730
     * Run the validating parser
731
     *
732
     * @param xml             the xml stream to be validated
733
     * @schemaLocation        relative path the to XML Schema file, e.g. "."
734
     * @exception IOException thrown when test files can't be opened
735
     * @exception ClassNotFoundException thrown when SAX Parser class not found
736
     * @exception SAXException
737
     * @exception SAXParserException
738
     */
739
    public void runParser(Reader xml, String schemaLocation)
740
           throws IOException, ClassNotFoundException,
741
                  SAXException, SAXParseException {
742

    
743
      // Get an instance of the parser
744
      XMLReader parser;
745

    
746
      parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER);
747
      // Set Handlers in the parser
748
      parser.setContentHandler((ContentHandler)this);
749
      parser.setErrorHandler((ErrorHandler)this);
750
      parser.setFeature("http://xml.org/sax/features/namespaces", true);
751
      parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
752
      parser.setFeature("http://xml.org/sax/features/validation", true);
753
      parser.setProperty(
754
              "http://apache.org/xml/properties/schema/external-schemaLocation", 
755
              schemaLocation);
756

    
757
      if (schemaValidate) {
758
        parser.setFeature("http://apache.org/xml/features/validation/schema", 
759
                          true);
760
      }
761
    
762
      // Parse the document
763
      parser.parse(new InputSource(xml));
764
    }
765
    /**
766
     * Handles a start-of-document event.
767
     */
768
    public void startDocument () {
769
      System.out.println("Started parsing " + documentListURL);
770
    }
771

    
772

    
773
    /** 
774
     * Handles a start-of-element event.
775
     * 
776
     * @param uri
777
     * @param localname
778
     * @param qname
779
     * @param attributes
780
     */
781
    public void startElement(String uri, 
782
                             String localname,
783
                             String qname,
784
                             Attributes attributes) {
785
      
786
      currentQname = qname;
787

    
788
      if (qname.equals("scope")) {
789
        scope = "";
790
      }
791
      else if (qname.equals("identifier")) {
792
        identifierString = "";
793
      }
794
      else if (qname.equals("revision")) {
795
        revisionString = "";
796
      }
797
      else if (qname.equals("documentType")) {
798
        documentType = "";
799
      }
800
      else if (qname.equals("documentURL")) {
801
        documentURL = "";
802
      }
803
    }
804
  }
805
}
(5-5/10)