Project

General

Profile

1
/*
2
 * HarvestSiteSchedule.java
3
 *
4
 * Created on January 14, 2004, 4:47 PM
5
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.*;
10
import java.sql.Connection;
11
import java.sql.SQLException;
12
import java.sql.Statement;
13
import java.text.*;
14
import java.util.*;
15
import javax.xml.parsers.*;
16
import org.xml.sax.*;
17
import org.xml.sax.helpers.*;
18

    
19
import edu.ucsb.nceas.metacat.client.*;
20

    
21

    
22
/**
23
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
24
 * table, determining when and how to harvest the documents for a given site.
25
 * 
26
 * @author  costa
27
 */
28
class HarvestSiteSchedule {
29
    
30
  private String contactEmail;
31
  private String dateLastHarvest;
32
  private String dateNextHarvest;
33
  private long delta;
34
  private String documentListURL;
35
  private Harvester harvester;
36
  private int harvestDocumentIndex = 0;
37
  private HarvestDocument[] harvestDocumentList = new HarvestDocument[30];
38
  private String harvestSiteEndTime;
39
  private String harvestSiteStartTime;
40
  private String ldapDN;
41
  private String ldapPassword;
42
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
43
  private int siteScheduleID;
44
  private String unit;
45
  private int updateFrequency;
46
    
47
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
   * <p>
   * Also computes delta, the interval between harvests in milliseconds:
   * updateFrequency days, multiplied by 7 when unit is "weeks" or by 30 when
   * unit is "months". Any other unit value, including null, leaves the
   * interval expressed in days.
   * 
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPassword    the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPassword,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPassword = ldapPassword;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;
    
    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date. millisecondsPerDay is a
    // long, so the multiplication is carried out in long arithmetic.
    delta = updateFrequency * millisecondsPerDay;
    
    // Compare literal-first so that a null unit does not throw a
    // NullPointerException; it simply falls through to the day-based interval.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;
    }
  }
97
  
98
  
99
  /**
100
   * Updates the DATELASTHARVEST value of the HARVEST_SITE_SCHEDULE table
101
   * after a harvest operation has completed. Calculates the date of the next 
102
   * harvest based on today's date and the update frequency.
103
   */
104
  private void dbUpdateHarvestSiteSchedule() {
105
		Connection con;
106
    long currentTime;                    // Current time in milliseconds
107
    Date dateNextHarvest;                // Date of next harvest
108
    String lastHarvest;
109
    String nextHarvest;
110
    Date now = new Date();
111
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
112
		Statement stmt;
113
    long timeNextHarvest;
114
    
115
    con = harvester.conn;
116
    now = new Date();
117
    currentTime = now.getTime();         // Current time in milliseconds
118
    timeNextHarvest = currentTime + delta;
119
    dateNextHarvest = new Date(timeNextHarvest);
120
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
121
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
122
	
123
    System.out.println("Date of next harvest: " + nextHarvest);
124
    System.out.println("Date of last harvest: " + lastHarvest);
125

    
126
		try {
127
			stmt = con.createStatement();							
128
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " + nextHarvest + " WHERE SITE_SCHEDULE_ID = " + siteScheduleID);
129
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " + lastHarvest + " WHERE SITE_SCHEDULE_ID = " + siteScheduleID);
130
			stmt.close();
131
		}
132
    catch(SQLException e) {
133
			System.err.println("SQLException: " + e.getMessage());
134
		}
135
  }
136
    
137

    
138
  /**
139
   * Boolean to determine whether this site is currently due for its next
140
   * harvest.
141
   * 
142
   * @retrun     true if due for harvest, otherwise false
143
   */
144
  private boolean dueForHarvest() {
145
    boolean dueForHarvest = false;
146
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
147
    Date now = new Date();
148
    Date dlh;                          // Date of last harvest
149
    Date dnh;                          // Date of next harvest
150
    long currentTime = now.getTime();  // Current time in milliseconds
151
    long timeNextHarvest = 0;
152
    
153
    try {
154
      dlh = dateFormat.parse(dateLastHarvest);
155
      timeNextHarvest = dlh.getTime() + delta;
156
      dnh = new Date(timeNextHarvest);
157
      
158
      if (timeNextHarvest < currentTime) {
159
        dueForHarvest = true;
160
      }
161
      else {
162
        System.out.println("Next harvest date: " + dnh.toString());
163
      }
164
    }
165
    catch (ParseException e) {
166
      System.err.println("Error parsing date: " + e.getMessage());
167
    }
168
    
169
    //return dueForHarvest;
170
    return true;
171
  }
172

    
173

    
174
  /**
175
   * Harvests each document in the site document list.
176
   * 
177
   * @throws SAXException
178
   * @throws IOException
179
   * @throws ParserConfigurationException
180
   */
181
  public void harvestDocumentList() {
182
    HarvestDocument harvestDocument;
183
    
184
    if (dueForHarvest()) {
185
      try {
186
        parseDocumentList();
187
        metacatLogin();
188
        
189
        for (int i = 0; i < harvestDocumentList.length; i++) {
190
          harvestDocument = harvestDocumentList[i];
191
          
192
          if (harvestDocument != null) {
193
            harvestDocument.printOutput();
194
            harvestDocument.harvestDocument();
195
          }
196
        }
197

    
198
        metacatLogout();      
199
        dbUpdateHarvestSiteSchedule();
200
      }
201
      catch (ParserConfigurationException e) {
202
        System.err.println("ParserConfigurationException: " + e.getMessage());
203
      }
204
      catch (SAXException e) {
205
        System.err.println("SAXException: " + e.getMessage());
206
      }
207
      catch (IOException e) {
208
        System.err.println("IOException: " + e.getMessage());
209
      }
210
      
211
      reportToSite();
212
    }
213
  }
214

    
215

    
216
  /**
217
   * Login to Metacat using the ldapDN and ldapPassword
218
   */
219
  private void metacatLogin() {
220
    Metacat metacat = harvester.metacat;
221

    
222
    if (harvester.connectToMetacat()) {
223

    
224
      try {
225
        System.out.println("Logging in to Metacat: " + ldapDN);
226
        metacat.login(ldapDN, ldapPassword);
227
        //System.out.println("Metacat login response: " + response);
228
        //sessionId = metacat.getSessionId();
229
        //System.out.println("Session ID: " + sessionId);
230
      } 
231
      catch (MetacatInaccessibleException e) {
232
        System.out.println("Metacat login failed." + e.getMessage());
233
      } 
234
      catch (Exception e) {
235
        System.out.println("Metacat login failed." + e.getMessage());
236
      }
237
    }
238
    else {
239
      System.out.println("Not logging in to Metacat");
240
    }
241
    
242
  }
243
  
244
  
245
  /**
246
   * Logout from Metacat
247
   */
248
  private void metacatLogout() {
249
    Metacat metacat = harvester.metacat;
250

    
251
    if (harvester.connectToMetacat()) {
252
      try {    
253
        // Log out from the Metacat session
254
        System.out.println("Logging out from Metacat");
255
        metacat.logout();
256
      }
257
      catch (MetacatInaccessibleException e) {
258
        System.out.println("Metacat inaccessible: " + e.getMessage());
259
      }
260
      catch (MetacatException e) {
261
        System.out.println("Metacat exception: " + e.getMessage());
262
      }
263
    }
264
    else {
265
      System.out.println("Not logging out from Metacat");
266
    }
267
  }
268
  
269

    
270
  /**
271
   * Parse the site document list to find out which documents to harvest.
272
   * 
273
   * @throws SAXException
274
   * @throws IOException
275
   * @throws ParserConfigurationException
276
   */
277
  private void parseDocumentList() 
278
    throws SAXException, IOException, ParserConfigurationException {
279
    
280
    // Create a parser factory and use it to create a parser
281
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
282
    SAXParser parser = parserFactory.newSAXParser();
283
	
284
    // Instantiate a DefaultHandler subclass to do your counting for you
285
    DocumentListHandler handler = new DocumentListHandler();
286
	
287
    // Start the parser. It reads the document list and calls methods of the handler.
288
    parser.parse(documentListURL, handler);
289
  }
290

    
291

    
292
  /**
293
   * Prints the data that is stored in this HarvestSiteSchedule object.
294
   */
295
  void printOutput() {
296
    System.out.println("");
297
    System.out.println("siteScheduleID: " + siteScheduleID);
298
    System.out.println("documentListURL: " + documentListURL);
299
    System.out.println("ldapDN: " + ldapDN);
300
    System.out.println("ldapPassword: " + ldapPassword);
301
    System.out.println("dateNextHarvest: " + dateNextHarvest);
302
    System.out.println("dateLastHarvest: " + dateLastHarvest);
303
    System.out.println("updateFrequency: " + updateFrequency);
304
    System.out.println("unit: " + unit);
305
    System.out.println("contactEmail: " + contactEmail);
306
  }
307
  
308

    
309
  /**
310
   * Pushes a HarvestDocument object onto the harvestDocumentList.
311
   * 
312
   * @param harvestDocument    a new HarvestDocument object to add to the list
313
   */
314
  void pushHarvestDocument(HarvestDocument harvestDocument) {
315
    harvestDocumentList[harvestDocumentIndex] = harvestDocument;
316
    harvestDocumentIndex++;
317
  }
318
  
319

    
320
  /**
321
   * Sends a report to the site summarizing the results of the harvest
322
   * operation.
323
   */
324
  void reportToSite() {
325
    System.out.println("Sending report to site.\n");
326
  }
327
    
328

    
329
  /**
330
   * This inner class extends DefaultHandler. It parses the document list,
331
   * creating a new HarvestDocument object every time it finds a </Document>
332
   * end tag.
333
   */
334
  class DocumentListHandler extends DefaultHandler {
335
  
336
    public String scope;
337
    public int identifier;
338
    public int revision;
339
    public String documentType;
340
    public String documentURL;
341
    private String currentQname;
342
	
343

    
344
    /**
345
     * Handles a start-of-document event.
346
     */
347
    public void startDocument () {
348
      System.out.println("Started parsing " + documentListURL);
349
    }
350

    
351

    
352
    /** 
353
     * Handles an end-of-document event.
354
     */
355
    public void endDocument () {
356
      System.out.println("Finished parsing " + documentListURL);
357
    }
358

    
359

    
360
    /** 
361
     * Handles a start-of-element event.
362
     * 
363
     * @param uri
364
     * @param localname
365
     * @param qname
366
     * @param attributes
367
     */
368
    public void startElement(String uri, 
369
                             String localname,
370
                             String qname,
371
                             Attributes attributes) {
372
      
373
      currentQname = qname;
374
    }
375

    
376

    
377
    /** 
378
     * Handles an end-of-element event. If the end tag is </Document>, then
379
     * creates a new HarvestDocument object and pushes it to the document
380
     * list.
381
     * 
382
     * @param uri
383
     * @param localname
384
     * @param qname
385
     */
386
    public void endElement(String uri, 
387
                           String localname,
388
                           String qname) {
389
      
390
      HarvestDocument harvestDocument;
391
      
392
      if (qname.equals("document")) {
393
        harvestDocument = new HarvestDocument(
394
                                              harvester,
395
                                              HarvestSiteSchedule.this,
396
                                              scope,
397
                                              identifier,
398
                                              revision,
399
                                              documentType,
400
                                              documentURL
401
                                             );
402
        pushHarvestDocument(harvestDocument);
403
      }
404
    }
405

    
406

    
407
	  /**
408
     * This method is called for any plain text within an element.
409
     * It parses the value for any of the following elements:
410
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
411
     * 
412
     * @param ch          the character array holding the parsed text
413
     * @param start       the start index
414
     * @param length      the text length
415
     * 
416
     */
417
    public void characters (char ch[], int start, int length) {
418
      String s = new String(ch, start, length);
419
 
420
      if (length > 0) {           
421
        if (currentQname.equals("scope")) {
422
          scope = s;
423
        }
424
        else if (currentQname.equals("identifier")) {
425
          identifier = Integer.parseInt(s);
426
        }
427
        else if (currentQname.equals("revision")) {
428
          revision = Integer.parseInt(s);
429
        }
430
        else if (currentQname.equals("documentType")) {
431
          documentType = s;
432
        }
433
        else if (currentQname.equals("documentURL")) {
434
          documentURL = s;
435
        }
436
        
437
        currentQname = "";
438
      }
439
    }
440

    
441
  }
442
}
(4-4/7)