Project

General

Profile

« Previous | Next » 

Revision 2022

Harvester source files

View differences:

src/edu/ucsb/nceas/metacat/harvesterClient/HarvestDetailLog.java
1
/*
2
 * HarvestDetailLog.java
3
 *
4
 * Created on January 14, 2004, 4:59 PM
5
 */
6

  
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

  
9
/**
10
 * HarvestDetailLog manages data and operations corresponding to the
11
 * HARVEST_DETAIL_LOG table. It records errors encountered while attempting
12
 * to harvest a particular document.
13
 * 
14
 * @author  costa
15
 */
16
public class HarvestDetailLog {

  private int detailLogID;
  private String errorMessage;
  private Object harvestDocument;
  private int harvestLogID;


  /**
   * Creates a new instance of HarvestDetailLog.
   */
  public HarvestDetailLog() {
  }


  /**
   * Inserts a new entry into the HARVEST_DETAIL_LOG table, based on the
   * contents of this HarvestDetailLog object. Not yet implemented.
   */
  public void dbInsertHarvestDetailLogEntry() {
  }


  /**
   * Misspelled original name of {@link #dbInsertHarvestDetailLogEntry()};
   * retained so that existing callers continue to compile.
   *
   * @deprecated use {@link #dbInsertHarvestDetailLogEntry()} instead
   */
  public void dnInsertHarvestDetailLogEntry() {
    dbInsertHarvestDetailLogEntry();
  }

}
0 39

  
src/edu/ucsb/nceas/metacat/harvesterClient/Harvester.java
1
/*
2
 * Harvester.java
3
 *
4
 * Created on January 14, 2004, 4:44 PM
5
 */
6

  
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

  
9
import java.io.*;
10
import java.sql.*;
11
import java.util.*;
12
import javax.xml.parsers.*;
13
import org.xml.sax.*;
14
import org.xml.sax.helpers.*;
15

  
16
import edu.ucsb.nceas.metacat.client.*;
17

  
18
/**
19
 * Harvester is the main class for the Harvester application. The main
20
 * method creates a single Harvester object which drives the application.
21
 * 
22
 * @author    costa
23
 * 
24
 */
25
public class Harvester {
26

  
27
  /*
28
   * Class fields
29
   */
30
   
31

  
32
  /* 
33
   * Class methods
34
   */
35
   
36
  /**
37
   * Constructor. Creates a new instance of Harvester.
38
   */
39
  public Harvester() {
40
  }
41
    
42
  /**
43
   * Determines whether Harvester is running on a Win32 platform. Used
44
   * during development to aid in resolving platform dependencies.
45
   * 
46
   * @return    true if this in Win32, false otherwise
47
   */
48
  public static boolean isWin32 () {
49
    boolean isWin32;
50
    String osName = System.getProperty("os.name");
51
    
52
    isWin32 = (osName.startsWith("Windows"));
53
    return isWin32;
54
  }
55
  
56

  
57
  /**
58
    * Harvester main method.
59
    * 
60
    * @param args        the command line arguments
61
    * @throws SAXException
62
    * @throws IOException
63
    * @throws ParserConfigurationException
64
    */
65
  public static void main(String[] args) {
66
    Harvester harvester = new Harvester();
67
    
68
    harvester.startup();                  // Start up Harvester
69
    harvester.readHarvestSiteSchedule();  // Read the database table
70
    harvester.harvest();                  // Harvest the documents
71
    harvester.shutdown();                 // Shut down Harvester
72
  }
73

  
74

  
75
  /*
76
   * Object fields
77
   */
78

  
79
  Connection conn;
80
  private Object harvestEndTime;
81
  private Object harvestLogList;
82
  private HarvestSiteSchedule[] harvestSiteScheduleList = 
83
          new HarvestSiteSchedule[20];
84
  private int harvestSiteScheduleIndex = 0;
85
  private Object harvestStartTime;
86
  Metacat metacat;
87
  Properties properties;
88
    
89

  
90
  /*
91
   * Object methods
92
   */
93

  
94
  /**
95
   * Determines whether Harvester should attempt to connect to Metacat.
96
   * Used during development and testing.
97
   * 
98
   * @return     true if Harvester should connect, otherwise false
99
   */
100
  boolean connectToMetacat () {
101
    boolean connectToMetacat = Harvester.isWin32() ? false : true;
102
    
103
    return connectToMetacat;
104
  }
105
    
106

  
107
  /**
108
   * For every Harvest site schedule in the database, harvest the
109
   * documents for that site if they are due to be harvested.
110
   * 
111
   * @throws SAXException
112
   * @throws IOException
113
   * @throws ParserConfigurationException
114
   */
115
  private void harvest() {
116
    HarvestSiteSchedule harvestSiteSchedule;
117

  
118
    for (int i = 0; i < harvestSiteScheduleList.length; i++) {
119
      harvestSiteSchedule = harvestSiteScheduleList[i];
120
      if (harvestSiteSchedule != null) {
121
        harvestSiteSchedule.printOutput();
122
        harvestSiteSchedule.harvestDocumentList();
123
      }
124
    }
125
        
126
    reportToAdministrator();
127
  }
128
  
129
  
130
  /**
131
   * Loads Harvester properties
132
   */
133
  private void loadProperties() {
134
    String homedir = System.getProperty("user.home");
135
    File configfile = new File(homedir, "harvester.properties");
136
    
137
    properties = new Properties();
138

  
139
    try {
140
      properties.load(new FileInputStream(configfile));
141
      properties.list(System.out);
142
    }
143
    catch (IOException e) {
144
      System.err.println("IOException: " + e.getMessage());
145
      System.exit(1);
146
    }
147
  }
148
    
149

  
150
  /**
151
   * Reads the HARVEST_SITE_SCHEDULE table in the database, creating
152
   * a HarvestSiteSchedule object for each row in the table.
153
   */
154
  private void readHarvestSiteSchedule() {
155
    HarvestSiteSchedule harvestSiteSchedule;
156
    ResultSet rs;
157
    SQLWarning warn;
158
    Statement stmt;
159

  
160
    String contactEmail;
161
    String dateLastHarvest;
162
    String dateNextHarvest;
163
    String documentListURL;
164
    String ldapDN;
165
    String ldapPassword;
166
    int siteScheduleID;
167
    String unit;
168
    int updateFrequency;
169
        
170
    try {
171
      // Read the HARVEST_SITE_SCHEDULE table
172
      stmt = conn.createStatement();
173
      rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE");
174
      warn = rs.getWarnings();
175

  
176
      if (warn != null) {
177
        System.out.println("\n---Warning---\n");
178

  
179
        while (warn != null) {
180
          System.out.println("Message: " + warn.getMessage());
181
          System.out.println("SQLState: " + warn.getSQLState());
182
          System.out.print("Vendor error code: ");
183
          System.out.println(warn.getErrorCode());
184
          System.out.println("");
185
          warn = warn.getNextWarning();
186
        }
187
      }
188
     
189
      while (rs.next()) {
190
        siteScheduleID = rs.getInt("SITE_SCHEDULE_ID");
191
        documentListURL = rs.getString("DOCUMENTLISTURL");
192
        ldapDN = rs.getString("LDAPDN");
193
//        ldapPassword = rs.getString("LDAPPASSWORD");
194
        ldapPassword = "ntre4dc";
195
        dateNextHarvest = rs.getString("DATENEXTHARVEST");
196
        dateLastHarvest = rs.getString("DATELASTHARVEST");
197
        updateFrequency = rs.getInt("UPDATEFREQUENCY");
198
        unit = rs.getString("UNIT");
199
        contactEmail = rs.getString("CONTACT_EMAIL");
200
        
201
        warn = rs.getWarnings();
202

  
203
        if (warn != null) {
204
          System.out.println("\n---Warning---\n");
205
      
206
          while (warn != null) {
207
            System.out.println("Message: " + warn.getMessage());
208
            System.out.println("SQLState: " + warn.getSQLState());
209
            System.out.print("Vendor error code: ");
210
            System.out.println(warn.getErrorCode());
211
            System.out.println("");
212
            warn = warn.getNextWarning();
213
          }
214
        }
215
      
216
        harvestSiteSchedule = new HarvestSiteSchedule(
217
                                                      this,
218
                                                      siteScheduleID,
219
                                                      documentListURL,
220
                                                      ldapDN,
221
                                                      ldapPassword,
222
                                                      dateNextHarvest,
223
                                                      dateLastHarvest,
224
                                                      updateFrequency,
225
                                                      unit,
226
                                                      contactEmail
227
                                                     );
228

  
229
        harvestSiteScheduleList[harvestSiteScheduleIndex] = harvestSiteSchedule;
230
        harvestSiteScheduleIndex++;
231
      }
232
    } 
233
    catch (SQLException e) {
234
      System.out.println("Database access failed " + e);
235
      System.exit(1);
236
    }
237
    
238
  }
239
    
240

  
241
  /**
242
   * Sends a report to the Harvester administrator.
243
   */
244
  void reportToAdministrator() {
245
    System.out.println("\nSending report to administrator.");
246
  }
247
    
248

  
249
  /**
250
   * Shuts down Harvester. Performs cleanup operations such as logging out
251
   * of Metacat and disconnecting from the database.
252
   */
253
  private void shutdown() {
254
    // Log shutdown operation
255
    System.out.println("Shutting Down Harvester");
256

  
257
    try {
258
      // Close the database connection
259
      System.out.println("Closing the database connection");
260
      conn.close();
261
    }
262
    catch (SQLException e) {
263
      System.out.println("Database access failed " + e);
264
    }
265
  }
266
    
267

  
268
  /**
269
   * Initializes Harvester at startup. Connects to the database and to Metacat.
270
   */
271
  private void startup() {
272
    String dbDriver;
273
    String metacatURL;
274
    String osName = Harvester.isWin32() ? "Windows" : "Unix";
275
    String password;
276
//    String response;
277
    String sessionId;
278
		String url;
279
    String user;
280
    String userName = System.getProperty("user.name");
281
    SQLWarning warn;
282
    
283
    // Log startup operation
284
    System.out.println("*****************************************************");
285
    System.out.println("Starting Up Harvester");
286
    System.out.println("OS is " + osName);
287
    
288
    loadProperties();
289
    
290
    dbDriver = properties.getProperty("dbDriver");
291
    url = properties.getProperty("url");
292
    user = properties.getProperty("user");
293
    password = properties.getProperty("password");
294
    metacatURL = properties.getProperty("metacatURL");
295

  
296
    // Load the jdbc driver
297
    try {
298
      Class.forName(dbDriver);
299
    }
300
    catch (ClassNotFoundException e) {
301
      System.out.println("Can't load driver " + e);
302
      System.exit(1);
303
    } 
304

  
305
    // Make the database connection
306
    try {
307
      System.out.println("Getting connection to Harvester tables");
308
      conn = DriverManager.getConnection(url, user, password);
309

  
310
      // If a SQLWarning object is available, print its warning(s).
311
      // There may be multiple warnings chained.
312
      warn = conn.getWarnings();
313
      
314
      if (warn != null) {
315
        while (warn != null) {
316
          System.out.println("SQLState: " + warn.getSQLState());
317
          System.out.println("Message:  " + warn.getMessage());
318
          System.out.println("Vendor: " + warn.getErrorCode());
319
          System.out.println("");
320
          warn = warn.getNextWarning();
321
        }
322
      }
323
    }
324
    catch (SQLException e) {
325
      System.out.println("Database access failed " + e);
326
      System.exit(1);
327
    }
328
      
329
    if (connectToMetacat()) {      
330
      try {
331
        System.out.println("Connecting to Metacat: " + metacatURL);
332
        metacat = MetacatFactory.createMetacatConnection(metacatURL);
333
      } 
334
      catch (MetacatInaccessibleException e) {
335
        System.out.println("Metacat connection failed." + e.getMessage());
336
      } 
337
      catch (Exception e) {
338
        System.out.println("Metacat connection failed." + e.getMessage());
339
      }
340
    }
341
    else {
342
      System.out.println("Not connecting to Metacat");
343
    }
344
  }
345

  
346

  
347
  /**
348
   * Writes one or more log entries to the HARVEST_LOG table.
349
   */
350
  private void writeHarvestLog() {
351
  }
352
    
353
}
0 354

  
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestDocument.java
1
/*
2
 * HarvestDocument.java
3
 *
4
 * Created on January 14, 2004, 4:37 PM
5
 */
6

  
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

  
9
import java.io.InputStream;
10
import java.io.InputStreamReader;
11
import java.io.IOException;
12
import java.io.StringReader;
13
import java.net.MalformedURLException;
14
import java.net.URL;
15

  
16
import edu.ucsb.nceas.metacat.client.*;
17
import edu.ucsb.nceas.utilities.IOUtil;
18

  
19

  
20
/**
21
 * HarvestDocument manages operations and data for a single document to be
22
 * harvested.
23
 * 
24
 * @author  costa
25
 */
26
public class HarvestDocument {
27
  
28
  private String documentName;  
29
  private String documentType;
30
  private String documentURL;
31
  private Harvester harvester;
32
  private HarvestSiteSchedule harvestSiteSchedule;
33
  private int identifier;
34
  private int revision;
35
  private String scope;
36
    
37

  
38
  /**
   * Builds a HarvestDocument from the values of one &lt;document&gt; element
   * of a site document list. The document name is derived as
   * scope + "." + identifier.
   *
   * @param harvester            the parent Harvester object
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
   * @param scope                the value of the &lt;scope&gt; element
   * @param identifier           the value of the &lt;identifier&gt; element
   * @param revision             the value of the &lt;revision&gt; element
   * @param documentType         the value of the &lt;documentType&gt; element
   * @param documentURL          the value of the &lt;documentURL&gt; element
   */
  public HarvestDocument(
                          Harvester harvester,
                          HarvestSiteSchedule harvestSiteSchedule,
                          String scope,
                          int identifier,
                          int revision,
                          String documentType,
                          String documentURL
                        ) {
    // Parent objects
    this.harvester = harvester;
    this.harvestSiteSchedule = harvestSiteSchedule;

    // Document identity: scope.identifier.revision
    this.scope = scope;
    this.identifier = identifier;
    this.revision = revision;
    this.documentName = scope + "." + identifier;

    // Document type and location
    this.documentType = documentType;
    this.documentURL = documentURL;
  }
69

  
70

  
71
  /**
72
   * Retrieve the document from the site using its <documentURL> value.
73
   * 
74
   * @return   A StringReader containing the document string.
75
   */
76
  private StringReader getSiteDocument() {
77
    String documentString;
78
    InputStream inputStream;
79
    InputStreamReader inputStreamReader;
80
    StringReader stringReader = null;
81
    URL url;
82
    
83
    try {
84
      url = new URL(documentURL);
85
      inputStream = url.openStream();
86
      inputStreamReader = new InputStreamReader(inputStream);
87
      documentString = IOUtil.getAsString(inputStreamReader, true);
88
      stringReader = new StringReader(documentString);
89
      System.out.println("  Successfully read document: " + documentURL);
90
    }
91
    catch (MalformedURLException e) {
92
      System.err.println("MalformedURLException: " + e.getMessage());
93
    }
94
    catch (IOException e) {
95
      System.err.println("IOException: " + e.getMessage());
96
    }
97
    
98
    return stringReader;
99
  }
100
    
101

  
102
  /**
103
   * Harvest the document from the site. Unless Metacat already has the
104
   * document, retrieve the document from the site and put (insert or
105
   * update) it to Metacat. If Metacat already has the document, determine
106
   * the highest revision stored in Metacat so that this can be reported
107
   * back to the user.
108
   */
109
  public void harvestDocument() {
110
    int highestRevision;
111
    String metacatReturnString;
112
    StringReader stringReader;
113

  
114
    /* If metacat already has this document, determine the highest revision in
115
     * metacat and report it to the user; else, insert or delete the document 
116
     * into metacat.
117
     */
118
    if (metacatHasDocument()) {
119
      System.out.println("  metacat has document");
120
      highestRevision = metacatHighestRevision();
121
      System.out.println("  metacatHighestRevision: " + highestRevision);
122
    }
123
    else {
124
      stringReader = getSiteDocument();
125
      if (stringReader != null) {
126
        if (parseDocument()) {
127
          metacatReturnString = putMetacatDocument(stringReader);
128
          System.out.println("  " + metacatReturnString);
129
        }
130
        else {
131
          System.out.println("Error parsing document.");
132
        }
133
      }
134
      else {
135
        System.out.print("  Error reading document at URL: ");
136
        System.out.println(documentURL);
137
      }
138
    }
139
  }
140
    
141

  
142
  /**
143
   * Boolean to determine whether Metacat already has this document.
144
   * 
145
   * @return  true if Metacat has the document, otherwise false
146
   */
147
  private boolean metacatHasDocument() {
148
    boolean     hasDocument = false;
149

  
150
    return hasDocument;
151
  }
152
    
153

  
154
  /**
155
   * Determines the highest revision that Metacat has for this document.
156
   * 
157
   * @return  int representing the highest revision for this document in Metacat
158
   */
159
  private int metacatHighestRevision() {
160
    int         highestRevision = 0;
161
        
162
    return highestRevision;
163
  }
164
  
165
  
166
  /**
167
   * Parse the document to determine whether it is valid EML prior to inserting
168
   * or updating it to Metacat. This is QA/QC measure. Currently unimplemented.
169
   * 
170
   * @return  true if the document is valid EML, otherwise false
171
   */
172
  private boolean parseDocument () {
173
    boolean success = true;
174
    
175
    return success;
176
  }
177
  
178
  
179
  /**
180
   * Print the data fields and values in this HarvestDocument object.
181
   */
182
  void printOutput() 
183
  {
184
    System.out.println("");
185
    System.out.println("  scope: " + scope);
186
    System.out.println("  identifier: " + identifier);
187
    System.out.println("  revision: " + revision);
188
    System.out.println("  documentType: " + documentType);
189
    System.out.println("  documentURL: " + documentURL);
190
    System.out.println("  documentName: " + documentName);
191
  }
192
 
193
 
194
  /**
195
   * Insert or update this document to Metacat. If revision equals 1, do an
196
   * insert; otherwise, do an update.
197
   * 
198
   * @return  the Metacat return string from the insert or update operation
199
   */
200
  private String putMetacatDocument(StringReader stringReader) {
201
    String docid = scope + "." + identifier + "." + revision;
202
    Metacat metacat = harvester.metacat;
203
    String metacatReturnString = "";
204

  
205
    if (harvester.connectToMetacat()) {
206
      try {
207
        if (revision == 1) {
208
          System.out.println("  Inserting document to metacat: " + docid);
209
          metacatReturnString = metacat.insert(docid, stringReader, null);
210
        }
211
        else {
212
          System.out.println("  Updating document to metacat: " + docid);
213
          metacatReturnString = metacat.update(docid, stringReader, null);
214
        }
215
      }
216
      catch (MetacatInaccessibleException e) {
217
        System.err.println("MetacatInaccessibleException: " + e.getMessage());
218
      }
219
      catch (InsufficientKarmaException e) {
220
        System.err.println("InsufficientKarmaException: " + e.getMessage());
221
      }
222
      catch (MetacatException e) {
223
        System.err.println("MetacatException: " + e.getMessage());
224
      }
225
      catch (IOException e) {
226
        System.err.println("IOException: " + e.getMessage());
227
      }
228
    }
229
    else {
230
      metacatReturnString = "Not putting document to metacat";
231
    }
232
        
233
    return metacatReturnString;
234
  }
235
    
236
}
0 237

  
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestLog.java
1
/*
2
 * HarvestLog.java
3
 *
4
 * Created on January 14, 2004, 4:55 PM
5
 */
6

  
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

  
9
/**
10
 * Manages log entries to be inserted to the HARVEST_LOG table.
11
 *
12
 * @author  costa
13
 */
14
public class HarvestLog {

  private Object harvestDate;
  private String harvestOperationCode;
  private String message;
  private int siteScheduleID;
  private String status;

  /**
   * Creates a new, empty HarvestLog.
   */
  public HarvestLog() {
  }


  /**
   * Looks up the EXPLANATION field of the HARVEST_OPERATION table for the
   * given HARVEST_OPERATION_CODE value. Not yet implemented: no lookup is
   * made and the empty string is always returned.
   *
   * @param harvestOperationCode  string value of the harvest operation code
   * @return           the explanation for this harvest operation, a String
   */
  public String dbGetExplanation(String harvestOperationCode) {
    return "";
  }


  /**
   * Looks up the HARVEST_OPERATION_CODE_LEVEL field of the HARVEST_OPERATION
   * table for the given HARVEST_OPERATION_CODE value. Not yet implemented:
   * no lookup is made and 0 is always returned.
   *
   * @param harvestOperationCode  string value of the harvest operation code
   * @return          the code level value, an int
   */
  public int dbGetHarvestOperationCodeLevel(String harvestOperationCode) {
    return 0;
  }


  /**
   * Inserts a new entry into the HARVEST_LOG table, based on the contents of
   * this HarvestLog object. Not yet implemented.
   */
  public void dbInsertHarvestLogEntry() {
  }

}
0 67

  
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestSiteSchedule.java
1
/*
2
 * HarvestSiteSchedule.java
3
 *
4
 * Created on January 14, 2004, 4:47 PM
5
 */
6

  
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

  
9
import java.io.*;
10
import java.sql.Connection;
11
import java.sql.SQLException;
12
import java.sql.Statement;
13
import java.text.*;
14
import java.util.*;
15
import javax.xml.parsers.*;
16
import org.xml.sax.*;
17
import org.xml.sax.helpers.*;
18

  
19
import edu.ucsb.nceas.metacat.client.*;
20

  
21

  
22
/**
23
 * HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE
24
 * table, determining when and how to harvest the documents for a given site.
25
 * 
26
 * @author  costa
27
 */
28
class HarvestSiteSchedule {
29
    
30
  private String contactEmail;
31
  private String dateLastHarvest;
32
  private String dateNextHarvest;
33
  private long delta;
34
  private String documentListURL;
35
  private Harvester harvester;
36
  private int harvestDocumentIndex = 0;
37
  private HarvestDocument[] harvestDocumentList = new HarvestDocument[30];
38
  private String harvestSiteEndTime;
39
  private String harvestSiteStartTime;
40
  private String ldapDN;
41
  private String ldapPassword;
42
  final private long millisecondsPerDay = (1000 * 60 * 60 * 24);
43
  private int siteScheduleID;
44
  private String unit;
45
  private int updateFrequency;
46
    
47
  /**
   * Creates a new instance of HarvestSiteSchedule. Initialized with the data
   * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
   * Also computes delta, the harvest interval in milliseconds: the update
   * frequency counted in days, multiplied by 7 when unit is "weeks" or by
   * 30 when unit is "months". Any other unit, including null, is treated as
   * days.
   *
   * @param harvester       the parent Harvester object
   * @param siteScheduleID  the value of the SITE_SCHEDULE_ID field
   * @param documentListURL the value of the DOCUMENTLISTURL field
   * @param ldapDN          the value of the LDAPDN field
   * @param ldapPassword    the value of the LDAPPASSWORD field
   * @param dateNextHarvest the value of the DATENEXTHARVEST field
   * @param dateLastHarvest the value of the DATELASTHARVEST field
   * @param updateFrequency the value of the UPDATEFREQUENCY field
   * @param unit            the value of the UNIT field
   * @param contactEmail    the value of the CONTACT_EMAIL field
   */
  public HarvestSiteSchedule(
                              Harvester harvester,
                              int    siteScheduleID,
                              String documentListURL,
                              String ldapDN,
                              String ldapPassword,
                              String dateNextHarvest,
                              String dateLastHarvest,
                              int    updateFrequency,
                              String unit,
                              String contactEmail
                            )
  {
    this.harvester = harvester;
    this.siteScheduleID = siteScheduleID;
    this.documentListURL = documentListURL;
    this.ldapDN = ldapDN;
    this.ldapPassword = ldapPassword;
    this.dateNextHarvest = dateNextHarvest;
    this.dateLastHarvest = dateLastHarvest;
    this.updateFrequency = updateFrequency;
    this.unit = unit;
    this.contactEmail = contactEmail;

    // Calculate the value of delta, the number of milliseconds between the
    // last harvest date and the next harvest date.
    delta = updateFrequency * millisecondsPerDay;

    // Compare from the literal side so that a NULL UNIT column (null here)
    // does not throw a NullPointerException.
    if ("weeks".equals(unit)) {
      delta *= 7;
    }
    else if ("months".equals(unit)) {
      delta *= 30;
    }
  }
97
  
98
  
99
  /**
100
   * Updates the DATELASTHARVEST value of the HARVEST_SITE_SCHEDULE table
101
   * after a harvest operation has completed. Calculates the date of the next 
102
   * harvest based on today's date and the update frequency.
103
   */
104
  private void dbUpdateHarvestSiteSchedule() {
105
		Connection con;
106
    long currentTime;                    // Current time in milliseconds
107
    Date dateNextHarvest;                // Date of next harvest
108
    String lastHarvest;
109
    String nextHarvest;
110
    Date now = new Date();
111
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
112
		Statement stmt;
113
    long timeNextHarvest;
114
    
115
    con = harvester.conn;
116
    now = new Date();
117
    currentTime = now.getTime();         // Current time in milliseconds
118
    timeNextHarvest = currentTime + delta;
119
    dateNextHarvest = new Date(timeNextHarvest);
120
    nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'";
121
    lastHarvest = "'" + simpleDateFormat.format(now) + "'";
122
	
123
    System.out.println("Date of next harvest: " + nextHarvest);
124
    System.out.println("Date of last harvest: " + lastHarvest);
125

  
126
		try {
127
			stmt = con.createStatement();							
128
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " + nextHarvest + " WHERE SITE_SCHEDULE_ID = " + siteScheduleID);
129
			stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " + lastHarvest + " WHERE SITE_SCHEDULE_ID = " + siteScheduleID);
130
			stmt.close();
131
		}
132
    catch(SQLException e) {
133
			System.err.println("SQLException: " + e.getMessage());
134
		}
135
  }
136
    
137

  
138
  /**
139
   * Boolean to determine whether this site is currently due for its next
140
   * harvest.
141
   * 
142
   * @retrun     true if due for harvest, otherwise false
143
   */
144
  private boolean dueForHarvest() {
145
    boolean dueForHarvest = false;
146
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
147
    Date now = new Date();
148
    Date dlh;                          // Date of last harvest
149
    Date dnh;                          // Date of next harvest
150
    long currentTime = now.getTime();  // Current time in milliseconds
151
    long timeNextHarvest = 0;
152
    
153
    try {
154
      dlh = dateFormat.parse(dateLastHarvest);
155
      timeNextHarvest = dlh.getTime() + delta;
156
      dnh = new Date(timeNextHarvest);
157
      
158
      if (timeNextHarvest < currentTime) {
159
        dueForHarvest = true;
160
      }
161
      else {
162
        System.out.println("Next harvest date: " + dnh.toString());
163
      }
164
    }
165
    catch (ParseException e) {
166
      System.err.println("Error parsing date: " + e.getMessage());
167
    }
168
    
169
    //return dueForHarvest;
170
    return true;
171
  }
172

  
173

  
174
  /**
175
   * Harvests each document in the site document list.
176
   * 
177
   * @throws SAXException
178
   * @throws IOException
179
   * @throws ParserConfigurationException
180
   */
181
  public void harvestDocumentList() {
182
    HarvestDocument harvestDocument;
183
    
184
    if (dueForHarvest()) {
185
      try {
186
        parseDocumentList();
187
        metacatLogin();
188
        
189
        for (int i = 0; i < harvestDocumentList.length; i++) {
190
          harvestDocument = harvestDocumentList[i];
191
          
192
          if (harvestDocument != null) {
193
            harvestDocument.printOutput();
194
            harvestDocument.harvestDocument();
195
          }
196
        }
197

  
198
        metacatLogout();      
199
        dbUpdateHarvestSiteSchedule();
200
      }
201
      catch (ParserConfigurationException e) {
202
        System.err.println("ParserConfigurationException: " + e.getMessage());
203
      }
204
      catch (SAXException e) {
205
        System.err.println("SAXException: " + e.getMessage());
206
      }
207
      catch (IOException e) {
208
        System.err.println("IOException: " + e.getMessage());
209
      }
210
      
211
      reportToSite();
212
    }
213
  }
214

  
215

  
216
  /**
217
   * Login to Metacat using the ldapDN and ldapPassword
218
   */
219
  private void metacatLogin() {
220
    Metacat metacat = harvester.metacat;
221

  
222
    if (harvester.connectToMetacat()) {
223

  
224
      try {
225
        System.out.println("Logging in to Metacat: " + ldapDN);
226
        metacat.login(ldapDN, ldapPassword);
227
        //System.out.println("Metacat login response: " + response);
228
        //sessionId = metacat.getSessionId();
229
        //System.out.println("Session ID: " + sessionId);
230
      } 
231
      catch (MetacatInaccessibleException e) {
232
        System.out.println("Metacat login failed." + e.getMessage());
233
      } 
234
      catch (Exception e) {
235
        System.out.println("Metacat login failed." + e.getMessage());
236
      }
237
    }
238
    else {
239
      System.out.println("Not logging in to Metacat");
240
    }
241
    
242
  }
243
  
244
  
245
  /**
246
   * Logout from Metacat
247
   */
248
  private void metacatLogout() {
249
    Metacat metacat = harvester.metacat;
250

  
251
    if (harvester.connectToMetacat()) {
252
      try {    
253
        // Log out from the Metacat session
254
        System.out.println("Logging out from Metacat");
255
        metacat.logout();
256
      }
257
      catch (MetacatInaccessibleException e) {
258
        System.out.println("Metacat inaccessible: " + e.getMessage());
259
      }
260
      catch (MetacatException e) {
261
        System.out.println("Metacat exception: " + e.getMessage());
262
      }
263
    }
264
    else {
265
      System.out.println("Not logging out from Metacat");
266
    }
267
  }
268
  
269

  
270
  /**
271
   * Parse the site document list to find out which documents to harvest.
272
   * 
273
   * @throws SAXException
274
   * @throws IOException
275
   * @throws ParserConfigurationException
276
   */
277
  private void parseDocumentList() 
278
    throws SAXException, IOException, ParserConfigurationException {
279
    
280
    // Create a parser factory and use it to create a parser
281
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
282
    SAXParser parser = parserFactory.newSAXParser();
283
	
284
    // Instantiate a DefaultHandler subclass to do your counting for you
285
    DocumentListHandler handler = new DocumentListHandler();
286
	
287
    // Start the parser. It reads the document list and calls methods of the handler.
288
    parser.parse(documentListURL, handler);
289
  }
290

  
291

  
292
  /**
293
   * Prints the data that is stored in this HarvestSiteSchedule object.
294
   */
295
  void printOutput() {
296
    System.out.println("");
297
    System.out.println("siteScheduleID: " + siteScheduleID);
298
    System.out.println("documentListURL: " + documentListURL);
299
    System.out.println("ldapDN: " + ldapDN);
300
    System.out.println("ldapPassword: " + ldapPassword);
301
    System.out.println("dateNextHarvest: " + dateNextHarvest);
302
    System.out.println("dateLastHarvest: " + dateLastHarvest);
303
    System.out.println("updateFrequency: " + updateFrequency);
304
    System.out.println("unit: " + unit);
305
    System.out.println("contactEmail: " + contactEmail);
306
  }
307
  
308

  
309
  /**
310
   * Pushes a HarvestDocument object onto the harvestDocumentList.
311
   * 
312
   * @param harvestDocument    a new HarvestDocument object to add to the list
313
   */
314
  void pushHarvestDocument(HarvestDocument harvestDocument) {
315
    harvestDocumentList[harvestDocumentIndex] = harvestDocument;
316
    harvestDocumentIndex++;
317
  }
318
  
319

  
320
  /**
321
   * Sends a report to the site summarizing the results of the harvest
322
   * operation.
323
   */
324
  void reportToSite() {
325
    System.out.println("Sending report to site.\n");
326
  }
327
    
328

  
329
  /**
330
   * This inner class extends DefaultHandler. It parses the document list,
331
   * creating a new HarvestDocument object every time it finds a </Document>
332
   * end tag.
333
   */
334
  class DocumentListHandler extends DefaultHandler {
335
  
336
    public String scope;
337
    public int identifier;
338
    public int revision;
339
    public String documentType;
340
    public String documentURL;
341
    private String currentQname;
342
	
343

  
344
    /**
345
     * Handles a start-of-document event.
346
     */
347
    public void startDocument () {
348
      System.out.println("Started parsing " + documentListURL);
349
    }
350

  
351

  
352
    /** 
353
     * Handles an end-of-document event.
354
     */
355
    public void endDocument () {
356
      System.out.println("Finished parsing " + documentListURL);
357
    }
358

  
359

  
360
    /** 
361
     * Handles a start-of-element event.
362
     * 
363
     * @param uri
364
     * @param localname
365
     * @param qname
366
     * @param attributes
367
     */
368
    public void startElement(String uri, 
369
                             String localname,
370
                             String qname,
371
                             Attributes attributes) {
372
      
373
      currentQname = qname;
374
    }
375

  
376

  
377
    /** 
378
     * Handles an end-of-element event. If the end tag is </Document>, then
379
     * creates a new HarvestDocument object and pushes it to the document
380
     * list.
381
     * 
382
     * @param uri
383
     * @param localname
384
     * @param qname
385
     */
386
    public void endElement(String uri, 
387
                           String localname,
388
                           String qname) {
389
      
390
      HarvestDocument harvestDocument;
391
      
392
      if (qname.equals("document")) {
393
        harvestDocument = new HarvestDocument(
394
                                              harvester,
395
                                              HarvestSiteSchedule.this,
396
                                              scope,
397
                                              identifier,
398
                                              revision,
399
                                              documentType,
400
                                              documentURL
401
                                             );
402
        pushHarvestDocument(harvestDocument);
403
      }
404
    }
405

  
406

  
407
	  /**
408
     * This method is called for any plain text within an element.
409
     * It parses the value for any of the following elements:
410
     * <scope>, <identifier>, <revision>, <documentType>, <documentURL>
411
     * 
412
     * @param ch          the character array holding the parsed text
413
     * @param start       the start index
414
     * @param length      the text length
415
     * 
416
     */
417
    public void characters (char ch[], int start, int length) {
418
      String s = new String(ch, start, length);
419
 
420
      if (length > 0) {           
421
        if (currentQname.equals("scope")) {
422
          scope = s;
423
        }
424
        else if (currentQname.equals("identifier")) {
425
          identifier = Integer.parseInt(s);
426
        }
427
        else if (currentQname.equals("revision")) {
428
          revision = Integer.parseInt(s);
429
        }
430
        else if (currentQname.equals("documentType")) {
431
          documentType = s;
432
        }
433
        else if (currentQname.equals("documentURL")) {
434
          documentURL = s;
435
        }
436
        
437
        currentQname = "";
438
      }
439
    }
440

  
441
  }
442
}
0 443

  

Also available in: Unified diff