Revision 2022
Added by Duane Costa almost 21 years ago
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestDetailLog.java | ||
---|---|---|
1 |
/* |
|
2 |
* HarvestDetailLog.java |
|
3 |
* |
|
4 |
* Created on January 14, 2004, 4:59 PM |
|
5 |
*/ |
|
6 |
|
|
7 |
package edu.ucsb.nceas.metacat.harvesterClient; |
|
8 |
|
|
9 |
/**
 * HarvestDetailLog manages data and operations corresponding to the
 * HARVEST_DETAIL_LOG table. It records errors encountered while attempting
 * to harvest a particular document.
 *
 * None of the fields below is read or written by the code in this class yet;
 * the class is currently a skeleton.
 *
 * @author costa
 */
public class HarvestDetailLog {

  private int detailLogID;
  private String errorMessage;
  private Object harvestDocument;
  private int harvestLogID;


  /**
   * Creates a new instance of HarvestDetailLog.
   */
  public HarvestDetailLog() {
  }


  /**
   * Inserts a new entry into the HARVEST_DETAIL_LOG table, based on the
   * contents of this HarvestDetailLog object. Not yet implemented.
   *
   * The name follows the "db" prefix used by the other database methods in
   * this package (for example HarvestLog.dbInsertHarvestLogEntry).
   */
  public void dbInsertHarvestDetailLogEntry() {
  }


  /**
   * Misspelled original name of {@link #dbInsertHarvestDetailLogEntry()},
   * retained so that existing callers continue to compile.
   *
   * @deprecated use {@link #dbInsertHarvestDetailLogEntry()} instead
   */
  public void dnInsertHarvestDetailLogEntry() {
    dbInsertHarvestDetailLogEntry();
  }

}
|
0 | 39 |
src/edu/ucsb/nceas/metacat/harvesterClient/Harvester.java | ||
---|---|---|
1 |
/* |
|
2 |
* Harvester.java |
|
3 |
* |
|
4 |
* Created on January 14, 2004, 4:44 PM |
|
5 |
*/ |
|
6 |
|
|
7 |
package edu.ucsb.nceas.metacat.harvesterClient; |
|
8 |
|
|
9 |
import java.io.*; |
|
10 |
import java.sql.*; |
|
11 |
import java.util.*; |
|
12 |
import javax.xml.parsers.*; |
|
13 |
import org.xml.sax.*; |
|
14 |
import org.xml.sax.helpers.*; |
|
15 |
|
|
16 |
import edu.ucsb.nceas.metacat.client.*; |
|
17 |
|
|
18 |
/** |
|
19 |
* Harvester is the main class for the Harvester application. The main |
|
20 |
* method creates a single Harvester object which drives the application. |
|
21 |
* |
|
22 |
* @author costa |
|
23 |
* |
|
24 |
*/ |
|
25 |
public class Harvester { |
|
26 |
|
|
27 |
/* |
|
28 |
* Class fields |
|
29 |
*/ |
|
30 |
|
|
31 |
|
|
32 |
/* |
|
33 |
* Class methods |
|
34 |
*/ |
|
35 |
|
|
36 |
/** |
|
37 |
* Constructor. Creates a new instance of Harvester. |
|
38 |
*/ |
|
39 |
public Harvester() { |
|
40 |
} |
|
41 |
|
|
42 |
/** |
|
43 |
* Determines whether Harvester is running on a Win32 platform. Used |
|
44 |
* during development to aid in resolving platform dependencies. |
|
45 |
* |
|
46 |
* @return true if this in Win32, false otherwise |
|
47 |
*/ |
|
48 |
public static boolean isWin32 () { |
|
49 |
boolean isWin32; |
|
50 |
String osName = System.getProperty("os.name"); |
|
51 |
|
|
52 |
isWin32 = (osName.startsWith("Windows")); |
|
53 |
return isWin32; |
|
54 |
} |
|
55 |
|
|
56 |
|
|
57 |
/** |
|
58 |
* Harvester main method. |
|
59 |
* |
|
60 |
* @param args the command line arguments |
|
61 |
* @throws SAXException |
|
62 |
* @throws IOException |
|
63 |
* @throws ParserConfigurationException |
|
64 |
*/ |
|
65 |
public static void main(String[] args) { |
|
66 |
Harvester harvester = new Harvester(); |
|
67 |
|
|
68 |
harvester.startup(); // Start up Harvester |
|
69 |
harvester.readHarvestSiteSchedule(); // Read the database table |
|
70 |
harvester.harvest(); // Harvest the documents |
|
71 |
harvester.shutdown(); // Shut down Harvester |
|
72 |
} |
|
73 |
|
|
74 |
|
|
75 |
/* |
|
76 |
* Object fields |
|
77 |
*/ |
|
78 |
|
|
79 |
Connection conn; |
|
80 |
private Object harvestEndTime; |
|
81 |
private Object harvestLogList; |
|
82 |
private HarvestSiteSchedule[] harvestSiteScheduleList = |
|
83 |
new HarvestSiteSchedule[20]; |
|
84 |
private int harvestSiteScheduleIndex = 0; |
|
85 |
private Object harvestStartTime; |
|
86 |
Metacat metacat; |
|
87 |
Properties properties; |
|
88 |
|
|
89 |
|
|
90 |
/* |
|
91 |
* Object methods |
|
92 |
*/ |
|
93 |
|
|
94 |
/** |
|
95 |
* Determines whether Harvester should attempt to connect to Metacat. |
|
96 |
* Used during development and testing. |
|
97 |
* |
|
98 |
* @return true if Harvester should connect, otherwise false |
|
99 |
*/ |
|
100 |
boolean connectToMetacat () { |
|
101 |
boolean connectToMetacat = Harvester.isWin32() ? false : true; |
|
102 |
|
|
103 |
return connectToMetacat; |
|
104 |
} |
|
105 |
|
|
106 |
|
|
107 |
/** |
|
108 |
* For every Harvest site schedule in the database, harvest the |
|
109 |
* documents for that site if they are due to be harvested. |
|
110 |
* |
|
111 |
* @throws SAXException |
|
112 |
* @throws IOException |
|
113 |
* @throws ParserConfigurationException |
|
114 |
*/ |
|
115 |
private void harvest() { |
|
116 |
HarvestSiteSchedule harvestSiteSchedule; |
|
117 |
|
|
118 |
for (int i = 0; i < harvestSiteScheduleList.length; i++) { |
|
119 |
harvestSiteSchedule = harvestSiteScheduleList[i]; |
|
120 |
if (harvestSiteSchedule != null) { |
|
121 |
harvestSiteSchedule.printOutput(); |
|
122 |
harvestSiteSchedule.harvestDocumentList(); |
|
123 |
} |
|
124 |
} |
|
125 |
|
|
126 |
reportToAdministrator(); |
|
127 |
} |
|
128 |
|
|
129 |
|
|
130 |
/** |
|
131 |
* Loads Harvester properties |
|
132 |
*/ |
|
133 |
private void loadProperties() { |
|
134 |
String homedir = System.getProperty("user.home"); |
|
135 |
File configfile = new File(homedir, "harvester.properties"); |
|
136 |
|
|
137 |
properties = new Properties(); |
|
138 |
|
|
139 |
try { |
|
140 |
properties.load(new FileInputStream(configfile)); |
|
141 |
properties.list(System.out); |
|
142 |
} |
|
143 |
catch (IOException e) { |
|
144 |
System.err.println("IOException: " + e.getMessage()); |
|
145 |
System.exit(1); |
|
146 |
} |
|
147 |
} |
|
148 |
|
|
149 |
|
|
150 |
/** |
|
151 |
* Reads the HARVEST_SITE_SCHEDULE table in the database, creating |
|
152 |
* a HarvestSiteSchedule object for each row in the table. |
|
153 |
*/ |
|
154 |
private void readHarvestSiteSchedule() { |
|
155 |
HarvestSiteSchedule harvestSiteSchedule; |
|
156 |
ResultSet rs; |
|
157 |
SQLWarning warn; |
|
158 |
Statement stmt; |
|
159 |
|
|
160 |
String contactEmail; |
|
161 |
String dateLastHarvest; |
|
162 |
String dateNextHarvest; |
|
163 |
String documentListURL; |
|
164 |
String ldapDN; |
|
165 |
String ldapPassword; |
|
166 |
int siteScheduleID; |
|
167 |
String unit; |
|
168 |
int updateFrequency; |
|
169 |
|
|
170 |
try { |
|
171 |
// Read the HARVEST_SITE_SCHEDULE table |
|
172 |
stmt = conn.createStatement(); |
|
173 |
rs = stmt.executeQuery("SELECT * FROM HARVEST_SITE_SCHEDULE"); |
|
174 |
warn = rs.getWarnings(); |
|
175 |
|
|
176 |
if (warn != null) { |
|
177 |
System.out.println("\n---Warning---\n"); |
|
178 |
|
|
179 |
while (warn != null) { |
|
180 |
System.out.println("Message: " + warn.getMessage()); |
|
181 |
System.out.println("SQLState: " + warn.getSQLState()); |
|
182 |
System.out.print("Vendor error code: "); |
|
183 |
System.out.println(warn.getErrorCode()); |
|
184 |
System.out.println(""); |
|
185 |
warn = warn.getNextWarning(); |
|
186 |
} |
|
187 |
} |
|
188 |
|
|
189 |
while (rs.next()) { |
|
190 |
siteScheduleID = rs.getInt("SITE_SCHEDULE_ID"); |
|
191 |
documentListURL = rs.getString("DOCUMENTLISTURL"); |
|
192 |
ldapDN = rs.getString("LDAPDN"); |
|
193 |
// ldapPassword = rs.getString("LDAPPASSWORD"); |
|
194 |
ldapPassword = "ntre4dc"; |
|
195 |
dateNextHarvest = rs.getString("DATENEXTHARVEST"); |
|
196 |
dateLastHarvest = rs.getString("DATELASTHARVEST"); |
|
197 |
updateFrequency = rs.getInt("UPDATEFREQUENCY"); |
|
198 |
unit = rs.getString("UNIT"); |
|
199 |
contactEmail = rs.getString("CONTACT_EMAIL"); |
|
200 |
|
|
201 |
warn = rs.getWarnings(); |
|
202 |
|
|
203 |
if (warn != null) { |
|
204 |
System.out.println("\n---Warning---\n"); |
|
205 |
|
|
206 |
while (warn != null) { |
|
207 |
System.out.println("Message: " + warn.getMessage()); |
|
208 |
System.out.println("SQLState: " + warn.getSQLState()); |
|
209 |
System.out.print("Vendor error code: "); |
|
210 |
System.out.println(warn.getErrorCode()); |
|
211 |
System.out.println(""); |
|
212 |
warn = warn.getNextWarning(); |
|
213 |
} |
|
214 |
} |
|
215 |
|
|
216 |
harvestSiteSchedule = new HarvestSiteSchedule( |
|
217 |
this, |
|
218 |
siteScheduleID, |
|
219 |
documentListURL, |
|
220 |
ldapDN, |
|
221 |
ldapPassword, |
|
222 |
dateNextHarvest, |
|
223 |
dateLastHarvest, |
|
224 |
updateFrequency, |
|
225 |
unit, |
|
226 |
contactEmail |
|
227 |
); |
|
228 |
|
|
229 |
harvestSiteScheduleList[harvestSiteScheduleIndex] = harvestSiteSchedule; |
|
230 |
harvestSiteScheduleIndex++; |
|
231 |
} |
|
232 |
} |
|
233 |
catch (SQLException e) { |
|
234 |
System.out.println("Database access failed " + e); |
|
235 |
System.exit(1); |
|
236 |
} |
|
237 |
|
|
238 |
} |
|
239 |
|
|
240 |
|
|
241 |
/** |
|
242 |
* Sends a report to the Harvester administrator. |
|
243 |
*/ |
|
244 |
void reportToAdministrator() { |
|
245 |
System.out.println("\nSending report to administrator."); |
|
246 |
} |
|
247 |
|
|
248 |
|
|
249 |
/** |
|
250 |
* Shuts down Harvester. Performs cleanup operations such as logging out |
|
251 |
* of Metacat and disconnecting from the database. |
|
252 |
*/ |
|
253 |
private void shutdown() { |
|
254 |
// Log shutdown operation |
|
255 |
System.out.println("Shutting Down Harvester"); |
|
256 |
|
|
257 |
try { |
|
258 |
// Close the database connection |
|
259 |
System.out.println("Closing the database connection"); |
|
260 |
conn.close(); |
|
261 |
} |
|
262 |
catch (SQLException e) { |
|
263 |
System.out.println("Database access failed " + e); |
|
264 |
} |
|
265 |
} |
|
266 |
|
|
267 |
|
|
268 |
/** |
|
269 |
* Initializes Harvester at startup. Connects to the database and to Metacat. |
|
270 |
*/ |
|
271 |
private void startup() { |
|
272 |
String dbDriver; |
|
273 |
String metacatURL; |
|
274 |
String osName = Harvester.isWin32() ? "Windows" : "Unix"; |
|
275 |
String password; |
|
276 |
// String response; |
|
277 |
String sessionId; |
|
278 |
String url; |
|
279 |
String user; |
|
280 |
String userName = System.getProperty("user.name"); |
|
281 |
SQLWarning warn; |
|
282 |
|
|
283 |
// Log startup operation |
|
284 |
System.out.println("*****************************************************"); |
|
285 |
System.out.println("Starting Up Harvester"); |
|
286 |
System.out.println("OS is " + osName); |
|
287 |
|
|
288 |
loadProperties(); |
|
289 |
|
|
290 |
dbDriver = properties.getProperty("dbDriver"); |
|
291 |
url = properties.getProperty("url"); |
|
292 |
user = properties.getProperty("user"); |
|
293 |
password = properties.getProperty("password"); |
|
294 |
metacatURL = properties.getProperty("metacatURL"); |
|
295 |
|
|
296 |
// Load the jdbc driver |
|
297 |
try { |
|
298 |
Class.forName(dbDriver); |
|
299 |
} |
|
300 |
catch (ClassNotFoundException e) { |
|
301 |
System.out.println("Can't load driver " + e); |
|
302 |
System.exit(1); |
|
303 |
} |
|
304 |
|
|
305 |
// Make the database connection |
|
306 |
try { |
|
307 |
System.out.println("Getting connection to Harvester tables"); |
|
308 |
conn = DriverManager.getConnection(url, user, password); |
|
309 |
|
|
310 |
// If a SQLWarning object is available, print its warning(s). |
|
311 |
// There may be multiple warnings chained. |
|
312 |
warn = conn.getWarnings(); |
|
313 |
|
|
314 |
if (warn != null) { |
|
315 |
while (warn != null) { |
|
316 |
System.out.println("SQLState: " + warn.getSQLState()); |
|
317 |
System.out.println("Message: " + warn.getMessage()); |
|
318 |
System.out.println("Vendor: " + warn.getErrorCode()); |
|
319 |
System.out.println(""); |
|
320 |
warn = warn.getNextWarning(); |
|
321 |
} |
|
322 |
} |
|
323 |
} |
|
324 |
catch (SQLException e) { |
|
325 |
System.out.println("Database access failed " + e); |
|
326 |
System.exit(1); |
|
327 |
} |
|
328 |
|
|
329 |
if (connectToMetacat()) { |
|
330 |
try { |
|
331 |
System.out.println("Connecting to Metacat: " + metacatURL); |
|
332 |
metacat = MetacatFactory.createMetacatConnection(metacatURL); |
|
333 |
} |
|
334 |
catch (MetacatInaccessibleException e) { |
|
335 |
System.out.println("Metacat connection failed." + e.getMessage()); |
|
336 |
} |
|
337 |
catch (Exception e) { |
|
338 |
System.out.println("Metacat connection failed." + e.getMessage()); |
|
339 |
} |
|
340 |
} |
|
341 |
else { |
|
342 |
System.out.println("Not connecting to Metacat"); |
|
343 |
} |
|
344 |
} |
|
345 |
|
|
346 |
|
|
347 |
/** |
|
348 |
* Writes one or more log entries to the HARVEST_LOG table. |
|
349 |
*/ |
|
350 |
private void writeHarvestLog() { |
|
351 |
} |
|
352 |
|
|
353 |
} |
|
0 | 354 |
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestDocument.java | ||
---|---|---|
1 |
/* |
|
2 |
* HarvestDocument.java |
|
3 |
* |
|
4 |
* Created on January 14, 2004, 4:37 PM |
|
5 |
*/ |
|
6 |
|
|
7 |
package edu.ucsb.nceas.metacat.harvesterClient; |
|
8 |
|
|
9 |
import java.io.InputStream; |
|
10 |
import java.io.InputStreamReader; |
|
11 |
import java.io.IOException; |
|
12 |
import java.io.StringReader; |
|
13 |
import java.net.MalformedURLException; |
|
14 |
import java.net.URL; |
|
15 |
|
|
16 |
import edu.ucsb.nceas.metacat.client.*; |
|
17 |
import edu.ucsb.nceas.utilities.IOUtil; |
|
18 |
|
|
19 |
|
|
20 |
/** |
|
21 |
* HarvestDocument manages operations and data for a single document to be |
|
22 |
* harvested. |
|
23 |
* |
|
24 |
* @author costa |
|
25 |
*/ |
|
26 |
public class HarvestDocument { |
|
27 |
|
|
28 |
private String documentName; |
|
29 |
private String documentType; |
|
30 |
private String documentURL; |
|
31 |
private Harvester harvester; |
|
32 |
private HarvestSiteSchedule harvestSiteSchedule; |
|
33 |
private int identifier; |
|
34 |
private int revision; |
|
35 |
private String scope; |
|
36 |
|
|
37 |
|
|
38 |
/** |
|
39 |
* Creates a new instance of HarvestDocument. Initialized with the data |
|
40 |
* that was read from a single <document> element in site document list. |
|
41 |
* |
|
42 |
* @param harvester the parent Harvester object |
|
43 |
* @param harvestSiteSchedule the parent HarvestSiteSchedule object |
|
44 |
* @param scope the value of the <scope> element |
|
45 |
* @param identifier the value of the <identifier> element |
|
46 |
* @param revision the value of the <revision> element |
|
47 |
* @param documentType the value of the <documentType> element |
|
48 |
* @param documentURL the value of the <documentURL> element |
|
49 |
*/ |
|
50 |
public HarvestDocument( |
|
51 |
Harvester harvester, |
|
52 |
HarvestSiteSchedule harvestSiteSchedule, |
|
53 |
String scope, |
|
54 |
int identifier, |
|
55 |
int revision, |
|
56 |
String documentType, |
|
57 |
String documentURL |
|
58 |
) { |
|
59 |
this.harvester = harvester; |
|
60 |
this.harvestSiteSchedule = harvestSiteSchedule; |
|
61 |
this.documentType = documentType; |
|
62 |
this.documentURL = documentURL; |
|
63 |
this.scope = scope; |
|
64 |
this.identifier = identifier; |
|
65 |
this.revision = revision; |
|
66 |
|
|
67 |
this.documentName = scope + "." + identifier; |
|
68 |
} |
|
69 |
|
|
70 |
|
|
71 |
/** |
|
72 |
* Retrieve the document from the site using its <documentURL> value. |
|
73 |
* |
|
74 |
* @return A StringReader containing the document string. |
|
75 |
*/ |
|
76 |
private StringReader getSiteDocument() { |
|
77 |
String documentString; |
|
78 |
InputStream inputStream; |
|
79 |
InputStreamReader inputStreamReader; |
|
80 |
StringReader stringReader = null; |
|
81 |
URL url; |
|
82 |
|
|
83 |
try { |
|
84 |
url = new URL(documentURL); |
|
85 |
inputStream = url.openStream(); |
|
86 |
inputStreamReader = new InputStreamReader(inputStream); |
|
87 |
documentString = IOUtil.getAsString(inputStreamReader, true); |
|
88 |
stringReader = new StringReader(documentString); |
|
89 |
System.out.println(" Successfully read document: " + documentURL); |
|
90 |
} |
|
91 |
catch (MalformedURLException e) { |
|
92 |
System.err.println("MalformedURLException: " + e.getMessage()); |
|
93 |
} |
|
94 |
catch (IOException e) { |
|
95 |
System.err.println("IOException: " + e.getMessage()); |
|
96 |
} |
|
97 |
|
|
98 |
return stringReader; |
|
99 |
} |
|
100 |
|
|
101 |
|
|
102 |
/** |
|
103 |
* Harvest the document from the site. Unless Metacat already has the |
|
104 |
* document, retrieve the document from the site and put (insert or |
|
105 |
* update) it to Metacat. If Metacat already has the document, determine |
|
106 |
* the highest revision stored in Metacat so that this can be reported |
|
107 |
* back to the user. |
|
108 |
*/ |
|
109 |
public void harvestDocument() { |
|
110 |
int highestRevision; |
|
111 |
String metacatReturnString; |
|
112 |
StringReader stringReader; |
|
113 |
|
|
114 |
/* If metacat already has this document, determine the highest revision in |
|
115 |
* metacat and report it to the user; else, insert or delete the document |
|
116 |
* into metacat. |
|
117 |
*/ |
|
118 |
if (metacatHasDocument()) { |
|
119 |
System.out.println(" metacat has document"); |
|
120 |
highestRevision = metacatHighestRevision(); |
|
121 |
System.out.println(" metacatHighestRevision: " + highestRevision); |
|
122 |
} |
|
123 |
else { |
|
124 |
stringReader = getSiteDocument(); |
|
125 |
if (stringReader != null) { |
|
126 |
if (parseDocument()) { |
|
127 |
metacatReturnString = putMetacatDocument(stringReader); |
|
128 |
System.out.println(" " + metacatReturnString); |
|
129 |
} |
|
130 |
else { |
|
131 |
System.out.println("Error parsing document."); |
|
132 |
} |
|
133 |
} |
|
134 |
else { |
|
135 |
System.out.print(" Error reading document at URL: "); |
|
136 |
System.out.println(documentURL); |
|
137 |
} |
|
138 |
} |
|
139 |
} |
|
140 |
|
|
141 |
|
|
142 |
/** |
|
143 |
* Boolean to determine whether Metacat already has this document. |
|
144 |
* |
|
145 |
* @return true if Metacat has the document, otherwise false |
|
146 |
*/ |
|
147 |
private boolean metacatHasDocument() { |
|
148 |
boolean hasDocument = false; |
|
149 |
|
|
150 |
return hasDocument; |
|
151 |
} |
|
152 |
|
|
153 |
|
|
154 |
/** |
|
155 |
* Determines the highest revision that Metacat has for this document. |
|
156 |
* |
|
157 |
* @return int representing the highest revision for this document in Metacat |
|
158 |
*/ |
|
159 |
private int metacatHighestRevision() { |
|
160 |
int highestRevision = 0; |
|
161 |
|
|
162 |
return highestRevision; |
|
163 |
} |
|
164 |
|
|
165 |
|
|
166 |
/** |
|
167 |
* Parse the document to determine whether it is valid EML prior to inserting |
|
168 |
* or updating it to Metacat. This is QA/QC measure. Currently unimplemented. |
|
169 |
* |
|
170 |
* @return true if the document is valid EML, otherwise false |
|
171 |
*/ |
|
172 |
private boolean parseDocument () { |
|
173 |
boolean success = true; |
|
174 |
|
|
175 |
return success; |
|
176 |
} |
|
177 |
|
|
178 |
|
|
179 |
/** |
|
180 |
* Print the data fields and values in this HarvestDocument object. |
|
181 |
*/ |
|
182 |
void printOutput() |
|
183 |
{ |
|
184 |
System.out.println(""); |
|
185 |
System.out.println(" scope: " + scope); |
|
186 |
System.out.println(" identifier: " + identifier); |
|
187 |
System.out.println(" revision: " + revision); |
|
188 |
System.out.println(" documentType: " + documentType); |
|
189 |
System.out.println(" documentURL: " + documentURL); |
|
190 |
System.out.println(" documentName: " + documentName); |
|
191 |
} |
|
192 |
|
|
193 |
|
|
194 |
/** |
|
195 |
* Insert or update this document to Metacat. If revision equals 1, do an |
|
196 |
* insert; otherwise, do an update. |
|
197 |
* |
|
198 |
* @return the Metacat return string from the insert or update operation |
|
199 |
*/ |
|
200 |
private String putMetacatDocument(StringReader stringReader) { |
|
201 |
String docid = scope + "." + identifier + "." + revision; |
|
202 |
Metacat metacat = harvester.metacat; |
|
203 |
String metacatReturnString = ""; |
|
204 |
|
|
205 |
if (harvester.connectToMetacat()) { |
|
206 |
try { |
|
207 |
if (revision == 1) { |
|
208 |
System.out.println(" Inserting document to metacat: " + docid); |
|
209 |
metacatReturnString = metacat.insert(docid, stringReader, null); |
|
210 |
} |
|
211 |
else { |
|
212 |
System.out.println(" Updating document to metacat: " + docid); |
|
213 |
metacatReturnString = metacat.update(docid, stringReader, null); |
|
214 |
} |
|
215 |
} |
|
216 |
catch (MetacatInaccessibleException e) { |
|
217 |
System.err.println("MetacatInaccessibleException: " + e.getMessage()); |
|
218 |
} |
|
219 |
catch (InsufficientKarmaException e) { |
|
220 |
System.err.println("InsufficientKarmaException: " + e.getMessage()); |
|
221 |
} |
|
222 |
catch (MetacatException e) { |
|
223 |
System.err.println("MetacatException: " + e.getMessage()); |
|
224 |
} |
|
225 |
catch (IOException e) { |
|
226 |
System.err.println("IOException: " + e.getMessage()); |
|
227 |
} |
|
228 |
} |
|
229 |
else { |
|
230 |
metacatReturnString = "Not putting document to metacat"; |
|
231 |
} |
|
232 |
|
|
233 |
return metacatReturnString; |
|
234 |
} |
|
235 |
|
|
236 |
} |
|
0 | 237 |
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestLog.java | ||
---|---|---|
1 |
/* |
|
2 |
* HarvestLog.java |
|
3 |
* |
|
4 |
* Created on January 14, 2004, 4:55 PM |
|
5 |
*/ |
|
6 |
|
|
7 |
package edu.ucsb.nceas.metacat.harvesterClient; |
|
8 |
|
|
9 |
/**
 * Holds one log entry destined for the HARVEST_LOG table. The lookup and
 * insert operations are placeholders that do not touch the database yet.
 *
 * @author costa
 */
public class HarvestLog {

  private Object harvestDate;
  private String harvestOperationCode;
  private String message;
  private int siteScheduleID;
  private String status;


  /**
   * Creates a new, empty HarvestLog.
   */
  public HarvestLog() {
  }


  /**
   * Looks up the EXPLANATION field of the HARVEST_OPERATION table for a
   * given HARVEST_OPERATION_CODE. Not yet implemented: the argument is
   * ignored and the empty string is always returned.
   *
   * @param harvestOperationCode  string value of the harvest operation code
   * @return the explanation for this harvest operation, a String
   */
  public String dbGetExplanation(String harvestOperationCode) {
    return "";
  }


  /**
   * Looks up the HARVEST_OPERATION_CODE_LEVEL field of the HARVEST_OPERATION
   * table for a given HARVEST_OPERATION_CODE. Not yet implemented: the
   * argument is ignored and 0 is always returned.
   *
   * @param harvestOperationCode  string value of the harvest operation code
   * @return the code level value, an int
   */
  public int dbGetHarvestOperationCodeLevel(String harvestOperationCode) {
    return 0;
  }


  /**
   * Writes this HarvestLog object to the HARVEST_LOG table as a new entry.
   * Not yet implemented: currently does nothing.
   */
  public void dbInsertHarvestLogEntry() {
  }

}
|
0 | 67 |
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestSiteSchedule.java | ||
---|---|---|
1 |
/* |
|
2 |
* HarvestSiteSchedule.java |
|
3 |
* |
|
4 |
* Created on January 14, 2004, 4:47 PM |
|
5 |
*/ |
|
6 |
|
|
7 |
package edu.ucsb.nceas.metacat.harvesterClient; |
|
8 |
|
|
9 |
import java.io.*; |
|
10 |
import java.sql.Connection; |
|
11 |
import java.sql.SQLException; |
|
12 |
import java.sql.Statement; |
|
13 |
import java.text.*; |
|
14 |
import java.util.*; |
|
15 |
import javax.xml.parsers.*; |
|
16 |
import org.xml.sax.*; |
|
17 |
import org.xml.sax.helpers.*; |
|
18 |
|
|
19 |
import edu.ucsb.nceas.metacat.client.*; |
|
20 |
|
|
21 |
|
|
22 |
/** |
|
23 |
* HarvestSiteSchedule manages a single entry in the HARVEST_SITE_SCHEDULE |
|
24 |
* table, determining when and how to harvest the documents for a given site. |
|
25 |
* |
|
26 |
* @author costa |
|
27 |
*/ |
|
28 |
class HarvestSiteSchedule { |
|
29 |
|
|
30 |
private String contactEmail; |
|
31 |
private String dateLastHarvest; |
|
32 |
private String dateNextHarvest; |
|
33 |
private long delta; |
|
34 |
private String documentListURL; |
|
35 |
private Harvester harvester; |
|
36 |
private int harvestDocumentIndex = 0; |
|
37 |
private HarvestDocument[] harvestDocumentList = new HarvestDocument[30]; |
|
38 |
private String harvestSiteEndTime; |
|
39 |
private String harvestSiteStartTime; |
|
40 |
private String ldapDN; |
|
41 |
private String ldapPassword; |
|
42 |
final private long millisecondsPerDay = (1000 * 60 * 60 * 24); |
|
43 |
private int siteScheduleID; |
|
44 |
private String unit; |
|
45 |
private int updateFrequency; |
|
46 |
|
|
47 |
/**
 * Creates a new instance of HarvestSiteSchedule. Initialized with the data
 * that was read from a single row in the HARVEST_SITE_SCHEDULE table.
 *
 * Also computes delta, the number of milliseconds between harvests:
 * updateFrequency is taken as a number of days unless unit is "weeks"
 * (7 days each) or "months" (counted as 30 days each). Any other unit,
 * including null, is treated as days.
 *
 * @param harvester         the parent Harvester object
 * @param siteScheduleID    the value of the SITE_SCHEDULE_ID field
 * @param documentListURL   the value of the DOCUMENTLISTURL field
 * @param ldapDN            the value of the LDAPDN field
 * @param ldapPassword      the value of the LDAPPASSWORD field
 * @param dateNextHarvest   the value of the DATENEXTHARVEST field
 * @param dateLastHarvest   the value of the DATELASTHARVEST field
 * @param updateFrequency   the value of the UPDATEFREQUENCY field
 * @param unit              the value of the UNIT field
 * @param contactEmail      the value of the CONTACT_EMAIL field
 */
public HarvestSiteSchedule(
                            Harvester harvester,
                            int siteScheduleID,
                            String documentListURL,
                            String ldapDN,
                            String ldapPassword,
                            String dateNextHarvest,
                            String dateLastHarvest,
                            int updateFrequency,
                            String unit,
                            String contactEmail
                          )
{
  this.harvester = harvester;
  this.siteScheduleID = siteScheduleID;
  this.documentListURL = documentListURL;
  this.ldapDN = ldapDN;
  this.ldapPassword = ldapPassword;
  this.dateNextHarvest = dateNextHarvest;
  this.dateLastHarvest = dateLastHarvest;
  this.updateFrequency = updateFrequency;
  this.unit = unit;
  this.contactEmail = contactEmail;

  // Calculate the value of delta, the number of milliseconds between the
  // last harvest date and the next harvest date.
  delta = updateFrequency * millisecondsPerDay;

  // Compare with the literal on the left so that a null unit (for example
  // a NULL database column) does not throw a NullPointerException.
  if ("weeks".equals(unit)) {
    delta *= 7;
  }
  else if ("months".equals(unit)) {
    delta *= 30;
  }
}
|
97 |
|
|
98 |
|
|
99 |
/** |
|
100 |
* Updates the DATELASTHARVEST value of the HARVEST_SITE_SCHEDULE table |
|
101 |
* after a harvest operation has completed. Calculates the date of the next |
|
102 |
* harvest based on today's date and the update frequency. |
|
103 |
*/ |
|
104 |
private void dbUpdateHarvestSiteSchedule() { |
|
105 |
Connection con; |
|
106 |
long currentTime; // Current time in milliseconds |
|
107 |
Date dateNextHarvest; // Date of next harvest |
|
108 |
String lastHarvest; |
|
109 |
String nextHarvest; |
|
110 |
Date now = new Date(); |
|
111 |
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy"); |
|
112 |
Statement stmt; |
|
113 |
long timeNextHarvest; |
|
114 |
|
|
115 |
con = harvester.conn; |
|
116 |
now = new Date(); |
|
117 |
currentTime = now.getTime(); // Current time in milliseconds |
|
118 |
timeNextHarvest = currentTime + delta; |
|
119 |
dateNextHarvest = new Date(timeNextHarvest); |
|
120 |
nextHarvest = "'" + simpleDateFormat.format(dateNextHarvest) + "'"; |
|
121 |
lastHarvest = "'" + simpleDateFormat.format(now) + "'"; |
|
122 |
|
|
123 |
System.out.println("Date of next harvest: " + nextHarvest); |
|
124 |
System.out.println("Date of last harvest: " + lastHarvest); |
|
125 |
|
|
126 |
try { |
|
127 |
stmt = con.createStatement(); |
|
128 |
stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATENEXTHARVEST = " + nextHarvest + " WHERE SITE_SCHEDULE_ID = " + siteScheduleID); |
|
129 |
stmt.executeUpdate("UPDATE HARVEST_SITE_SCHEDULE SET DATELASTHARVEST = " + lastHarvest + " WHERE SITE_SCHEDULE_ID = " + siteScheduleID); |
|
130 |
stmt.close(); |
|
131 |
} |
|
132 |
catch(SQLException e) { |
|
133 |
System.err.println("SQLException: " + e.getMessage()); |
|
134 |
} |
|
135 |
} |
|
136 |
|
|
137 |
|
|
138 |
/** |
|
139 |
* Boolean to determine whether this site is currently due for its next |
|
140 |
* harvest. |
|
141 |
* |
|
142 |
* @retrun true if due for harvest, otherwise false |
|
143 |
*/ |
|
144 |
private boolean dueForHarvest() { |
|
145 |
boolean dueForHarvest = false; |
|
146 |
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S"); |
|
147 |
Date now = new Date(); |
|
148 |
Date dlh; // Date of last harvest |
|
149 |
Date dnh; // Date of next harvest |
|
150 |
long currentTime = now.getTime(); // Current time in milliseconds |
|
151 |
long timeNextHarvest = 0; |
|
152 |
|
|
153 |
try { |
|
154 |
dlh = dateFormat.parse(dateLastHarvest); |
|
155 |
timeNextHarvest = dlh.getTime() + delta; |
|
156 |
dnh = new Date(timeNextHarvest); |
|
157 |
|
|
158 |
if (timeNextHarvest < currentTime) { |
|
159 |
dueForHarvest = true; |
|
160 |
} |
|
161 |
else { |
|
162 |
System.out.println("Next harvest date: " + dnh.toString()); |
|
163 |
} |
|
164 |
} |
|
165 |
catch (ParseException e) { |
|
166 |
System.err.println("Error parsing date: " + e.getMessage()); |
|
167 |
} |
|
168 |
|
|
169 |
//return dueForHarvest; |
|
170 |
return true; |
|
171 |
} |
|
172 |
|
|
173 |
|
|
174 |
/** |
|
175 |
* Harvests each document in the site document list. |
|
176 |
* |
|
177 |
* @throws SAXException |
|
178 |
* @throws IOException |
|
179 |
* @throws ParserConfigurationException |
|
180 |
*/ |
|
181 |
public void harvestDocumentList() { |
|
182 |
HarvestDocument harvestDocument; |
|
183 |
|
|
184 |
if (dueForHarvest()) { |
|
185 |
try { |
|
186 |
parseDocumentList(); |
|
187 |
metacatLogin(); |
|
188 |
|
|
189 |
for (int i = 0; i < harvestDocumentList.length; i++) { |
|
190 |
harvestDocument = harvestDocumentList[i]; |
|
191 |
|
|
192 |
if (harvestDocument != null) { |
|
193 |
harvestDocument.printOutput(); |
|
194 |
harvestDocument.harvestDocument(); |
|
195 |
} |
|
196 |
} |
|
197 |
|
|
198 |
metacatLogout(); |
|
199 |
dbUpdateHarvestSiteSchedule(); |
|
200 |
} |
|
201 |
catch (ParserConfigurationException e) { |
|
202 |
System.err.println("ParserConfigurationException: " + e.getMessage()); |
|
203 |
} |
|
204 |
catch (SAXException e) { |
|
205 |
System.err.println("SAXException: " + e.getMessage()); |
|
206 |
} |
|
207 |
catch (IOException e) { |
|
208 |
System.err.println("IOException: " + e.getMessage()); |
|
209 |
} |
|
210 |
|
|
211 |
reportToSite(); |
|
212 |
} |
|
213 |
} |
|
214 |
|
|
215 |
|
|
216 |
/** |
|
217 |
* Login to Metacat using the ldapDN and ldapPassword |
|
218 |
*/ |
|
219 |
private void metacatLogin() { |
|
220 |
Metacat metacat = harvester.metacat; |
|
221 |
|
|
222 |
if (harvester.connectToMetacat()) { |
|
223 |
|
|
224 |
try { |
|
225 |
System.out.println("Logging in to Metacat: " + ldapDN); |
|
226 |
metacat.login(ldapDN, ldapPassword); |
|
227 |
//System.out.println("Metacat login response: " + response); |
|
228 |
//sessionId = metacat.getSessionId(); |
|
229 |
//System.out.println("Session ID: " + sessionId); |
|
230 |
} |
|
231 |
catch (MetacatInaccessibleException e) { |
|
232 |
System.out.println("Metacat login failed." + e.getMessage()); |
|
233 |
} |
|
234 |
catch (Exception e) { |
|
235 |
System.out.println("Metacat login failed." + e.getMessage()); |
|
236 |
} |
|
237 |
} |
|
238 |
else { |
|
239 |
System.out.println("Not logging in to Metacat"); |
|
240 |
} |
|
241 |
|
|
242 |
} |
|
243 |
|
|
244 |
|
|
245 |
/** |
|
246 |
* Logout from Metacat |
|
247 |
*/ |
|
248 |
private void metacatLogout() { |
|
249 |
Metacat metacat = harvester.metacat; |
|
250 |
|
|
251 |
if (harvester.connectToMetacat()) { |
|
252 |
try { |
|
253 |
// Log out from the Metacat session |
|
254 |
System.out.println("Logging out from Metacat"); |
|
255 |
metacat.logout(); |
|
256 |
} |
|
257 |
catch (MetacatInaccessibleException e) { |
|
258 |
System.out.println("Metacat inaccessible: " + e.getMessage()); |
|
259 |
} |
|
260 |
catch (MetacatException e) { |
|
261 |
System.out.println("Metacat exception: " + e.getMessage()); |
|
262 |
} |
|
263 |
} |
|
264 |
else { |
|
265 |
System.out.println("Not logging out from Metacat"); |
|
266 |
} |
|
267 |
} |
|
268 |
|
|
269 |
|
|
270 |
/** |
|
271 |
* Parse the site document list to find out which documents to harvest. |
|
272 |
* |
|
273 |
* @throws SAXException |
|
274 |
* @throws IOException |
|
275 |
* @throws ParserConfigurationException |
|
276 |
*/ |
|
277 |
private void parseDocumentList() |
|
278 |
throws SAXException, IOException, ParserConfigurationException { |
|
279 |
|
|
280 |
// Create a parser factory and use it to create a parser |
|
281 |
SAXParserFactory parserFactory = SAXParserFactory.newInstance(); |
|
282 |
SAXParser parser = parserFactory.newSAXParser(); |
|
283 |
|
|
284 |
// Instantiate a DefaultHandler subclass to do your counting for you |
|
285 |
DocumentListHandler handler = new DocumentListHandler(); |
|
286 |
|
|
287 |
// Start the parser. It reads the document list and calls methods of the handler. |
|
288 |
parser.parse(documentListURL, handler); |
|
289 |
} |
|
290 |
|
|
291 |
|
|
292 |
/** |
|
293 |
* Prints the data that is stored in this HarvestSiteSchedule object. |
|
294 |
*/ |
|
295 |
void printOutput() { |
|
296 |
System.out.println(""); |
|
297 |
System.out.println("siteScheduleID: " + siteScheduleID); |
|
298 |
System.out.println("documentListURL: " + documentListURL); |
|
299 |
System.out.println("ldapDN: " + ldapDN); |
|
300 |
System.out.println("ldapPassword: " + ldapPassword); |
|
301 |
System.out.println("dateNextHarvest: " + dateNextHarvest); |
|
302 |
System.out.println("dateLastHarvest: " + dateLastHarvest); |
|
303 |
System.out.println("updateFrequency: " + updateFrequency); |
|
304 |
System.out.println("unit: " + unit); |
|
305 |
System.out.println("contactEmail: " + contactEmail); |
|
306 |
} |
|
307 |
|
|
308 |
|
|
309 |
/** |
|
310 |
* Pushes a HarvestDocument object onto the harvestDocumentList. |
|
311 |
* |
|
312 |
* @param harvestDocument a new HarvestDocument object to add to the list |
|
313 |
*/ |
|
314 |
void pushHarvestDocument(HarvestDocument harvestDocument) { |
|
315 |
harvestDocumentList[harvestDocumentIndex] = harvestDocument; |
|
316 |
harvestDocumentIndex++; |
|
317 |
} |
|
318 |
|
|
319 |
|
|
320 |
/** |
|
321 |
* Sends a report to the site summarizing the results of the harvest |
|
322 |
* operation. |
|
323 |
*/ |
|
324 |
void reportToSite() { |
|
325 |
System.out.println("Sending report to site.\n"); |
|
326 |
} |
|
327 |
|
|
328 |
|
|
329 |
/** |
|
330 |
* This inner class extends DefaultHandler. It parses the document list, |
|
331 |
* creating a new HarvestDocument object every time it finds a </Document> |
|
332 |
* end tag. |
|
333 |
*/ |
|
334 |
class DocumentListHandler extends DefaultHandler { |
|
335 |
|
|
336 |
public String scope; |
|
337 |
public int identifier; |
|
338 |
public int revision; |
|
339 |
public String documentType; |
|
340 |
public String documentURL; |
|
341 |
private String currentQname; |
|
342 |
|
|
343 |
|
|
344 |
/** |
|
345 |
* Handles a start-of-document event. |
|
346 |
*/ |
|
347 |
public void startDocument () { |
|
348 |
System.out.println("Started parsing " + documentListURL); |
|
349 |
} |
|
350 |
|
|
351 |
|
|
352 |
/** |
|
353 |
* Handles an end-of-document event. |
|
354 |
*/ |
|
355 |
public void endDocument () { |
|
356 |
System.out.println("Finished parsing " + documentListURL); |
|
357 |
} |
|
358 |
|
|
359 |
|
|
360 |
/** |
|
361 |
* Handles a start-of-element event. |
|
362 |
* |
|
363 |
* @param uri |
|
364 |
* @param localname |
|
365 |
* @param qname |
|
366 |
* @param attributes |
|
367 |
*/ |
|
368 |
public void startElement(String uri, |
|
369 |
String localname, |
|
370 |
String qname, |
|
371 |
Attributes attributes) { |
|
372 |
|
|
373 |
currentQname = qname; |
|
374 |
} |
|
375 |
|
|
376 |
|
|
377 |
/** |
|
378 |
* Handles an end-of-element event. If the end tag is </Document>, then |
|
379 |
* creates a new HarvestDocument object and pushes it to the document |
|
380 |
* list. |
|
381 |
* |
|
382 |
* @param uri |
|
383 |
* @param localname |
|
384 |
* @param qname |
|
385 |
*/ |
|
386 |
public void endElement(String uri, |
|
387 |
String localname, |
|
388 |
String qname) { |
|
389 |
|
|
390 |
HarvestDocument harvestDocument; |
|
391 |
|
|
392 |
if (qname.equals("document")) { |
|
393 |
harvestDocument = new HarvestDocument( |
|
394 |
harvester, |
|
395 |
HarvestSiteSchedule.this, |
|
396 |
scope, |
|
397 |
identifier, |
|
398 |
revision, |
|
399 |
documentType, |
|
400 |
documentURL |
|
401 |
); |
|
402 |
pushHarvestDocument(harvestDocument); |
|
403 |
} |
|
404 |
} |
|
405 |
|
|
406 |
|
|
407 |
/** |
|
408 |
* This method is called for any plain text within an element. |
|
409 |
* It parses the value for any of the following elements: |
|
410 |
* <scope>, <identifier>, <revision>, <documentType>, <documentURL> |
|
411 |
* |
|
412 |
* @param ch the character array holding the parsed text |
|
413 |
* @param start the start index |
|
414 |
* @param length the text length |
|
415 |
* |
|
416 |
*/ |
|
417 |
public void characters (char ch[], int start, int length) { |
|
418 |
String s = new String(ch, start, length); |
|
419 |
|
|
420 |
if (length > 0) { |
|
421 |
if (currentQname.equals("scope")) { |
|
422 |
scope = s; |
|
423 |
} |
|
424 |
else if (currentQname.equals("identifier")) { |
|
425 |
identifier = Integer.parseInt(s); |
|
426 |
} |
|
427 |
else if (currentQname.equals("revision")) { |
|
428 |
revision = Integer.parseInt(s); |
|
429 |
} |
|
430 |
else if (currentQname.equals("documentType")) { |
|
431 |
documentType = s; |
|
432 |
} |
|
433 |
else if (currentQname.equals("documentURL")) { |
|
434 |
documentURL = s; |
|
435 |
} |
|
436 |
|
|
437 |
currentQname = ""; |
|
438 |
} |
|
439 |
} |
|
440 |
|
|
441 |
} |
|
442 |
} |
|
0 | 443 |
Also available in: Unified diff
Harvester source files