Revision 2036
Added by Duane Costa over 20 years ago
HarvestDocument.java | ||
---|---|---|
12 | 12 |
import java.io.StringReader; |
13 | 13 |
import java.net.MalformedURLException; |
14 | 14 |
import java.net.URL; |
15 |
import java.sql.ResultSet; |
|
16 |
import java.sql.SQLException; |
|
17 |
import java.sql.Statement; |
|
15 | 18 |
|
16 | 19 |
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException; |
17 | 20 |
import edu.ucsb.nceas.metacat.client.Metacat; |
... | ... | |
27 | 30 |
* @author costa |
28 | 31 |
*/ |
29 | 32 |
public class HarvestDocument { |
30 |
|
|
31 |
private String documentName; |
|
33 |
|
|
34 |
|
|
35 |
private String docid; // scope + identifier |
|
36 |
private String docidFull; // scope + identifier + revision |
|
32 | 37 |
String documentType; |
33 | 38 |
String documentURL; |
34 | 39 |
private Harvester harvester; |
... | ... | |
67 | 72 |
this.identifier = identifier; |
68 | 73 |
this.revision = revision; |
69 | 74 |
|
70 |
this.documentName = scope + "." + identifier; |
|
75 |
this.docid = scope + "." + identifier; |
|
76 |
this.docidFull = this.docid + "." + revision; |
|
71 | 77 |
} |
72 | 78 |
|
73 | 79 |
|
... | ... | |
89 | 95 |
inputStreamReader = new InputStreamReader(inputStream); |
90 | 96 |
documentString = IOUtil.getAsString(inputStreamReader, true); |
91 | 97 |
stringReader = new StringReader(documentString); |
92 |
harvester.addLogEntry(0, "", "GetDocSuccess", |
|
93 |
harvestSiteSchedule.siteScheduleID, null, ""); |
|
98 |
harvester.addLogEntry(0, |
|
99 |
"Retrieved: " + documentURL, |
|
100 |
"GetDocSuccess", |
|
101 |
harvestSiteSchedule.siteScheduleID, |
|
102 |
null, |
|
103 |
""); |
|
94 | 104 |
} |
95 | 105 |
catch (MalformedURLException e) { |
96 | 106 |
harvester.addLogEntry(1, "MalformedURLException", "GetDocError", |
... | ... | |
116 | 126 |
*/ |
117 | 127 |
public void harvestDocument() { |
118 | 128 |
int highestRevision; |
129 |
boolean insert = false; |
|
119 | 130 |
String metacatReturnString; |
120 | 131 |
StringReader stringReader; |
132 |
boolean update = false; |
|
121 | 133 |
|
122 | 134 |
/* If metacat already has this document, determine the highest revision in |
123 | 135 |
* metacat and report it to the user; else, insert or update the document |
124 | 136 |
* into metacat. |
125 | 137 |
*/ |
126 |
if (metacatHasDocument()) { |
|
127 |
highestRevision = metacatHighestRevision(); |
|
138 |
highestRevision = metacatHighestRevision(); |
|
139 |
|
|
140 |
if (highestRevision == -1) { |
|
141 |
insert = true; |
|
142 |
} |
|
143 |
else if (revision > highestRevision) { |
|
144 |
update = true; |
|
145 |
} |
|
146 |
else { |
|
128 | 147 |
harvester.addLogEntry(0, |
129 |
"Metacat has document: " + documentName + |
|
130 |
", highest revision: " + highestRevision, |
|
148 |
"Attempting to update " + docid + " to revision " + |
|
149 |
revision + ". Metacat has document revision " + |
|
150 |
highestRevision + ".", |
|
131 | 151 |
"MetacatHasDoc", |
132 |
harvestSiteSchedule.siteScheduleID, null, ""); |
|
152 |
harvestSiteSchedule.siteScheduleID, |
|
153 |
null, |
|
154 |
""); |
|
133 | 155 |
} |
134 |
else { |
|
156 |
|
|
157 |
if (insert || update) { |
|
135 | 158 |
stringReader = getSiteDocument(); |
136 | 159 |
if (stringReader != null) { |
137 |
if (parseDocument()) {
|
|
138 |
putMetacatDocument(stringReader); |
|
160 |
if (validateDocument()) {
|
|
161 |
putMetacatDocument(insert, update, stringReader);
|
|
139 | 162 |
} |
140 | 163 |
} |
141 | 164 |
} |
... | ... | |
156 | 179 |
Exception e |
157 | 180 |
) { |
158 | 181 |
if (insert) { |
159 |
harvester.addLogEntry(1, metacatReturnString, "InsertDocError", |
|
160 |
harvestSiteSchedule.siteScheduleID, |
|
161 |
this, exceptionName + ": " + e.getMessage()); |
|
182 |
harvester.addLogEntry(1, |
|
183 |
metacatReturnString, |
|
184 |
"InsertDocError", |
|
185 |
harvestSiteSchedule.siteScheduleID, |
|
186 |
this, |
|
187 |
exceptionName + ": " + e.getMessage()); |
|
162 | 188 |
} |
163 | 189 |
else { |
164 |
harvester.addLogEntry(1, metacatReturnString, "UpdateDocError", |
|
165 |
harvestSiteSchedule.siteScheduleID, |
|
166 |
this, exceptionName + ": " + e.getMessage()); |
|
190 |
harvester.addLogEntry(1, |
|
191 |
metacatReturnString, |
|
192 |
"UpdateDocError", |
|
193 |
harvestSiteSchedule.siteScheduleID, |
|
194 |
this, |
|
195 |
exceptionName + ": " + e.getMessage()); |
|
167 | 196 |
} |
168 | 197 |
} |
169 | 198 |
|
170 | 199 |
|
171 | 200 |
/** |
172 |
* Boolean to determine whether Metacat already has this document. |
|
173 |
* |
|
174 |
* @return true if Metacat has the document, otherwise false |
|
175 |
*/ |
|
176 |
private boolean metacatHasDocument() { |
|
177 |
boolean hasDocument = false; |
|
178 |
|
|
179 |
return hasDocument; |
|
180 |
} |
|
181 |
|
|
182 |
|
|
183 |
/** |
|
184 | 201 |
* Determines the highest revision that Metacat has for this document. |
185 | 202 |
* |
186 |
* @return int representing the highest revision for this document in Metacat |
|
203 |
* @return int representing the highest revision for this document in |
|
204 |
* Metacat. Returns -1 if Metacat does not currently hold the |
|
205 |
* document. |
|
187 | 206 |
*/ |
188 | 207 |
private int metacatHighestRevision() { |
189 |
int highestRevision = 0; |
|
190 |
|
|
191 |
return highestRevision; |
|
192 |
} |
|
193 |
|
|
194 |
|
|
195 |
/** |
|
196 |
* Parse the document to determine whether it is valid EML prior to inserting |
|
197 |
* or updating it to Metacat. This is a QA/QC measure. Currently unimplemented. |
|
198 |
* |
|
199 |
* @return true if the document is valid EML, otherwise false |
|
200 |
*/ |
|
201 |
private boolean parseDocument () { |
|
202 |
boolean success = true; |
|
208 |
int highestRevision = -1; |
|
209 |
String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " + |
|
210 |
"'" + docid + "'"; |
|
211 |
Statement stmt; |
|
203 | 212 |
|
204 |
if (success) { |
|
205 |
harvester.addLogEntry(0, "", "ValidateDocSuccess", |
|
206 |
harvestSiteSchedule.siteScheduleID, null, ""); |
|
213 |
try { |
|
214 |
stmt = harvester.conn.createStatement(); |
|
215 |
ResultSet rs = stmt.executeQuery(query); |
|
216 |
|
|
217 |
while (rs.next()) { |
|
218 |
highestRevision = rs.getInt("REV"); |
|
219 |
} |
|
220 |
|
|
221 |
stmt.close(); |
|
222 |
} |
|
223 |
catch(SQLException e) { |
|
224 |
System.out.println("SQLException: " + e.getMessage()); |
|
207 | 225 |
} |
208 |
else { |
|
209 |
harvester.addLogEntry(1, "Error validating document", "ValidateDocError", |
|
210 |
harvestSiteSchedule.siteScheduleID, this, ""); |
|
211 |
} |
|
212 |
|
|
213 |
return success; |
|
226 |
|
|
227 |
return highestRevision; |
|
214 | 228 |
} |
215 | 229 |
|
216 | 230 |
|
... | ... | |
218 | 232 |
* Print the data fields and values in this HarvestDocument object. |
219 | 233 |
*/ |
220 | 234 |
void printOutput() { |
221 |
System.out.println("scope: " + scope); |
|
222 |
System.out.println("identifier: " + identifier); |
|
223 |
System.out.println("revision: " + revision); |
|
224 |
System.out.println("documentType: " + documentType); |
|
225 |
System.out.println("documentURL: " + documentURL); |
|
226 |
System.out.println("documentName: " + documentName); |
|
235 |
System.out.println("* scope: " + scope); |
|
236 |
System.out.println("* identifier: " + identifier); |
|
237 |
System.out.println("* revision: " + revision); |
|
238 |
System.out.println("* documentType: " + documentType); |
|
239 |
System.out.println("* documentURL: " + documentURL); |
|
227 | 240 |
} |
228 | 241 |
|
229 | 242 |
|
230 | 243 |
/** |
231 | 244 |
* Insert or update this document to Metacat. If insert is true, do an |
232 | 245 |
* insert; otherwise, if update is true, do an update. |
246 |
* |
|
247 |
* @param insert true if this is an insert operation |
|
248 |
* @param update true if this is an update operation |
|
249 |
* @param stringReader the StringReader object holding the document text |
|
233 | 250 |
*/ |
234 |
private void putMetacatDocument(StringReader stringReader) {
|
|
235 |
String docid = scope + "." + identifier + "." + revision;
|
|
236 |
boolean insert = (revision == 1);
|
|
251 |
private void putMetacatDocument(boolean insert,
|
|
252 |
boolean update,
|
|
253 |
StringReader stringReader) {
|
|
237 | 254 |
Metacat metacat = harvester.metacat; |
238 | 255 |
String metacatReturnString = ""; |
239 | 256 |
|
240 | 257 |
if (harvester.connectToMetacat()) { |
241 | 258 |
try { |
242 | 259 |
if (insert) { |
243 |
metacatReturnString = metacat.insert(docid, stringReader, null); |
|
244 |
harvester.addLogEntry(0, docid + " : " + metacatReturnString, |
|
260 |
metacatReturnString = metacat.insert(docidFull, stringReader, null);
|
|
261 |
harvester.addLogEntry(0, docidFull + " : " + metacatReturnString,
|
|
245 | 262 |
"InsertDocSuccess", |
246 | 263 |
harvestSiteSchedule.siteScheduleID, |
247 | 264 |
null, ""); |
248 | 265 |
} |
249 |
else { |
|
250 |
metacatReturnString = metacat.update(docid, stringReader, null); |
|
251 |
harvester.addLogEntry(0, docid + " : " + metacatReturnString, |
|
266 |
else if (update) {
|
|
267 |
metacatReturnString = metacat.update(docidFull, stringReader, null);
|
|
268 |
harvester.addLogEntry(0, docidFull + " : " + metacatReturnString,
|
|
252 | 269 |
"UpdateDocSuccess", |
253 | 270 |
harvestSiteSchedule.siteScheduleID, |
254 | 271 |
null, ""); |
... | ... | |
270 | 287 |
} |
271 | 288 |
} |
272 | 289 |
} |
290 |
|
|
291 |
|
|
292 |
/** |
|
293 |
* Validate the document to determine whether it is valid EML prior to |
|
294 |
* inserting or updating it to Metacat. This is a QA/QC measure. |
|
295 |
* Not yet implemented. |
|
296 |
* |
|
297 |
* @return true if the document is valid EML, otherwise false |
|
298 |
*/ |
|
299 |
private boolean validateDocument () { |
|
300 |
boolean success = true; |
|
301 |
|
|
302 |
/*if (success) { |
|
303 |
harvester.addLogEntry(0, |
|
304 |
"Validated: " + documentURL, |
|
305 |
"ValidateDocSuccess", |
|
306 |
harvestSiteSchedule.siteScheduleID, |
|
307 |
null, |
|
308 |
""); |
|
309 |
} |
|
310 |
else { |
|
311 |
harvester.addLogEntry(1, "Error validating document", "ValidateDocError", |
|
312 |
harvestSiteSchedule.siteScheduleID, this, ""); |
|
313 |
}*/ |
|
314 |
|
|
315 |
return success; |
|
316 |
} |
|
317 |
|
|
273 | 318 |
} |
Also available in: Unified diff
Additional Harvester development