Project

General

Profile

« Previous | Next » 

Revision 2036

Additional Harvester development

View differences:

src/edu/ucsb/nceas/metacat/harvesterClient/HarvestDetailLog.java
53 53
   * contents of this HarvestDetailLog object.
54 54
   */
55 55
  void dbInsertHarvestDetailLogEntry() {
56
    String dequotedMessage;
56 57
    String insertString;
57 58
		Statement stmt;
59
    
60
    dequotedMessage = harvester.dequoteText(errorMessage);
58 61

  
59 62
    // Set the value of the HARVEST_LOG_ID to the current time in UTC seconds
60 63
    insertString = "INSERT INTO HARVEST_DETAIL_LOG " +
......
68 71
                   harvestDocument.identifier + ", " +
69 72
                   harvestDocument.revision + ", " +
70 73
                   "'" + harvestDocument.documentURL + "', " +
71
                   "'" + errorMessage + "'," +
74
                   "'" + dequotedMessage + "'," +
72 75
                   "'" + harvestDocument.documentType + "'" +
73 76
                   ")";
74 77
                   
75 78
		try {
76
			stmt = harvester.conn.createStatement();						
79
			stmt = harvester.conn.createStatement();
77 80
			stmt.executeUpdate(insertString);
78 81
			stmt.close();
79 82
		}
......
87 90
   * Prints the contents of this HarvestDetailLog object. Used in generating reports.
88 91
   */
89 92
  void printOutput() {
90
    System.out.println("detailLogID:          " + detailLogID);
91
    System.out.println("errorMessage:         " + errorMessage);
93
    System.out.println("* detailLogID:          " + detailLogID);
94
    System.out.println("* errorMessage:         " + errorMessage);
92 95
    harvestDocument.printOutput();
93 96
  }
94 97

  
src/edu/ucsb/nceas/metacat/harvesterClient/Harvester.java
184 184
  boolean connectToMetacat () {
185 185
    return connectToMetacat;
186 186
  }
187
  
188

  
189
  /**
190
   * Normalizes text prior to insertion into the HARVEST_LOG or
191
   * HARVEST_DETAIL_LOG tables. In particular, replaces the single quote
192
   * character with the double quote character. This prevents SQL errors
193
   * involving words that contain single quotes. Also removes \n and \r
194
   * characters from the text.
195
   * 
196
   * @param text  the original string
197
   * @return      a string containing the normalized text
198
   */
199
  String dequoteText(String text) {
200
    char c;
201
    StringBuffer stringBuffer = new StringBuffer();
187 202
    
203
    for (int i = 0; i < text.length(); i++) {
204
      c = text.charAt(i);
205
      switch (c) {
206
        case '\'':
207
          stringBuffer.append('\"');
208
          break;
209
        case '\r':
210
        case '\n':
211
          break;
212
        default:
213
          stringBuffer.append(c);
214
          break;
215
      }
216
    }
217
    
218
    return stringBuffer.toString();
219
  }
188 220

  
221

  
189 222
  /**
190 223
   * Gets the current value of the detailLogID for storage as a primary key in
191 224
   * the DETAIL_LOG_ID field of the HARVEST_DETAIL_LOG table.
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestDocument.java
12 12
import java.io.StringReader;
13 13
import java.net.MalformedURLException;
14 14
import java.net.URL;
15
import java.sql.ResultSet;
16
import java.sql.SQLException;
17
import java.sql.Statement;
15 18

  
16 19
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
17 20
import edu.ucsb.nceas.metacat.client.Metacat;
......
27 30
 * @author  costa
28 31
 */
29 32
public class HarvestDocument {
30
  
31
  private String documentName;  
33

  
34
   
35
  private String docid;                      // scope + identifier
36
  private String docidFull;                  // scope + identifier + revision
32 37
  String documentType;
33 38
  String documentURL;
34 39
  private Harvester harvester;
......
67 72
    this.identifier = identifier;
68 73
    this.revision = revision;
69 74
    
70
    this.documentName = scope + "." + identifier;
75
    this.docid = scope + "." + identifier;
76
    this.docidFull = this.docid + "." + revision;
71 77
  }
72 78

  
73 79

  
......
89 95
      inputStreamReader = new InputStreamReader(inputStream);
90 96
      documentString = IOUtil.getAsString(inputStreamReader, true);
91 97
      stringReader = new StringReader(documentString);
92
      harvester.addLogEntry(0, "", "GetDocSuccess", 
93
                            harvestSiteSchedule.siteScheduleID, null, "");
98
      harvester.addLogEntry(0,
99
                            "Retrieved: " + documentURL, 
100
                            "GetDocSuccess", 
101
                            harvestSiteSchedule.siteScheduleID, 
102
                            null, 
103
                            "");
94 104
    }
95 105
    catch (MalformedURLException e) {
96 106
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
......
116 126
   */
117 127
  public void harvestDocument() {
118 128
    int highestRevision;
129
    boolean insert = false;
119 130
    String metacatReturnString;
120 131
    StringReader stringReader;
132
    boolean update = false;
121 133

  
122 134
    /* If metacat already has this document, determine the highest revision in
123 135
     * metacat and report it to the user; else, insert or update the document 
124 136
     * into metacat.
125 137
     */
126
    if (metacatHasDocument()) {
127
      highestRevision = metacatHighestRevision();
138
    highestRevision = metacatHighestRevision();
139

  
140
    if (highestRevision == -1) {
141
      insert = true;
142
    }
143
    else if (revision > highestRevision) {
144
      update = true;
145
    }
146
    else {
128 147
      harvester.addLogEntry(0, 
129
                            "Metacat has document: " + documentName +
130
                            ", highest revision: " + highestRevision, 
148
                            "Attempting to update " + docid + " to revision " + 
149
                            revision + ". Metacat has document revision " +
150
                            highestRevision + ".", 
131 151
                            "MetacatHasDoc", 
132
                            harvestSiteSchedule.siteScheduleID, null, "");
152
                            harvestSiteSchedule.siteScheduleID, 
153
                            null, 
154
                            "");
133 155
    }
134
    else {
156
    
157
    if (insert || update) {
135 158
      stringReader = getSiteDocument();
136 159
      if (stringReader != null) {
137
        if (parseDocument()) {
138
          putMetacatDocument(stringReader);
160
        if (validateDocument()) {
161
          putMetacatDocument(insert, update, stringReader);
139 162
        }
140 163
      }
141 164
    }
......
156 179
                                Exception e
157 180
                               ) {
158 181
    if (insert) {
159
      harvester.addLogEntry(1, metacatReturnString, "InsertDocError", 
160
                                harvestSiteSchedule.siteScheduleID, 
161
                                this, exceptionName + ": " + e.getMessage());
182
      harvester.addLogEntry(1, 
183
                            metacatReturnString,
184
                            "InsertDocError",
185
                            harvestSiteSchedule.siteScheduleID,
186
                            this,
187
                            exceptionName + ": " + e.getMessage());
162 188
    }
163 189
    else {
164
      harvester.addLogEntry(1, metacatReturnString, "UpdateDocError", 
165
                                harvestSiteSchedule.siteScheduleID, 
166
                                this, exceptionName + ": " + e.getMessage());
190
      harvester.addLogEntry(1, 
191
                            metacatReturnString,
192
                            "UpdateDocError",
193
                            harvestSiteSchedule.siteScheduleID,
194
                            this,
195
                            exceptionName + ": " + e.getMessage());
167 196
    }
168 197
  }
169 198
  
170 199

  
171 200
  /**
172
   * Boolean to determine whether Metacat already has this document.
173
   * 
174
   * @return  true if Metacat has the document, otherwise false
175
   */
176
  private boolean metacatHasDocument() {
177
    boolean     hasDocument = false;
178

  
179
    return hasDocument;
180
  }
181
    
182

  
183
  /**
184 201
   * Determines the highest revision that Metacat has for this document.
185 202
   * 
186
   * @return  int representing the highest revision for this document in Metacat
203
   * @return  int representing the highest revision for this document in
204
   *          Metacat. Returns -1 if Metacat does not currently hold the
205
   *          document.
187 206
   */
188 207
  private int metacatHighestRevision() {
189
    int         highestRevision = 0;
190
        
191
    return highestRevision;
192
  }
193
  
194
  
195
  /**
196
   * Parse the document to determine whether it is valid EML prior to inserting
197
   * or updating it to Metacat. This is a QA/QC measure. Currently unimplemented.
198
   * 
199
   * @return  true if the document is valid EML, otherwise false
200
   */
201
  private boolean parseDocument () {
202
    boolean success = true;
208
    int         highestRevision = -1;
209
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
210
                   "'" + docid + "'";
211
		Statement stmt;
203 212
    
204
    if (success) {
205
      harvester.addLogEntry(0, "", "ValidateDocSuccess", 
206
                            harvestSiteSchedule.siteScheduleID, null, "");
213
		try {
214
			stmt = harvester.conn.createStatement();							
215
			ResultSet rs = stmt.executeQuery(query);
216
	
217
			while (rs.next()) {
218
				highestRevision = rs.getInt("REV");
219
			}
220
	
221
			stmt.close();	
222
		}
223
    catch(SQLException e) {
224
			System.out.println("SQLException: " + e.getMessage());
207 225
    }
208
    else {
209
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
210
                            harvestSiteSchedule.siteScheduleID, this, "");
211
    }
212
    
213
    return success;
226

  
227
    return highestRevision;
214 228
  }
215 229
  
216 230
  
......
218 232
   * Print the data fields and values in this HarvestDocument object.
219 233
   */
220 234
  void printOutput() {
221
    System.out.println("scope:                " + scope);
222
    System.out.println("identifier:           " + identifier);
223
    System.out.println("revision:             " + revision);
224
    System.out.println("documentType:         " + documentType);
225
    System.out.println("documentURL:          " + documentURL);
226
    System.out.println("documentName:         " + documentName);
235
    System.out.println("* scope:                " + scope);
236
    System.out.println("* identifier:           " + identifier);
237
    System.out.println("* revision:             " + revision);
238
    System.out.println("* documentType:         " + documentType);
239
    System.out.println("* documentURL:          " + documentURL);
227 240
  }
228 241
 
229 242
 
230 243
  /**
231 244
   * Insert or update this document to Metacat, as selected by the insert
232 245
   * and update flags supplied by the caller.
246
   * 
247
   * @param insert       true if this is an insert operation
248
   * @param update       true if this is an update operation
249
   * @param stringReader the StringReader object holding the document text
233 250
   */
234
  private void putMetacatDocument(StringReader stringReader) {
235
    String docid = scope + "." + identifier + "." + revision;
236
    boolean insert = (revision == 1);
251
  private void putMetacatDocument(boolean insert,
252
                                  boolean update, 
253
                                  StringReader stringReader) {
237 254
    Metacat metacat = harvester.metacat;
238 255
    String metacatReturnString = "";
239 256
    
240 257
    if (harvester.connectToMetacat()) {
241 258
      try {
242 259
        if (insert) {
243
          metacatReturnString = metacat.insert(docid, stringReader, null);
244
          harvester.addLogEntry(0, docid + " : " + metacatReturnString, 
260
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
261
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
245 262
                                "InsertDocSuccess", 
246 263
                                harvestSiteSchedule.siteScheduleID, 
247 264
                                null, "");
248 265
        }
249
        else {
250
          metacatReturnString = metacat.update(docid, stringReader, null);
251
          harvester.addLogEntry(0, docid + " : " + metacatReturnString, 
266
        else if (update) {
267
          metacatReturnString = metacat.update(docidFull, stringReader, null);
268
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
252 269
                                "UpdateDocSuccess", 
253 270
                                harvestSiteSchedule.siteScheduleID, 
254 271
                                null, "");
......
270 287
      }
271 288
    }
272 289
  }
290
  
291
  
292
  /**
293
   * Validate the document to determine whether it is valid EML prior to 
294
   * inserting or updating it to Metacat. This is a QA/QC measure. 
295
   * Not yet implemented.
296
   * 
297
   * @return  true if the document is valid EML, otherwise false
298
   */
299
  private boolean validateDocument () {
300
    boolean success = true;
301
    
302
    /*if (success) {
303
      harvester.addLogEntry(0, 
304
                            "Validated: " + documentURL, 
305
                            "ValidateDocSuccess", 
306
                            harvestSiteSchedule.siteScheduleID, 
307
                            null, 
308
                            "");
309
    }
310
    else {
311
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
312
                            harvestSiteSchedule.siteScheduleID, this, "");
313
    }*/
314
    
315
    return success;
316
  }
317
  
273 318
}
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestLog.java
26 26
  private int harvestLogID;
27 27
  private Date harvestDate;
28 28
  private int status;
29
  private final String marker =
30
"*****************************************************************************";
29 31
  private String message;
30 32
  private String harvestOperationCode;
31 33
  private int siteScheduleID;
......
152 154
   * this HarvestLog object. Not yet implemented.
153 155
   */
154 156
  void dbInsertHarvestLogEntry() {
157
    String dequotedMessage = harvester.dequoteText(message);
155 158
    String insertString;
156 159
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd-MMM-yyyy");
157 160
		Statement stmt;
......
163 166
                   harvestLogID + ", " +
164 167
                   "'" + simpleDateFormat.format(harvestDate) + "', " +
165 168
                   status + ", " +
166
                   "'" + timestamp + ": " + message + "', " +
169
                   "'" + timestamp + ": " + dequotedMessage + "', " +
167 170
                   "'" + harvestOperationCode + "', " +
168 171
                   siteScheduleID +
169 172
                   ")";
......
264 267
   */
265 268
  void printOutput() {
266 269
    System.out.println("");
267
    System.out.println("harvestLogID:         " + harvestLogID);
268
    System.out.println("harvestDate:          " + harvestDate);
269
    System.out.println("status:               " + status);
270
    System.out.println("message:              " + message);
271
    System.out.println("harvestOperationCode: " + harvestOperationCode);
272
    
273
    if (siteScheduleID != 0) {
274
      harvester.printHarvestSiteSchedule(siteScheduleID);
270
    System.out.println(marker);
271
    System.out.println("*");
272
    System.out.println("* harvestLogID:         " + harvestLogID);
273
    System.out.println("* harvestDate:          " + harvestDate);
274
    System.out.println("* status:               " + status);
275
    System.out.println("* message:              " + message);
276
    System.out.println("* harvestOperationCode: " + harvestOperationCode);
277

  
278
    if (harvestOperationCode.equals("GetDocListSuccess") ||
279
        harvestOperationCode.equals("GetDocListError")) {
280
      if (siteScheduleID != 0) {
281
        harvester.printHarvestSiteSchedule(siteScheduleID);
282
      }
275 283
    }
276 284
    
277 285
    if (harvestDetailLog != null) {
278 286
      harvestDetailLog.printOutput();
279 287
    }
288

  
289
    System.out.println("*");
290
    System.out.println(marker);
280 291
  }
281 292

  
282 293
}
src/edu/ucsb/nceas/metacat/harvesterClient/HarvestSiteSchedule.java
165 165
    boolean dueForHarvest = false;
166 166
    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S");
167 167
    Date now = new Date();
168
    Date dlh;                          // Date of last harvest
169 168
    Date dnh;                          // Date of next harvest
170 169
    long currentTime = now.getTime();  // Current time in milliseconds
171 170
    long timeNextHarvest = 0;
172 171
    
173 172
    try {
174
      dlh = dateFormat.parse(dateLastHarvest);
175
      timeNextHarvest = dlh.getTime() + delta;
176
      dnh = new Date(timeNextHarvest);
173
      dnh = dateFormat.parse(dateNextHarvest);
174
      timeNextHarvest = dnh.getTime();
177 175
      
178 176
      if (timeNextHarvest < currentTime) {
179 177
        dueForHarvest = true;
......
200 198
   */
201 199
  public void harvestDocumentList() {
202 200
    HarvestDocument harvestDocument;
201
    boolean success;
203 202
    
204 203
    if (dueForHarvest()) {
205 204
      try {
206
        parseDocumentList();
207
        metacatLogin();
205
        success = parseDocumentList();
206

  
207
        /* If the document list was validated, then proceed with harvesting
208
         * the documents
209
         */
210
        if (success) {
211
          metacatLogin();
208 212
        
209
        for (int i = 0; i < harvestDocumentList.size(); i++) {
210
          harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
213
          for (int i = 0; i < harvestDocumentList.size(); i++) {
214
            harvestDocument = (HarvestDocument) harvestDocumentList.get(i);
211 215
          
212
          if (harvestDocument != null) {
213
            harvestDocument.harvestDocument();
216
            if (harvestDocument != null) {
217
              harvestDocument.harvestDocument();
218
            }
214 219
          }
220

  
221
          metacatLogout();      
222
          dbUpdateHarvestSiteSchedule();  // Update the schedule
215 223
        }
216

  
217
        metacatLogout();      
218
        dbUpdateHarvestSiteSchedule();
219 224
      }
220 225
      catch (ParserConfigurationException e) {
221 226
        System.out.println("ParserConfigurationException: " + e.getMessage());
......
274 279

  
275 280
  /**
276 281
   * Parse the site document list to find out which documents to harvest.
282
   * 
283
   * @return  true if successful, otherwise false
277 284
   */
278
  private void parseDocumentList() 
285
  private boolean parseDocumentList() 
279 286
          throws ParserConfigurationException {
280 287
    DocumentListHandler documentListHandler = new DocumentListHandler();
281 288
    InputStream inputStream;
282 289
    InputStreamReader inputStreamReader;
283 290
    String schemaLocation = ".";
291
    boolean success = false;
284 292
    URL url;
285 293

  
286 294
    try {
......
292 300
      documentListHandler.runParser(inputStreamReader, schemaLocation);
293 301
      harvester.addLogEntry(0, "", "ValidateDocListSuccess", 
294 302
                            siteScheduleID, null, "");
303
      success = true;
295 304
    }
296 305
    catch (MalformedURLException e){
297 306
      harvester.addLogEntry(1, "MalformedURLException: " + e.getMessage(), 
......
313 322
      harvester.addLogEntry(1, "IOException: " + e.getMessage(), 
314 323
                            "GetDocListError", siteScheduleID, null, "");
315 324
    }
325
    
326
    return success;
316 327
  }
317 328

  
318 329

  
......
320 331
   * Prints the data that is stored in this HarvestSiteSchedule object.
321 332
   */
322 333
  void printOutput() {
323
    System.out.println("siteScheduleID:       " + siteScheduleID);
324
    System.out.println("documentListURL:      " + documentListURL);
325
    System.out.println("ldapDN:               " + ldapDN);
326
    System.out.println("dateNextHarvest:      " + dateNextHarvest);
327
    System.out.println("dateLastHarvest:      " + dateLastHarvest);
328
    System.out.println("updateFrequency:      " + updateFrequency);
329
    System.out.println("unit:                 " + unit);
330
    System.out.println("contactEmail:         " + contactEmail);
334
    System.out.println("* siteScheduleID:       " + siteScheduleID);
335
    System.out.println("* documentListURL:      " + documentListURL);
336
    System.out.println("* ldapDN:               " + ldapDN);
337
    System.out.println("* dateNextHarvest:      " + dateNextHarvest);
338
    System.out.println("* dateLastHarvest:      " + dateLastHarvest);
339
    System.out.println("* updateFrequency:      " + updateFrequency);
340
    System.out.println("* unit:                 " + unit);
341
    System.out.println("* contactEmail:         " + contactEmail);
331 342
  }
332 343
  
333 344

  
......
349 360
  
350 361
    public String scope;
351 362
    public int identifier;
363
    public String identifierString;
364
    public String documentType;
352 365
    public int revision;
353
    public String documentType;
366
    public String revisionString;
354 367
    public String documentURL;
355 368
    private String currentQname;
356 369
    public final static String DEFAULT_PARSER = 
......
373 386
 
374 387
      if (length > 0) {           
375 388
        if (currentQname.equals("scope")) {
376
          scope = s;
389
          scope += s;
377 390
        }
378 391
        else if (currentQname.equals("identifier")) {
379
          identifier = Integer.parseInt(s);
392
          identifierString += s;
380 393
        }
381 394
        else if (currentQname.equals("revision")) {
382
          revision = Integer.parseInt(s);
395
          revisionString += s;
383 396
        }
384 397
        else if (currentQname.equals("documentType")) {
385
          documentType = s;
398
          documentType += s;
386 399
        }
387 400
        else if (currentQname.equals("documentURL")) {
388
          documentURL = s;
401
          documentURL += s;
389 402
        }
390
        
391
        currentQname = "";
392 403
      }
393 404
    }
394 405

  
......
416 427
      
417 428
      HarvestDocument harvestDocument;
418 429
      
419
      if (qname.equals("document")) {
430
      if (qname.equals("identifier")) {
431
        identifier = Integer.parseInt(identifierString);
432
      }
433
      else if (qname.equals("revision")) {
434
        revision = Integer.parseInt(revisionString);
435
      }
436
      else if (qname.equals("document")) {
420 437
        harvestDocument = new HarvestDocument(
421 438
                                              harvester,
422 439
                                              HarvestSiteSchedule.this,
......
428 445
                                             );
429 446
        harvestDocumentList.add(harvestDocument);
430 447
      }
448

  
449
      currentQname = "";
431 450
    }
432 451

  
433 452

  
......
501 520
                             Attributes attributes) {
502 521
      
503 522
      currentQname = qname;
523

  
524
      if (qname.equals("scope")) {
525
        scope = "";
526
      }
527
      else if (qname.equals("identifier")) {
528
        identifierString = "";
529
      }
530
      else if (qname.equals("revision")) {
531
        revisionString = "";
532
      }
533
      else if (qname.equals("documentType")) {
534
        documentType = "";
535
      }
536
      else if (qname.equals("documentURL")) {
537
        documentURL = "";
538
      }
504 539
    }
505 540
  }
506 541
}

Also available in: Unified diff