Project

General

Profile

« Previous | Next » 

Revision 2036

Added by Duane Costa over 20 years ago

Additional Harvester development

View differences:

HarvestDocument.java
12 12
import java.io.StringReader;
13 13
import java.net.MalformedURLException;
14 14
import java.net.URL;
15
import java.sql.ResultSet;
16
import java.sql.SQLException;
17
import java.sql.Statement;
15 18

  
16 19
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
17 20
import edu.ucsb.nceas.metacat.client.Metacat;
......
27 30
 * @author  costa
28 31
 */
29 32
public class HarvestDocument {
30
  
31
  private String documentName;  
33

  
34
   
35
  private String docid;                      // scope + identifier
36
  private String docidFull;                  // scope + identifier + revision
32 37
  String documentType;
33 38
  String documentURL;
34 39
  private Harvester harvester;
......
67 72
    this.identifier = identifier;
68 73
    this.revision = revision;
69 74
    
70
    this.documentName = scope + "." + identifier;
75
    this.docid = scope + "." + identifier;
76
    this.docidFull = this.docid + "." + revision;
71 77
  }
72 78

  
73 79

  
......
89 95
      inputStreamReader = new InputStreamReader(inputStream);
90 96
      documentString = IOUtil.getAsString(inputStreamReader, true);
91 97
      stringReader = new StringReader(documentString);
92
      harvester.addLogEntry(0, "", "GetDocSuccess", 
93
                            harvestSiteSchedule.siteScheduleID, null, "");
98
      harvester.addLogEntry(0,
99
                            "Retrieved: " + documentURL, 
100
                            "GetDocSuccess", 
101
                            harvestSiteSchedule.siteScheduleID, 
102
                            null, 
103
                            "");
94 104
    }
95 105
    catch (MalformedURLException e) {
96 106
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
......
116 126
   */
117 127
  public void harvestDocument() {
118 128
    int highestRevision;
129
    boolean insert = false;
119 130
    String metacatReturnString;
120 131
    StringReader stringReader;
132
    boolean update = false;
121 133

  
122 134
    /* If metacat already has this document, determine the highest revision in
123 135
     * metacat and report it to the user; else, insert or update the document 
124 136
     * into metacat.
125 137
     */
126
    if (metacatHasDocument()) {
127
      highestRevision = metacatHighestRevision();
138
    highestRevision = metacatHighestRevision();
139

  
140
    if (highestRevision == -1) {
141
      insert = true;
142
    }
143
    else if (revision > highestRevision) {
144
      update = true;
145
    }
146
    else {
128 147
      harvester.addLogEntry(0, 
129
                            "Metacat has document: " + documentName +
130
                            ", highest revision: " + highestRevision, 
148
                            "Attempting to update " + docid + " to revision " + 
149
                            revision + ". Metacat has document revision " +
150
                            highestRevision + ".", 
131 151
                            "MetacatHasDoc", 
132
                            harvestSiteSchedule.siteScheduleID, null, "");
152
                            harvestSiteSchedule.siteScheduleID, 
153
                            null, 
154
                            "");
133 155
    }
134
    else {
156
    
157
    if (insert || update) {
135 158
      stringReader = getSiteDocument();
136 159
      if (stringReader != null) {
137
        if (parseDocument()) {
138
          putMetacatDocument(stringReader);
160
        if (validateDocument()) {
161
          putMetacatDocument(insert, update, stringReader);
139 162
        }
140 163
      }
141 164
    }
......
156 179
                                Exception e
157 180
                               ) {
158 181
    if (insert) {
159
      harvester.addLogEntry(1, metacatReturnString, "InsertDocError", 
160
                                harvestSiteSchedule.siteScheduleID, 
161
                                this, exceptionName + ": " + e.getMessage());
182
      harvester.addLogEntry(1, 
183
                            metacatReturnString,
184
                            "InsertDocError",
185
                            harvestSiteSchedule.siteScheduleID,
186
                            this,
187
                            exceptionName + ": " + e.getMessage());
162 188
    }
163 189
    else {
164
      harvester.addLogEntry(1, metacatReturnString, "UpdateDocError", 
165
                                harvestSiteSchedule.siteScheduleID, 
166
                                this, exceptionName + ": " + e.getMessage());
190
      harvester.addLogEntry(1, 
191
                            metacatReturnString,
192
                            "UpdateDocError",
193
                            harvestSiteSchedule.siteScheduleID,
194
                            this,
195
                            exceptionName + ": " + e.getMessage());
167 196
    }
168 197
  }
169 198
  
170 199

  
171 200
  /**
172
   * Boolean to determine whether Metacat already has this document.
173
   * 
174
   * @return  true if Metacat has the document, otherwise false
175
   */
176
  private boolean metacatHasDocument() {
177
    boolean     hasDocument = false;
178

  
179
    return hasDocument;
180
  }
181
    
182

  
183
  /**
184 201
   * Determines the highest revision that Metacat has for this document.
185 202
   * 
186
   * @return  int representing the highest revision for this document in Metacat
203
   * @return  int representing the highest revision for this document in
204
   *          Metacat. Returns -1 if Metacat does not currently hold the
205
   *          document.
187 206
   */
188 207
  private int metacatHighestRevision() {
189
    int         highestRevision = 0;
190
        
191
    return highestRevision;
192
  }
193
  
194
  
195
  /**
196
   * Parse the document to determine whether it is valid EML prior to inserting
197
   * or updating it to Metacat. This is a QA/QC measure. Currently unimplemented.
198
   * 
199
   * @return  true if the document is valid EML, otherwise false
200
   */
201
  private boolean parseDocument () {
202
    boolean success = true;
208
    int         highestRevision = -1;
209
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
210
                   "'" + docid + "'";
211
		Statement stmt;
203 212
    
204
    if (success) {
205
      harvester.addLogEntry(0, "", "ValidateDocSuccess", 
206
                            harvestSiteSchedule.siteScheduleID, null, "");
213
		try {
214
			stmt = harvester.conn.createStatement();							
215
			ResultSet rs = stmt.executeQuery(query);
216
	
217
			while (rs.next()) {
218
				highestRevision = rs.getInt("REV");
219
			}
220
	
221
			stmt.close();	
222
		}
223
    catch(SQLException e) {
224
			System.out.println("SQLException: " + e.getMessage());
207 225
    }
208
    else {
209
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
210
                            harvestSiteSchedule.siteScheduleID, this, "");
211
    }
212
    
213
    return success;
226

  
227
    return highestRevision;
214 228
  }
215 229
  
216 230
  
......
218 232
   * Print the data fields and values in this HarvestDocument object.
219 233
   */
220 234
  void printOutput() {
221
    System.out.println("scope:                " + scope);
222
    System.out.println("identifier:           " + identifier);
223
    System.out.println("revision:             " + revision);
224
    System.out.println("documentType:         " + documentType);
225
    System.out.println("documentURL:          " + documentURL);
226
    System.out.println("documentName:         " + documentName);
235
    System.out.println("* scope:                " + scope);
236
    System.out.println("* identifier:           " + identifier);
237
    System.out.println("* revision:             " + revision);
238
    System.out.println("* documentType:         " + documentType);
239
    System.out.println("* documentURL:          " + documentURL);
227 240
  }
228 241
 
229 242
 
230 243
  /**
231 244
   * Insert or update this document to Metacat. The caller selects the
232 245
   * operation through the insert and update flags.
246
   * 
247
   * @param insert       true if this is an insert operation
248
   * @param update       true if this is an update operation
249
   * @param stringReader the StringReader object holding the document text
233 250
   */
234
  private void putMetacatDocument(StringReader stringReader) {
235
    String docid = scope + "." + identifier + "." + revision;
236
    boolean insert = (revision == 1);
251
  private void putMetacatDocument(boolean insert,
252
                                  boolean update, 
253
                                  StringReader stringReader) {
237 254
    Metacat metacat = harvester.metacat;
238 255
    String metacatReturnString = "";
239 256
    
240 257
    if (harvester.connectToMetacat()) {
241 258
      try {
242 259
        if (insert) {
243
          metacatReturnString = metacat.insert(docid, stringReader, null);
244
          harvester.addLogEntry(0, docid + " : " + metacatReturnString, 
260
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
261
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
245 262
                                "InsertDocSuccess", 
246 263
                                harvestSiteSchedule.siteScheduleID, 
247 264
                                null, "");
248 265
        }
249
        else {
250
          metacatReturnString = metacat.update(docid, stringReader, null);
251
          harvester.addLogEntry(0, docid + " : " + metacatReturnString, 
266
        else if (update) {
267
          metacatReturnString = metacat.update(docidFull, stringReader, null);
268
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
252 269
                                "UpdateDocSuccess", 
253 270
                                harvestSiteSchedule.siteScheduleID, 
254 271
                                null, "");
......
270 287
      }
271 288
    }
272 289
  }
290
  
291
  
292
  /**
293
   * Validate the document to determine whether it is valid EML prior to 
294
   * inserting or updating it to Metacat. This is a QA/QC measure. 
295
   * Not yet implemented.
296
   * 
297
   * @return  true if the document is valid EML, otherwise false
298
   */
299
  private boolean validateDocument () {
300
    boolean success = true;
301
    
302
    /*if (success) {
303
      harvester.addLogEntry(0, 
304
                            "Validated: " + documentURL, 
305
                            "ValidateDocSuccess", 
306
                            harvestSiteSchedule.siteScheduleID, 
307
                            null, 
308
                            "");
309
    }
310
    else {
311
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
312
                            harvestSiteSchedule.siteScheduleID, this, "");
313
    }*/
314
    
315
    return success;
316
  }
317
  
273 318
}

Also available in: Unified diff