Project

General

Profile

/*
 * HarvestDocument.java
 *
 * Created on January 14, 2004, 4:37 PM
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.client.Metacat;
import edu.ucsb.nceas.metacat.client.MetacatException;
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
import edu.ucsb.nceas.utilities.IOUtil;
24

    
25

    
26
/**
27
 * HarvestDocument manages operations and data for a single document to be
28
 * harvested.
29
 * 
30
 * @author  costa
31
 */
32
public class HarvestDocument {
33

    
34
   
35
  private String docid;                      // scope + identifier
36
  private String docidFull;                  // scope + identifier + revision
37
  String documentType;
38
  String documentURL;
39
  private Harvester harvester;
40
  private HarvestSiteSchedule harvestSiteSchedule;
41
  int identifier;
42
  int revision;
43
  String scope;
44
    
45

    
46
  /**
47
   * Creates a new instance of HarvestDocument. Initialized with the data
48
   * that was read from a single <document> element in site document list.
49
   * 
50
   * @param harvester            the parent Harvester object
51
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
52
   * @param scope                the value of the <scope> element
53
   * @param identifier           the value of the <identifier> element
54
   * @param revision             the value of the <revision> element
55
   * @param documentType         the value of the <documentType> element
56
   * @param documentURL          the value of the <documentURL> element
57
   */
58
  public HarvestDocument(
59
                          Harvester harvester,
60
                          HarvestSiteSchedule harvestSiteSchedule,
61
                          String scope,
62
                          int identifier,
63
                          int revision,
64
                          String documentType,
65
                          String documentURL
66
                        ) {
67
    this.harvester = harvester;
68
    this.harvestSiteSchedule = harvestSiteSchedule;
69
    this.documentType = documentType;
70
    this.documentURL = documentURL;
71
    this.scope = scope;
72
    this.identifier = identifier;
73
    this.revision = revision;
74
    
75
    this.docid = scope + "." + identifier;
76
    this.docidFull = this.docid + "." + revision;
77
  }
78

    
79

    
80
  /**
81
   * Retrieve the document from the site using its <documentURL> value.
82
   * 
83
   * @return   A StringReader containing the document string.
84
   */
85
  private StringReader getSiteDocument() {
86
    String documentString;
87
    InputStream inputStream;
88
    InputStreamReader inputStreamReader;
89
    StringReader stringReader = null;
90
    URL url;
91
    
92
    try {
93
      url = new URL(documentURL);
94
      inputStream = url.openStream();
95
      inputStreamReader = new InputStreamReader(inputStream);
96
      documentString = IOUtil.getAsString(inputStreamReader, true);
97
      stringReader = new StringReader(documentString);
98
      harvester.addLogEntry(0,
99
                            "Retrieved: " + documentURL, 
100
                            "GetDocSuccess", 
101
                            harvestSiteSchedule.siteScheduleID, 
102
                            null, 
103
                            "");
104
    }
105
    catch (MalformedURLException e) {
106
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
107
                            harvestSiteSchedule.siteScheduleID, this, 
108
                            "MalformedURLException: " + e.getMessage());
109
    }
110
    catch (IOException e) {
111
      harvester.addLogEntry(1, "IOException", "GetDocError", 
112
                            harvestSiteSchedule.siteScheduleID, this, 
113
                            "IOException: " + e.getMessage());
114
    }
115
    
116
    return stringReader;
117
  }
118
    
119

    
120
  /**
121
   * Harvest the document from the site. Unless Metacat already has the
122
   * document, retrieve the document from the site and put (insert or
123
   * update) it to Metacat. If Metacat already has the document, determine
124
   * the highest revision stored in Metacat so that this can be reported
125
   * back to the user.
126
   */
127
  public void harvestDocument() {
128
    int highestRevision;
129
    boolean insert = false;
130
    String metacatReturnString;
131
    StringReader stringReader;
132
    boolean update = false;
133

    
134
    /* If metacat already has this document, determine the highest revision in
135
     * metacat and report it to the user; else, insert or delete the document 
136
     * into metacat.
137
     */
138
    highestRevision = metacatHighestRevision();
139

    
140
    if (highestRevision == -1) {
141
      insert = true;
142
    }
143
    else if (revision > highestRevision) {
144
      update = true;
145
    }
146
    else {
147
      harvester.addLogEntry(0, 
148
                            "Attempting to update " + docid + " to revision " + 
149
                            revision + ". Metacat has document revision " +
150
                            highestRevision + ".", 
151
                            "MetacatHasDoc", 
152
                            harvestSiteSchedule.siteScheduleID, 
153
                            null, 
154
                            "");
155
    }
156
    
157
    if (insert || update) {
158
      stringReader = getSiteDocument();
159
      if (stringReader != null) {
160
        if (validateDocument()) {
161
          putMetacatDocument(insert, update, stringReader);
162
        }
163
      }
164
    }
165
  }
166
  
167

    
168
  /**
169
   * Logs a metacat document error to the harvest detail log. 
170
   *
171
   * @param insert               true if insert operation, false is update
172
   * @param metacatReturnString  string returned from the insert or update
173
   * @param exceptionName        name of the exception class
174
   * @param e                    the exception object
175
   */
176
  private void logMetacatError (boolean insert, 
177
                                String metacatReturnString,
178
                                String exceptionName,
179
                                Exception e
180
                               ) {
181
    if (insert) {
182
      harvester.addLogEntry(1, 
183
                            metacatReturnString,
184
                            "InsertDocError",
185
                            harvestSiteSchedule.siteScheduleID,
186
                            this,
187
                            exceptionName + ": " + e.getMessage());
188
    }
189
    else {
190
      harvester.addLogEntry(1, 
191
                            metacatReturnString,
192
                            "UpdateDocError",
193
                            harvestSiteSchedule.siteScheduleID,
194
                            this,
195
                            exceptionName + ": " + e.getMessage());
196
    }
197
  }
198
  
199

    
200
  /**
201
   * Determines the highest revision that Metacat has for this document.
202
   * 
203
   * @return  int representing the highest revision for this document in
204
   *          Metacat. Returns -1 if Metacat does not currently hold the
205
   *          document.
206
   */
207
  private int metacatHighestRevision() {
208
    int         highestRevision = -1;
209
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
210
                   "'" + docid + "'";
211
		Statement stmt;
212
    
213
		try {
214
			stmt = harvester.conn.createStatement();							
215
			ResultSet rs = stmt.executeQuery(query);
216
	
217
			while (rs.next()) {
218
				highestRevision = rs.getInt("REV");
219
			}
220
	
221
			stmt.close();	
222
		}
223
    catch(SQLException e) {
224
			System.out.println("SQLException: " + e.getMessage());
225
    }
226

    
227
    return highestRevision;
228
  }
229
  
230
  
231
  /**
232
   * Print the data fields and values in this HarvestDocument object.
233
   */
234
  void printOutput() {
235
    System.out.println("* scope:                " + scope);
236
    System.out.println("* identifier:           " + identifier);
237
    System.out.println("* revision:             " + revision);
238
    System.out.println("* documentType:         " + documentType);
239
    System.out.println("* documentURL:          " + documentURL);
240
  }
241
 
242
 
243
  /**
244
   * Insert or update this document to Metacat. If revision equals 1, do an
245
   * insert; otherwise, do an update.
246
   * 
247
   * @param insert       true if this is an insert operation
248
   * @param update       true if this is an update operation
249
   * @param stringReader the StringReader object holding the document text
250
   */
251
  private void putMetacatDocument(boolean insert,
252
                                  boolean update, 
253
                                  StringReader stringReader) {
254
    Metacat metacat = harvester.metacat;
255
    String metacatReturnString = "";
256
    
257
    if (harvester.connectToMetacat()) {
258
      try {
259
        if (insert) {
260
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
261
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
262
                                "InsertDocSuccess", 
263
                                harvestSiteSchedule.siteScheduleID, 
264
                                null, "");
265
        }
266
        else if (update) {
267
          metacatReturnString = metacat.update(docidFull, stringReader, null);
268
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
269
                                "UpdateDocSuccess", 
270
                                harvestSiteSchedule.siteScheduleID, 
271
                                null, "");
272
        }
273
      }
274
      catch (MetacatInaccessibleException e) {
275
        logMetacatError(insert, metacatReturnString, 
276
                        "MetacatInaccessibleException", e);
277
      }
278
      catch (InsufficientKarmaException e) {
279
        logMetacatError(insert, metacatReturnString, 
280
                        "InsufficientKarmaException", e);
281
      }
282
      catch (MetacatException e) {
283
        logMetacatError(insert, metacatReturnString, "MetacatException", e);
284
      }
285
      catch (IOException e) {
286
        logMetacatError(insert, metacatReturnString, "IOException", e);
287
      }
288
    }
289
  }
290
  
291
  
292
  /**
293
   * Validate the document to determine whether it is valid EML prior to 
294
   * inserting or updating it to Metacat. This is QA/QC measure. 
295
   * Not yet implemented.
296
   * 
297
   * @return  true if the document is valid EML, otherwise false
298
   */
299
  private boolean validateDocument () {
300
    boolean success = true;
301
    
302
    /*if (success) {
303
      harvester.addLogEntry(0, 
304
                            "Validated: " + documentURL, 
305
                            "ValidateDocSuccess", 
306
                            harvestSiteSchedule.siteScheduleID, 
307
                            null, 
308
                            "");
309
    }
310
    else {
311
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
312
                            harvestSiteSchedule.siteScheduleID, this, "");
313
    }*/
314
    
315
    return success;
316
  }
317
  
318
}
(2-2/7)