Project

General

Profile

/*
 * HarvestDocument.java
 *
 * Created on January 14, 2004, 4:37 PM
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.client.Metacat;
import edu.ucsb.nceas.metacat.client.MetacatException;
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
import edu.ucsb.nceas.utilities.IOUtil;
25

    
26

    
27
/**
28
 * HarvestDocument manages operations and data for a single document to be
29
 * harvested.
30
 * 
31
 * @author  costa
32
 */
33
public class HarvestDocument {
34

    
35
   
36
  private String docid;                      // scope + identifier
37
  private String docidFull;                  // scope + identifier + revision
38
  String documentType;
39
  String documentURL;
40
  private Harvester harvester;
41
  private HarvestSiteSchedule harvestSiteSchedule;
42
  int identifier;
43
  int revision;
44
  String scope;
45
    
46

    
47
  /**
48
   * Creates a new instance of HarvestDocument. Initialized with the data
49
   * that was read from a single <document> element in site document list.
50
   * 
51
   * @param harvester            the parent Harvester object
52
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
53
   * @param scope                the value of the <scope> element
54
   * @param identifier           the value of the <identifier> element
55
   * @param revision             the value of the <revision> element
56
   * @param documentType         the value of the <documentType> element
57
   * @param documentURL          the value of the <documentURL> element
58
   */
59
  public HarvestDocument(
60
                          Harvester harvester,
61
                          HarvestSiteSchedule harvestSiteSchedule,
62
                          String scope,
63
                          int identifier,
64
                          int revision,
65
                          String documentType,
66
                          String documentURL
67
                        ) {
68
    this.harvester = harvester;
69
    this.harvestSiteSchedule = harvestSiteSchedule;
70
    this.documentType = documentType;
71
    this.documentURL = documentURL;
72
    this.scope = scope;
73
    this.identifier = identifier;
74
    this.revision = revision;
75
    
76
    this.docid = scope + "." + identifier;
77
    this.docidFull = this.docid + "." + revision;
78
  }
79

    
80

    
81
  /**
82
   * Retrieve the document from the site using its <documentURL> value.
83
   * 
84
   * @return   A StringReader containing the document string.
85
   */
86
  private StringReader getSiteDocument() {
87
    String documentString;
88
    InputStream inputStream;
89
    InputStreamReader inputStreamReader;
90
    StringReader stringReader = null;
91
    URL url;
92
    
93
    try {
94
      url = new URL(documentURL);
95
      inputStream = url.openStream();
96
      inputStreamReader = new InputStreamReader(inputStream);
97
      documentString = IOUtil.getAsString(inputStreamReader, true);
98
      stringReader = new StringReader(documentString);
99
      harvester.addLogEntry(0,
100
                            "Retrieved: " + documentURL, 
101
                            "GetDocSuccess", 
102
                            harvestSiteSchedule.siteScheduleID, 
103
                            null, 
104
                            "");
105
    }
106
    catch (MalformedURLException e) {
107
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
108
                            harvestSiteSchedule.siteScheduleID, this, 
109
                            "MalformedURLException: " + e.getMessage());
110
    }
111
    catch (IOException e) {
112
      harvester.addLogEntry(1, "IOException", "GetDocError", 
113
                            harvestSiteSchedule.siteScheduleID, this, 
114
                            "IOException: " + e.getMessage());
115
    }
116
    
117
    return stringReader;
118
  }
119
    
120

    
121
  /**
122
   * Harvest the document from the site. Unless Metacat already has the
123
   * document, retrieve the document from the site and put (insert or
124
   * update) it to Metacat. If Metacat already has the document, determine
125
   * the highest revision stored in Metacat so that this can be reported
126
   * back to the user.
127
   */
128
  public void harvestDocument() {
129
    int highestRevision;
130
    boolean insert = false;
131
    String metacatReturnString;
132
    StringReader stringReader;
133
    boolean update = false;
134

    
135
    /* If metacat already has this document, determine the highest revision in
136
     * metacat and report it to the user; else, insert or delete the document 
137
     * into metacat.
138
     */
139
    highestRevision = metacatHighestRevision();
140

    
141
    if (highestRevision == -1) {
142
      insert = true;
143
    }
144
    else if (revision > highestRevision) {
145
      update = true;
146
    }
147
    else {
148
      harvester.addLogEntry(0, 
149
                            "Attempting to update " + docid + " to revision " + 
150
                            revision + ". Metacat has document revision " +
151
                            highestRevision + ".", 
152
                            "MetacatHasDoc", 
153
                            harvestSiteSchedule.siteScheduleID, 
154
                            null, 
155
                            "");
156
    }
157
    
158
    if (insert || update) {
159
      stringReader = getSiteDocument();
160
      if (stringReader != null) {
161
        if (validateDocument()) {
162
          putMetacatDocument(insert, update, stringReader);
163
        }
164
      }
165
    }
166
  }
167
  
168

    
169
  /**
170
   * Logs a metacat document error to the harvest detail log. 
171
   *
172
   * @param insert               true if insert operation, false is update
173
   * @param metacatReturnString  string returned from the insert or update
174
   * @param exceptionName        name of the exception class
175
   * @param e                    the exception object
176
   */
177
  private void logMetacatError (boolean insert, 
178
                                String metacatReturnString,
179
                                String exceptionName,
180
                                Exception e
181
                               ) {
182
    if (insert) {
183
      harvester.addLogEntry(1, 
184
                            metacatReturnString,
185
                            "InsertDocError",
186
                            harvestSiteSchedule.siteScheduleID,
187
                            this,
188
                            exceptionName + ": " + e.getMessage());
189
    }
190
    else {
191
      harvester.addLogEntry(1, 
192
                            metacatReturnString,
193
                            "UpdateDocError",
194
                            harvestSiteSchedule.siteScheduleID,
195
                            this,
196
                            exceptionName + ": " + e.getMessage());
197
    }
198
  }
199
  
200

    
201
  /**
202
   * Determines the highest revision that Metacat has for this document.
203
   * 
204
   * @return  int representing the highest revision for this document in
205
   *          Metacat. Returns -1 if Metacat does not currently hold the
206
   *          document.
207
   */
208
  private int metacatHighestRevision() {
209
    int         highestRevision = -1;
210
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
211
                   "'" + docid + "'";
212
		Statement stmt;
213
    
214
		try {
215
			stmt = harvester.conn.createStatement();							
216
			ResultSet rs = stmt.executeQuery(query);
217
	
218
			while (rs.next()) {
219
				highestRevision = rs.getInt("REV");
220
			}
221
	
222
			stmt.close();	
223
		}
224
    catch(SQLException e) {
225
			System.out.println("SQLException: " + e.getMessage());
226
    }
227

    
228
    return highestRevision;
229
  }
230
  
231
  
232
  /**
233
   * Print the data fields and values in this HarvestDocument object.
234
   * 
235
   * @param out   the PrintStream to write to
236
   */
237
  void printOutput(PrintStream out) {
238
    out.println("* scope:                " + scope);
239
    out.println("* identifier:           " + identifier);
240
    out.println("* revision:             " + revision);
241
    out.println("* documentType:         " + documentType);
242
    out.println("* documentURL:          " + documentURL);
243
  }
244
 
245
 
246
  /**
247
   * Insert or update this document to Metacat. If revision equals 1, do an
248
   * insert; otherwise, do an update.
249
   * 
250
   * @param insert       true if this is an insert operation
251
   * @param update       true if this is an update operation
252
   * @param stringReader the StringReader object holding the document text
253
   */
254
  private void putMetacatDocument(boolean insert,
255
                                  boolean update, 
256
                                  StringReader stringReader) {
257
    Metacat metacat = harvester.metacat;
258
    String metacatReturnString = "";
259
    
260
    if (harvester.connectToMetacat()) {
261
      try {
262
        if (insert) {
263
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
264
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
265
                                "InsertDocSuccess", 
266
                                harvestSiteSchedule.siteScheduleID, 
267
                                null, "");
268
        }
269
        else if (update) {
270
          metacatReturnString = metacat.update(docidFull, stringReader, null);
271
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
272
                                "UpdateDocSuccess", 
273
                                harvestSiteSchedule.siteScheduleID, 
274
                                null, "");
275
        }
276
      }
277
      catch (MetacatInaccessibleException e) {
278
        logMetacatError(insert, metacatReturnString, 
279
                        "MetacatInaccessibleException", e);
280
      }
281
      catch (InsufficientKarmaException e) {
282
        logMetacatError(insert, metacatReturnString, 
283
                        "InsufficientKarmaException", e);
284
      }
285
      catch (MetacatException e) {
286
        logMetacatError(insert, metacatReturnString, "MetacatException", e);
287
      }
288
      catch (IOException e) {
289
        logMetacatError(insert, metacatReturnString, "IOException", e);
290
      }
291
    }
292
  }
293
  
294
  
295
  /**
296
   * Validate the document to determine whether it is valid EML prior to 
297
   * inserting or updating it to Metacat. This is QA/QC measure. 
298
   * Not yet implemented.
299
   * 
300
   * @return  true if the document is valid EML, otherwise false
301
   */
302
  private boolean validateDocument () {
303
    boolean success = true;
304
    
305
    /*if (success) {
306
      harvester.addLogEntry(0, 
307
                            "Validated: " + documentURL, 
308
                            "ValidateDocSuccess", 
309
                            harvestSiteSchedule.siteScheduleID, 
310
                            null, 
311
                            "");
312
    }
313
    else {
314
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
315
                            harvestSiteSchedule.siteScheduleID, this, "");
316
    }*/
317
    
318
    return success;
319
  }
320
  
321
}
(2-2/9)