Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: costa $'
7
 *     '$Date: 2004-05-03 15:59:10 -0700 (Mon, 03 May 2004) $'
8
 * '$Revision: 2155 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import java.io.InputStream;
28
import java.io.InputStreamReader;
29
import java.io.IOException;
30
import java.io.PrintStream;
31
import java.io.StringReader;
32
import java.net.MalformedURLException;
33
import java.net.URL;
34
import java.sql.Connection;
35
import java.sql.ResultSet;
36
import java.sql.SQLException;
37
import java.sql.Statement;
38

    
39
import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
40
import edu.ucsb.nceas.metacat.client.Metacat;
41
import edu.ucsb.nceas.metacat.client.MetacatException;
42
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
43
import edu.ucsb.nceas.utilities.IOUtil;
44

    
45

    
46
/**
47
 * HarvestDocument manages operations and data for a single document to be
48
 * harvested.
49
 * 
50
 * @author  costa
51
 */
52
public class HarvestDocument {
53

    
54
   
55
  private String docid;                      // scope + identifier
56
  private String docidFull;                  // scope + identifier + revision
57
  String documentType;
58
  String documentURL;
59
  private Harvester harvester;
60
  private HarvestSiteSchedule harvestSiteSchedule;
61
  int identifier;
62
  int revision;
63
  String scope;
64

    
65
  /* These booleans keep track of status information. They are used when
66
   * generating email reports.
67
   */
68
  boolean accessError = false;
69
  boolean inserted = false;
70
  boolean metacatHasIt = false;
71
  boolean updated = false;
72
  boolean uploadError = false;
73
    
74

    
75
  /**
76
   * Creates a new instance of HarvestDocument. Initialized with the data
77
   * that was read from a single <document> element in site document list.
78
   * 
79
   * @param harvester            the parent Harvester object
80
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
81
   * @param scope                the value of the <scope> element
82
   * @param identifier           the value of the <identifier> element
83
   * @param revision             the value of the <revision> element
84
   * @param documentType         the value of the <documentType> element
85
   * @param documentURL          the value of the <documentURL> element
86
   */
87
  public HarvestDocument (
88
                          Harvester harvester,
89
                          HarvestSiteSchedule harvestSiteSchedule,
90
                          String scope,
91
                          int identifier,
92
                          int revision,
93
                          String documentType,
94
                          String documentURL
95
                        ) {
96
    this.harvester = harvester;
97
    this.harvestSiteSchedule = harvestSiteSchedule;
98
    this.documentType = documentType;
99
    this.documentURL = documentURL;
100
    this.scope = scope;
101
    this.identifier = identifier;
102
    this.revision = revision;
103
    
104
    this.docid = scope + "." + identifier;
105
    this.docidFull = this.docid + "." + revision;
106
  }
107

    
108

    
109
  /**
110
   * Retrieve the document from the site using its <documentURL> value.
111
   * 
112
   * @return   A StringReader containing the document string.
113
   */
114
  public StringReader getSiteDocument() {
115
    String documentString;
116
    InputStream inputStream;
117
    InputStreamReader inputStreamReader;
118
    StringReader stringReader = null;
119
    URL url;
120
    
121
    try {
122
      url = new URL(documentURL);
123
      inputStream = url.openStream();
124
      inputStreamReader = new InputStreamReader(inputStream);
125
      documentString = IOUtil.getAsString(inputStreamReader, true);
126
      stringReader = new StringReader(documentString);
127
      harvester.addLogEntry(0,
128
                            "Retrieved: " + documentURL, 
129
                            "GetDocSuccess", 
130
                            harvestSiteSchedule.siteScheduleID, 
131
                            null, 
132
                            "");
133
    }
134
    catch (MalformedURLException e) {
135
      accessError = true;
136
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
137
                            harvestSiteSchedule.siteScheduleID, this, 
138
                            "MalformedURLException: " + e.getMessage());
139
    }
140
    catch (IOException e) {
141
      accessError = true;
142
      harvester.addLogEntry(1, "IOException", "GetDocError", 
143
                            harvestSiteSchedule.siteScheduleID, this, 
144
                            "IOException: " + e.getMessage());
145
    }
146
    
147
    return stringReader;
148
  }
149
    
150

    
151
  /**
152
   * Harvest the document from the site. Unless Metacat already has the
153
   * document, retrieve the document from the site and put (insert or
154
   * update) it to Metacat. If Metacat already has the document, determine
155
   * the highest revision stored in Metacat so that this can be reported
156
   * back to the user.
157
   */
158
  public void harvestDocument() {
159
    int highestRevision;
160
    boolean insert = false;
161
    String metacatReturnString;
162
    StringReader stringReader;
163
    boolean update = false;
164

    
165
    /* If metacat already has this document, determine the highest revision in
166
     * metacat and report it to the user; else, insert or delete the document 
167
     * into metacat.
168
     */
169
    highestRevision = metacatHighestRevision();
170

    
171
    if (highestRevision == -1) {
172
      insert = true;
173
    }
174
    else if (revision > highestRevision) {
175
      update = true;
176
    }
177
    else {
178
      metacatHasIt = true;
179
      harvester.addLogEntry(0, 
180
                            "Attempting to update " + docid + " to revision " + 
181
                            revision + ". Metacat has document revision " +
182
                            highestRevision + ".", 
183
                            "MetacatHasDoc", 
184
                            harvestSiteSchedule.siteScheduleID, 
185
                            null, 
186
                            "");
187
    }
188
    
189
    if (insert || update) {
190
      stringReader = getSiteDocument();
191
      if (stringReader != null) {
192
        if (validateDocument()) {
193
          putMetacatDocument(insert, update, stringReader);
194
        }
195
      }
196
    }
197
  }
198
  
199

    
200
  /**
201
   * Logs a metacat document error to the harvest detail log. 
202
   *
203
   * @param insert               true if insert operation, false is update
204
   * @param metacatReturnString  string returned from the insert or update
205
   * @param exceptionName        name of the exception class
206
   * @param e                    the exception object
207
   */
208
  private void logMetacatError (boolean insert, 
209
                                String metacatReturnString,
210
                                String exceptionName,
211
                                Exception e
212
                               ) {
213
    uploadError = true;
214

    
215
    if (insert) {
216
      harvester.addLogEntry(1, 
217
                            metacatReturnString,
218
                            "InsertDocError",
219
                            harvestSiteSchedule.siteScheduleID,
220
                            this,
221
                            exceptionName + ": " + e.getMessage());
222
    }
223
    else {
224
      harvester.addLogEntry(1, 
225
                            metacatReturnString,
226
                            "UpdateDocError",
227
                            harvestSiteSchedule.siteScheduleID,
228
                            this,
229
                            exceptionName + ": " + e.getMessage());
230
    }
231
  }
232
  
233

    
234
  /**
235
   * Determines the highest revision that Metacat has for this document.
236
   * 
237
   * @return  int representing the highest revision for this document in
238
   *          Metacat. Returns -1 if Metacat does not currently hold the
239
   *          document.
240
   */
241
  public int metacatHighestRevision() {
242
    Connection conn = harvester.getConnection();
243
    int         highestRevision = -1;
244
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
245
                   "'" + docid + "'";
246
		Statement stmt;
247
    
248
		try {
249
			stmt = conn.createStatement();							
250
			ResultSet rs = stmt.executeQuery(query);
251
	
252
			while (rs.next()) {
253
				highestRevision = rs.getInt("REV");
254
			}
255
	
256
			stmt.close();	
257
		}
258
    catch(SQLException e) {
259
			System.out.println("SQLException: " + e.getMessage());
260
    }
261

    
262
    return highestRevision;
263
  }
264
  
265
  
266
  /**
267
   * Print the data fields and values in this HarvestDocument object.
268
   * 
269
   * @param out   the PrintStream to write to
270
   */
271
  public void printOutput(PrintStream out) {
272
    out.println("* scope:                " + scope);
273
    out.println("* identifier:           " + identifier);
274
    out.println("* revision:             " + revision);
275
    out.println("* documentType:         " + documentType);
276
    out.println("* documentURL:          " + documentURL);
277
  }
278
 
279
 
280
  /**
281
   * Print the document URL following by its scope.identifier.revision.
282
   * Used for report generation.
283
   * 
284
   * @param out   the PrintStream to write to
285
   */
286
  public void prettyPrint(PrintStream out) {
287
    out.println("*   " + docidFull + "  (" + documentURL + ")");
288
  }
289
 
290
 
291
  /**
292
   * Insert or update this document to Metacat. If revision equals 1, do an
293
   * insert; otherwise, do an update.
294
   * 
295
   * @param insert       true if this is an insert operation
296
   * @param update       true if this is an update operation
297
   * @param stringReader the StringReader object holding the document text
298
   */
299
  private void putMetacatDocument(boolean insert,
300
                                  boolean update, 
301
                                  StringReader stringReader) {
302
    Metacat metacat = harvester.metacat;
303
    String metacatReturnString = "";
304
    
305
    if (harvester.connectToMetacat()) {
306
      try {
307
        if (insert) {
308
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
309
          inserted = true;
310
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
311
                                "InsertDocSuccess", 
312
                                harvestSiteSchedule.siteScheduleID, 
313
                                null, "");
314
        }
315
        else if (update) {
316
          metacatReturnString = metacat.update(docidFull, stringReader, null);
317
          updated = true;
318
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
319
                                "UpdateDocSuccess", 
320
                                harvestSiteSchedule.siteScheduleID, 
321
                                null, "");
322
        }
323
      }
324
      catch (MetacatInaccessibleException e) {
325
        logMetacatError(insert, metacatReturnString, 
326
                        "MetacatInaccessibleException", e);
327
      }
328
      catch (InsufficientKarmaException e) {
329
        logMetacatError(insert, metacatReturnString, 
330
                        "InsufficientKarmaException", e);
331
      }
332
      catch (MetacatException e) {
333
        logMetacatError(insert, metacatReturnString, "MetacatException", e);
334
      }
335
      catch (IOException e) {
336
        logMetacatError(insert, metacatReturnString, "IOException", e);
337
      }
338
    }
339
  }
340
  
341
  
342
  /**
343
   * Validate the document to determine whether it is valid EML prior to 
344
   * inserting or updating it to Metacat. This is QA/QC measure. 
345
   * Not yet implemented.
346
   * 
347
   * @return  true if the document is valid EML, otherwise false
348
   */
349
  private boolean validateDocument () {
350
    boolean success = true;
351
    
352
    /*if (success) {
353
      harvester.addLogEntry(0, 
354
                            "Validated: " + documentURL, 
355
                            "ValidateDocSuccess", 
356
                            harvestSiteSchedule.siteScheduleID, 
357
                            null, 
358
                            "");
359
    }
360
    else {
361
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
362
                            harvestSiteSchedule.siteScheduleID, this, "");
363
    }*/
364
    
365
    return success;
366
  }
367
  
368
}
(2-2/10)