Project

General

Profile

1
/**
 *  '$RCSfile$'
 *  Copyright: 2004 University of New Mexico and the 
 *                  Regents of the University of California
 *
 *   '$Author: costa $'
 *     '$Date: 2009-12-21 12:10:29 -0800 (Mon, 21 Dec 2009) $'
 * '$Revision: 5169 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.client.Metacat;
import edu.ucsb.nceas.metacat.client.MetacatException;
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
import edu.ucsb.nceas.utilities.IOUtil;
44

    
45

    
46
/**
47
 * HarvestDocument manages operations and data for a single document to be
48
 * harvested.
49
 * 
50
 * @author  costa
51
 */
52
public class HarvestDocument {
53

    
54
   
55
  private String docid;                      // scope + identifier
56
  private String docidFull;                  // scope + identifier + revision
57
  String documentType;
58
  String documentURL;
59
  private Harvester harvester;
60
  private HarvestSiteSchedule harvestSiteSchedule;
61
  int identifier;
62
  int revision;
63
  String scope;
64

    
65
  /* These booleans keep track of status information. They are used when
66
   * generating email reports.
67
   */
68
  boolean accessError = false;
69
  boolean inserted = false;
70
  boolean metacatHasIt = false;
71
  boolean updated = false;
72
  boolean uploadError = false;
73
    
74

    
75
  /**
76
   * Creates a new instance of HarvestDocument. Initialized with the data
77
   * that was read from a single <document> element in site document list.
78
   * 
79
   * @param harvester            the parent Harvester object
80
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
81
   * @param scope                the value of the <scope> element
82
   * @param identifier           the value of the <identifier> element
83
   * @param revision             the value of the <revision> element
84
   * @param documentType         the value of the <documentType> element
85
   * @param documentURL          the value of the <documentURL> element
86
   */
87
  public HarvestDocument (
88
                          Harvester harvester,
89
                          HarvestSiteSchedule harvestSiteSchedule,
90
                          String scope,
91
                          int identifier,
92
                          int revision,
93
                          String documentType,
94
                          String documentURL
95
                        ) {
96
    this.harvester = harvester;
97
    this.harvestSiteSchedule = harvestSiteSchedule;
98
    this.documentType = documentType;
99
    this.documentURL = documentURL;
100
    this.scope = scope;
101
    this.identifier = identifier;
102
    this.revision = revision;
103
    
104
    this.docid = scope + "." + identifier;
105
    this.docidFull = this.docid + "." + revision;
106
  }
107

    
108

    
109
  /**
110
   * Retrieve the document from the site using its <documentURL> value.
111
   * 
112
   * @return   A StringReader containing the document string.
113
   */
114
  public StringReader getSiteDocument() {
115
    String documentString;
116
    InputStream inputStream;
117
    InputStreamReader inputStreamReader;
118
    StringReader stringReader = null;
119
    URL url;
120
    
121
    try {
122
      url = new URL(documentURL);
123
      inputStream = url.openStream();
124
      inputStreamReader = new InputStreamReader(inputStream);
125
      documentString = IOUtil.getAsString(inputStreamReader, true);
126
      stringReader = new StringReader(documentString);
127
      harvester.addLogEntry(0,
128
                            "Retrieved: " + documentURL, 
129
                            "harvester.GetDocSuccess", 
130
                            harvestSiteSchedule.siteScheduleID, 
131
                            null, 
132
                            "");
133
    }
134
    catch (MalformedURLException e) {
135
      accessError = true;
136
      harvester.addLogEntry(1, "MalformedURLException", "harvester.GetDocError", 
137
                            harvestSiteSchedule.siteScheduleID, this, 
138
                            "MalformedURLException: " + e.getMessage());
139
    }
140
    catch (IOException e) {
141
      accessError = true;
142
      harvester.addLogEntry(1, "IOException", "harvester.GetDocError", 
143
                            harvestSiteSchedule.siteScheduleID, this, 
144
                            "IOException: " + e.getMessage());
145
    }
146
    
147
    return stringReader;
148
  }
149
    
150

    
151
  /**
152
   * Harvest the document from the site. Unless Metacat already has the
153
   * document, retrieve the document from the site and put (insert or
154
   * update) it to Metacat. If Metacat already has the document, determine
155
   * the highest revision stored in Metacat so that this can be reported
156
   * back to the user.
157
   */
158
  public void harvestDocument() {
159
    int highestRevision;
160
    boolean insert = false;
161
    String metacatReturnString;
162
    StringReader stringReader;
163
    boolean update = false;
164

    
165
    /* If metacat already has this document, determine the highest revision in
166
     * metacat and report it to the user; else, insert or delete the document 
167
     * into metacat.
168
     */
169
    highestRevision = metacatHighestRevision();
170

    
171
    if (highestRevision == -1) {
172
      insert = true;
173
    }
174
    else if (revision > highestRevision) {
175
      update = true;
176
    }
177
    else {
178
      metacatHasIt = true;
179
      harvester.addLogEntry(0, 
180
                            "Attempting to update " + docid + " to revision " + 
181
                            revision + ". Metacat has document revision " +
182
                            highestRevision + ".", 
183
                            "harvester.MetacatHasDoc", 
184
                            harvestSiteSchedule.siteScheduleID, 
185
                            null, 
186
                            "");
187
    }
188
    
189
    if (insert || update) {
190
      stringReader = getSiteDocument();
191
      if (stringReader != null) {
192
        if (validateDocument()) {
193
          putMetacatDocument(insert, update, stringReader);
194
        }
195
      }
196
    }
197
  }
198
  
199

    
200
  /**
201
   * Boolean to determine whether the string returned by the Metacat client for
202
   * an insert or update operation indicates that the operation succeeded.
203
   * 
204
   * @param metacatReturnString     The string returned by the Metacat client.
205
   * @return true if the return string indicates success, else false
206
   */
207
  private boolean isMetacatSuccessString(String metacatReturnString) {
208
    boolean isMetacatSuccessString = false;
209
    
210
    if ((metacatReturnString != null) &&
211
        (metacatReturnString.contains("<success>"))
212
       ) {
213
      isMetacatSuccessString = true;
214
    }
215
    
216
    return isMetacatSuccessString;
217
  }
218
 
219
 
220
  /**
221
   * Logs a metacat document error to the harvest detail log. 
222
   *
223
   * @param insert               true if insert operation, false is update
224
   * @param metacatReturnString  string returned from the insert or update
225
   * @param exceptionName        name of the exception class
226
   * @param e                    the exception object
227
   */
228
  private void logMetacatError (boolean insert, 
229
                                String metacatReturnString,
230
                                String exceptionName,
231
                                Exception e
232
                               ) {
233
    uploadError = true;
234

    
235
    if (insert) {
236
      harvester.addLogEntry(1, 
237
                            metacatReturnString,
238
                            "harvester.InsertDocError",
239
                            harvestSiteSchedule.siteScheduleID,
240
                            this,
241
                            exceptionName + ": " + e.getMessage());
242
    }
243
    else {
244
      harvester.addLogEntry(1, 
245
                            metacatReturnString,
246
                            "harvester.UpdateDocError",
247
                            harvestSiteSchedule.siteScheduleID,
248
                            this,
249
                            exceptionName + ": " + e.getMessage());
250
    }
251
  }
252
  
253

    
254
  /**
255
   * Determines the highest revision that Metacat has for this document.
256
   * 
257
   * @return  int representing the highest revision for this document in
258
   *          Metacat. Returns -1 if Metacat does not currently hold the
259
   *          document.
260
   */
261
  public int metacatHighestRevision() {
262
    Connection conn = harvester.getConnection();
263
    int         highestRevision = -1;
264
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
265
                   "'" + docid + "'";
266
		Statement stmt;
267
    
268
		try {
269
			stmt = conn.createStatement();							
270
			ResultSet rs = stmt.executeQuery(query);
271
	
272
			while (rs.next()) {
273
				highestRevision = rs.getInt("REV");
274
			}
275
	
276
			stmt.close();	
277
		}
278
    catch(SQLException e) {
279
			System.out.println("SQLException: " + e.getMessage());
280
    }
281

    
282
    return highestRevision;
283
  }
284
  
285
  
286
  /**
287
   * Print the data fields and values in this HarvestDocument object.
288
   * 
289
   * @param out   the PrintStream to write to
290
   */
291
  public void printOutput(PrintStream out) {
292
    out.println("* scope:                " + scope);
293
    out.println("* identifier:           " + identifier);
294
    out.println("* revision:             " + revision);
295
    out.println("* documentType:         " + documentType);
296
    out.println("* documentURL:          " + documentURL);
297
  }
298
 
299
 
300
  /**
301
   * Print the document URL following by its scope.identifier.revision.
302
   * Used for report generation.
303
   * 
304
   * @param out   the PrintStream to write to
305
   */
306
  public void prettyPrint(PrintStream out) {
307
    out.println("*   " + docidFull + "  (" + documentURL + ")");
308
  }
309
 
310
 
311
  /**
312
   * Insert or update this document to Metacat. If revision equals 1, do an
313
   * insert; otherwise, do an update.
314
   * 
315
   * @param insert       true if this is an insert operation
316
   * @param update       true if this is an update operation
317
   * @param stringReader the StringReader object holding the document text
318
   */
319
  private void putMetacatDocument(boolean insert,
320
                                  boolean update, 
321
                                  StringReader stringReader) {
322
    Metacat metacat = harvester.metacat;
323
    String metacatReturnString = "";
324
    
325
    if (harvester.connectToMetacat()) {
326
      try {
327
        String harvestOperationCode = "";
328
        
329
        if (insert) {
330
          harvestOperationCode = "harvester.InsertDocSuccess";
331
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
332
          this.inserted = true;
333
        }
334
        else if (update) {
335
          harvestOperationCode = "harvester.UpdateDocSuccess";
336
          metacatReturnString = metacat.update(docidFull, stringReader, null);
337
          this.updated = true;
338
        }
339
        
340
        if (isMetacatSuccessString(metacatReturnString)) {
341
          String message = docidFull + " : " + metacatReturnString;
342
          harvester.addLogEntry(0, message, harvestOperationCode, 
343
                                harvestSiteSchedule.siteScheduleID, null, "");
344
        }
345
        else {
346
          this.inserted = false;
347
          this.updated = false;
348
          final String exceptionName = "UnreportedMetacatException";
349
          final String exceptionMessage = 
350
                    "Metacat insert/update failed without reporting an exception";
351
          Exception e = new Exception(exceptionMessage);
352
          logMetacatError(insert, metacatReturnString, exceptionName, e);
353
        }
354
      }
355
      catch (MetacatInaccessibleException e) {
356
        logMetacatError(insert, metacatReturnString, 
357
                        "MetacatInaccessibleException", e);
358
      }
359
      catch (InsufficientKarmaException e) {
360
        logMetacatError(insert, metacatReturnString, 
361
                        "InsufficientKarmaException", e);
362
      }
363
      catch (MetacatException e) {
364
        logMetacatError(insert, metacatReturnString, "MetacatException", e);
365
      }
366
      catch (IOException e) {
367
        logMetacatError(insert, metacatReturnString, "IOException", e);
368
      }
369
      catch (Exception e) {
370
        logMetacatError(insert, metacatReturnString, "Exception", e);
371
      }
372
    }
373
  }
374
  
375
  
376
  /**
377
   * Validate the document to determine whether it is valid EML prior to 
378
   * inserting or updating it to Metacat. This is QA/QC measure. 
379
   * Not yet implemented.
380
   * 
381
   * @return  true if the document is valid EML, otherwise false
382
   */
383
  private boolean validateDocument () {
384
    boolean success = true;
385
    
386
    /*if (success) {
387
      harvester.addLogEntry(0, 
388
                            "Validated: " + documentURL, 
389
                            "harvester.ValidateDocSuccess", 
390
                            harvestSiteSchedule.siteScheduleID, 
391
                            null, 
392
                            "");
393
    }
394
    else {
395
      harvester.addLogEntry(1, "Error validating document", "harvester.ValidateDocError", 
396
                            harvestSiteSchedule.siteScheduleID, this, "");
397
    }*/
398
    
399
    return success;
400
  }
401
  
402
}
(2-2/11)