Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *  Copyright: 2004 University of New Mexico and the 
4
 *                  Regents of the University of California
5
 *
6
 *   '$Author: jones $'
7
 *     '$Date: 2004-04-01 16:41:58 -0800 (Thu, 01 Apr 2004) $'
8
 * '$Revision: 2094 $'
9
 *
10
 * This program is free software; you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation; either version 2 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
 */
24

    
25
package edu.ucsb.nceas.metacat.harvesterClient;
26

    
27
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.client.Metacat;
import edu.ucsb.nceas.metacat.client.MetacatException;
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
import edu.ucsb.nceas.utilities.IOUtil;
43

    
44

    
45
/**
46
 * HarvestDocument manages operations and data for a single document to be
47
 * harvested.
48
 * 
49
 * @author  costa
50
 */
51
public class HarvestDocument {
52

    
53
   
54
  private String docid;                      // scope + identifier
55
  private String docidFull;                  // scope + identifier + revision
56
  String documentType;
57
  String documentURL;
58
  private Harvester harvester;
59
  private HarvestSiteSchedule harvestSiteSchedule;
60
  int identifier;
61
  int revision;
62
  String scope;
63
    
64

    
65
  /**
66
   * Creates a new instance of HarvestDocument. Initialized with the data
67
   * that was read from a single <document> element in site document list.
68
   * 
69
   * @param harvester            the parent Harvester object
70
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
71
   * @param scope                the value of the <scope> element
72
   * @param identifier           the value of the <identifier> element
73
   * @param revision             the value of the <revision> element
74
   * @param documentType         the value of the <documentType> element
75
   * @param documentURL          the value of the <documentURL> element
76
   */
77
  public HarvestDocument(
78
                          Harvester harvester,
79
                          HarvestSiteSchedule harvestSiteSchedule,
80
                          String scope,
81
                          int identifier,
82
                          int revision,
83
                          String documentType,
84
                          String documentURL
85
                        ) {
86
    this.harvester = harvester;
87
    this.harvestSiteSchedule = harvestSiteSchedule;
88
    this.documentType = documentType;
89
    this.documentURL = documentURL;
90
    this.scope = scope;
91
    this.identifier = identifier;
92
    this.revision = revision;
93
    
94
    this.docid = scope + "." + identifier;
95
    this.docidFull = this.docid + "." + revision;
96
  }
97

    
98

    
99
  /**
100
   * Retrieve the document from the site using its <documentURL> value.
101
   * 
102
   * @return   A StringReader containing the document string.
103
   */
104
  private StringReader getSiteDocument() {
105
    String documentString;
106
    InputStream inputStream;
107
    InputStreamReader inputStreamReader;
108
    StringReader stringReader = null;
109
    URL url;
110
    
111
    try {
112
      url = new URL(documentURL);
113
      inputStream = url.openStream();
114
      inputStreamReader = new InputStreamReader(inputStream);
115
      documentString = IOUtil.getAsString(inputStreamReader, true);
116
      stringReader = new StringReader(documentString);
117
      harvester.addLogEntry(0,
118
                            "Retrieved: " + documentURL, 
119
                            "GetDocSuccess", 
120
                            harvestSiteSchedule.siteScheduleID, 
121
                            null, 
122
                            "");
123
    }
124
    catch (MalformedURLException e) {
125
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
126
                            harvestSiteSchedule.siteScheduleID, this, 
127
                            "MalformedURLException: " + e.getMessage());
128
    }
129
    catch (IOException e) {
130
      harvester.addLogEntry(1, "IOException", "GetDocError", 
131
                            harvestSiteSchedule.siteScheduleID, this, 
132
                            "IOException: " + e.getMessage());
133
    }
134
    
135
    return stringReader;
136
  }
137
    
138

    
139
  /**
140
   * Harvest the document from the site. Unless Metacat already has the
141
   * document, retrieve the document from the site and put (insert or
142
   * update) it to Metacat. If Metacat already has the document, determine
143
   * the highest revision stored in Metacat so that this can be reported
144
   * back to the user.
145
   */
146
  public void harvestDocument() {
147
    int highestRevision;
148
    boolean insert = false;
149
    String metacatReturnString;
150
    StringReader stringReader;
151
    boolean update = false;
152

    
153
    /* If metacat already has this document, determine the highest revision in
154
     * metacat and report it to the user; else, insert or delete the document 
155
     * into metacat.
156
     */
157
    highestRevision = metacatHighestRevision();
158

    
159
    if (highestRevision == -1) {
160
      insert = true;
161
    }
162
    else if (revision > highestRevision) {
163
      update = true;
164
    }
165
    else {
166
      harvester.addLogEntry(0, 
167
                            "Attempting to update " + docid + " to revision " + 
168
                            revision + ". Metacat has document revision " +
169
                            highestRevision + ".", 
170
                            "MetacatHasDoc", 
171
                            harvestSiteSchedule.siteScheduleID, 
172
                            null, 
173
                            "");
174
    }
175
    
176
    if (insert || update) {
177
      stringReader = getSiteDocument();
178
      if (stringReader != null) {
179
        if (validateDocument()) {
180
          putMetacatDocument(insert, update, stringReader);
181
        }
182
      }
183
    }
184
  }
185
  
186

    
187
  /**
188
   * Logs a metacat document error to the harvest detail log. 
189
   *
190
   * @param insert               true if insert operation, false is update
191
   * @param metacatReturnString  string returned from the insert or update
192
   * @param exceptionName        name of the exception class
193
   * @param e                    the exception object
194
   */
195
  private void logMetacatError (boolean insert, 
196
                                String metacatReturnString,
197
                                String exceptionName,
198
                                Exception e
199
                               ) {
200
    if (insert) {
201
      harvester.addLogEntry(1, 
202
                            metacatReturnString,
203
                            "InsertDocError",
204
                            harvestSiteSchedule.siteScheduleID,
205
                            this,
206
                            exceptionName + ": " + e.getMessage());
207
    }
208
    else {
209
      harvester.addLogEntry(1, 
210
                            metacatReturnString,
211
                            "UpdateDocError",
212
                            harvestSiteSchedule.siteScheduleID,
213
                            this,
214
                            exceptionName + ": " + e.getMessage());
215
    }
216
  }
217
  
218

    
219
  /**
220
   * Determines the highest revision that Metacat has for this document.
221
   * 
222
   * @return  int representing the highest revision for this document in
223
   *          Metacat. Returns -1 if Metacat does not currently hold the
224
   *          document.
225
   */
226
  private int metacatHighestRevision() {
227
    int         highestRevision = -1;
228
		String query = "SELECT REV FROM XML_DOCUMENTS WHERE DOCID = " +
229
                   "'" + docid + "'";
230
		Statement stmt;
231
    
232
		try {
233
			stmt = harvester.conn.createStatement();							
234
			ResultSet rs = stmt.executeQuery(query);
235
	
236
			while (rs.next()) {
237
				highestRevision = rs.getInt("REV");
238
			}
239
	
240
			stmt.close();	
241
		}
242
    catch(SQLException e) {
243
			System.out.println("SQLException: " + e.getMessage());
244
    }
245

    
246
    return highestRevision;
247
  }
248
  
249
  
250
  /**
251
   * Print the data fields and values in this HarvestDocument object.
252
   * 
253
   * @param out   the PrintStream to write to
254
   */
255
  void printOutput(PrintStream out) {
256
    out.println("* scope:                " + scope);
257
    out.println("* identifier:           " + identifier);
258
    out.println("* revision:             " + revision);
259
    out.println("* documentType:         " + documentType);
260
    out.println("* documentURL:          " + documentURL);
261
  }
262
 
263
 
264
  /**
265
   * Insert or update this document to Metacat. If revision equals 1, do an
266
   * insert; otherwise, do an update.
267
   * 
268
   * @param insert       true if this is an insert operation
269
   * @param update       true if this is an update operation
270
   * @param stringReader the StringReader object holding the document text
271
   */
272
  private void putMetacatDocument(boolean insert,
273
                                  boolean update, 
274
                                  StringReader stringReader) {
275
    Metacat metacat = harvester.metacat;
276
    String metacatReturnString = "";
277
    
278
    if (harvester.connectToMetacat()) {
279
      try {
280
        if (insert) {
281
          metacatReturnString = metacat.insert(docidFull, stringReader, null);
282
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
283
                                "InsertDocSuccess", 
284
                                harvestSiteSchedule.siteScheduleID, 
285
                                null, "");
286
        }
287
        else if (update) {
288
          metacatReturnString = metacat.update(docidFull, stringReader, null);
289
          harvester.addLogEntry(0, docidFull + " : " + metacatReturnString, 
290
                                "UpdateDocSuccess", 
291
                                harvestSiteSchedule.siteScheduleID, 
292
                                null, "");
293
        }
294
      }
295
      catch (MetacatInaccessibleException e) {
296
        logMetacatError(insert, metacatReturnString, 
297
                        "MetacatInaccessibleException", e);
298
      }
299
      catch (InsufficientKarmaException e) {
300
        logMetacatError(insert, metacatReturnString, 
301
                        "InsufficientKarmaException", e);
302
      }
303
      catch (MetacatException e) {
304
        logMetacatError(insert, metacatReturnString, "MetacatException", e);
305
      }
306
      catch (IOException e) {
307
        logMetacatError(insert, metacatReturnString, "IOException", e);
308
      }
309
    }
310
  }
311
  
312
  
313
  /**
314
   * Validate the document to determine whether it is valid EML prior to 
315
   * inserting or updating it to Metacat. This is QA/QC measure. 
316
   * Not yet implemented.
317
   * 
318
   * @return  true if the document is valid EML, otherwise false
319
   */
320
  private boolean validateDocument () {
321
    boolean success = true;
322
    
323
    /*if (success) {
324
      harvester.addLogEntry(0, 
325
                            "Validated: " + documentURL, 
326
                            "ValidateDocSuccess", 
327
                            harvestSiteSchedule.siteScheduleID, 
328
                            null, 
329
                            "");
330
    }
331
    else {
332
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
333
                            harvestSiteSchedule.siteScheduleID, this, "");
334
    }*/
335
    
336
    return success;
337
  }
338
  
339
}
(2-2/9)