/*
 * HarvestDocument.java
 *
 * Created on January 14, 2004, 4:37 PM
 */

    
package edu.ucsb.nceas.metacat.harvesterClient;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;

import edu.ucsb.nceas.metacat.client.InsufficientKarmaException;
import edu.ucsb.nceas.metacat.client.Metacat;
import edu.ucsb.nceas.metacat.client.MetacatException;
import edu.ucsb.nceas.metacat.client.MetacatInaccessibleException;
import edu.ucsb.nceas.utilities.IOUtil;

/**
24
 * HarvestDocument manages operations and data for a single document to be
25
 * harvested.
26
 * 
27
 * @author  costa
28
 */
29
public class HarvestDocument {
30
  
31
  private String documentName;  
32
  String documentType;
33
  String documentURL;
34
  private Harvester harvester;
35
  private HarvestSiteSchedule harvestSiteSchedule;
36
  int identifier;
37
  int revision;
38
  String scope;
39
    
40

    
41
  /**
42
   * Creates a new instance of HarvestDocument. Initialized with the data
43
   * that was read from a single <document> element in site document list.
44
   * 
45
   * @param harvester            the parent Harvester object
46
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
47
   * @param scope                the value of the <scope> element
48
   * @param identifier           the value of the <identifier> element
49
   * @param revision             the value of the <revision> element
50
   * @param documentType         the value of the <documentType> element
51
   * @param documentURL          the value of the <documentURL> element
52
   */
53
  public HarvestDocument(
54
                          Harvester harvester,
55
                          HarvestSiteSchedule harvestSiteSchedule,
56
                          String scope,
57
                          int identifier,
58
                          int revision,
59
                          String documentType,
60
                          String documentURL
61
                        ) {
62
    this.harvester = harvester;
63
    this.harvestSiteSchedule = harvestSiteSchedule;
64
    this.documentType = documentType;
65
    this.documentURL = documentURL;
66
    this.scope = scope;
67
    this.identifier = identifier;
68
    this.revision = revision;
69
    
70
    this.documentName = scope + "." + identifier;
71
  }
72

    
73

    
74
  /**
75
   * Retrieve the document from the site using its <documentURL> value.
76
   * 
77
   * @return   A StringReader containing the document string.
78
   */
79
  private StringReader getSiteDocument() {
80
    String documentString;
81
    InputStream inputStream;
82
    InputStreamReader inputStreamReader;
83
    StringReader stringReader = null;
84
    URL url;
85
    
86
    try {
87
      url = new URL(documentURL);
88
      inputStream = url.openStream();
89
      inputStreamReader = new InputStreamReader(inputStream);
90
      documentString = IOUtil.getAsString(inputStreamReader, true);
91
      stringReader = new StringReader(documentString);
92
      harvester.addLogEntry(0, "", "GetDocSuccess", 
93
                            harvestSiteSchedule.siteScheduleID, null, "");
94
    }
95
    catch (MalformedURLException e) {
96
      harvester.addLogEntry(1, "MalformedURLException", "GetDocError", 
97
                            harvestSiteSchedule.siteScheduleID, this, 
98
                            "MalformedURLException: " + e.getMessage());
99
    }
100
    catch (IOException e) {
101
      harvester.addLogEntry(1, "IOException", "GetDocError", 
102
                            harvestSiteSchedule.siteScheduleID, this, 
103
                            "IOException: " + e.getMessage());
104
    }
105
    
106
    return stringReader;
107
  }
108
    
109

    
110
  /**
111
   * Harvest the document from the site. Unless Metacat already has the
112
   * document, retrieve the document from the site and put (insert or
113
   * update) it to Metacat. If Metacat already has the document, determine
114
   * the highest revision stored in Metacat so that this can be reported
115
   * back to the user.
116
   */
117
  public void harvestDocument() {
118
    int highestRevision;
119
    String metacatReturnString;
120
    StringReader stringReader;
121

    
122
    /* If metacat already has this document, determine the highest revision in
123
     * metacat and report it to the user; else, insert or delete the document 
124
     * into metacat.
125
     */
126
    if (metacatHasDocument()) {
127
      highestRevision = metacatHighestRevision();
128
      harvester.addLogEntry(0, 
129
                            "Metacat has document: " + documentName +
130
                            ", highest revision: " + highestRevision, 
131
                            "MetacatHasDoc", 
132
                            harvestSiteSchedule.siteScheduleID, null, "");
133
    }
134
    else {
135
      stringReader = getSiteDocument();
136
      if (stringReader != null) {
137
        if (parseDocument()) {
138
          putMetacatDocument(stringReader);
139
        }
140
      }
141
    }
142
  }
143
  
144

    
145
  /**
146
   * Logs a metacat document error to the harvest detail log. 
147
   *
148
   * @param insert               true if insert operation, false is update
149
   * @param metacatReturnString  string returned from the insert or update
150
   * @param exceptionName        name of the exception class
151
   * @param e                    the exception object
152
   */
153
  private void logMetacatError (boolean insert, 
154
                                String metacatReturnString,
155
                                String exceptionName,
156
                                Exception e
157
                               ) {
158
    if (insert) {
159
      harvester.addLogEntry(1, metacatReturnString, "InsertDocError", 
160
                                harvestSiteSchedule.siteScheduleID, 
161
                                this, exceptionName + ": " + e.getMessage());
162
    }
163
    else {
164
      harvester.addLogEntry(1, metacatReturnString, "UpdateDocError", 
165
                                harvestSiteSchedule.siteScheduleID, 
166
                                this, exceptionName + ": " + e.getMessage());
167
    }
168
  }
169
  
170

    
171
  /**
172
   * Boolean to determine whether Metacat already has this document.
173
   * 
174
   * @return  true if Metacat has the document, otherwise false
175
   */
176
  private boolean metacatHasDocument() {
177
    boolean     hasDocument = false;
178

    
179
    return hasDocument;
180
  }
181
    
182

    
183
  /**
184
   * Determines the highest revision that Metacat has for this document.
185
   * 
186
   * @return  int representing the highest revision for this document in Metacat
187
   */
188
  private int metacatHighestRevision() {
189
    int         highestRevision = 0;
190
        
191
    return highestRevision;
192
  }
193
  
194
  
195
  /**
196
   * Parse the document to determine whether it is valid EML prior to inserting
197
   * or updating it to Metacat. This is QA/QC measure. Currently unimplemented.
198
   * 
199
   * @return  true if the document is valid EML, otherwise false
200
   */
201
  private boolean parseDocument () {
202
    boolean success = true;
203
    
204
    if (success) {
205
      harvester.addLogEntry(0, "", "ValidateDocSuccess", 
206
                            harvestSiteSchedule.siteScheduleID, null, "");
207
    }
208
    else {
209
      harvester.addLogEntry(1, "Error validating document", "ValidateDocError", 
210
                            harvestSiteSchedule.siteScheduleID, this, "");
211
    }
212
    
213
    return success;
214
  }
215
  
216
  
217
  /**
218
   * Print the data fields and values in this HarvestDocument object.
219
   */
220
  void printOutput() {
221
    System.out.println("scope:                " + scope);
222
    System.out.println("identifier:           " + identifier);
223
    System.out.println("revision:             " + revision);
224
    System.out.println("documentType:         " + documentType);
225
    System.out.println("documentURL:          " + documentURL);
226
    System.out.println("documentName:         " + documentName);
227
  }
228
 
229
 
230
  /**
231
   * Insert or update this document to Metacat. If revision equals 1, do an
232
   * insert; otherwise, do an update.
233
   */
234
  private void putMetacatDocument(StringReader stringReader) {
235
    String docid = scope + "." + identifier + "." + revision;
236
    boolean insert = (revision == 1);
237
    Metacat metacat = harvester.metacat;
238
    String metacatReturnString = "";
239
    
240
    if (harvester.connectToMetacat()) {
241
      try {
242
        if (insert) {
243
          metacatReturnString = metacat.insert(docid, stringReader, null);
244
          harvester.addLogEntry(0, docid + " : " + metacatReturnString, 
245
                                "InsertDocSuccess", 
246
                                harvestSiteSchedule.siteScheduleID, 
247
                                null, "");
248
        }
249
        else {
250
          metacatReturnString = metacat.update(docid, stringReader, null);
251
          harvester.addLogEntry(0, docid + " : " + metacatReturnString, 
252
                                "UpdateDocSuccess", 
253
                                harvestSiteSchedule.siteScheduleID, 
254
                                null, "");
255
        }
256
      }
257
      catch (MetacatInaccessibleException e) {
258
        logMetacatError(insert, metacatReturnString, 
259
                        "MetacatInaccessibleException", e);
260
      }
261
      catch (InsufficientKarmaException e) {
262
        logMetacatError(insert, metacatReturnString, 
263
                        "InsufficientKarmaException", e);
264
      }
265
      catch (MetacatException e) {
266
        logMetacatError(insert, metacatReturnString, "MetacatException", e);
267
      }
268
      catch (IOException e) {
269
        logMetacatError(insert, metacatReturnString, "IOException", e);
270
      }
271
    }
272
  }
273
}
(2-2/7)