Project

General

Profile

1
/*
2
 * HarvestDocument.java
3
 *
4
 * Created on January 14, 2004, 4:37 PM
5
 */
6

    
7
package edu.ucsb.nceas.metacat.harvesterClient;
8

    
9
import java.io.InputStream;
10
import java.io.InputStreamReader;
11
import java.io.IOException;
12
import java.io.StringReader;
13
import java.net.MalformedURLException;
14
import java.net.URL;
15

    
16
import edu.ucsb.nceas.metacat.client.*;
17
import edu.ucsb.nceas.utilities.IOUtil;
18

    
19

    
20
/**
21
 * HarvestDocument manages operations and data for a single document to be
22
 * harvested.
23
 * 
24
 * @author  costa
25
 */
26
public class HarvestDocument {
27
  
28
  private String documentName;  
29
  private String documentType;
30
  private String documentURL;
31
  private Harvester harvester;
32
  private HarvestSiteSchedule harvestSiteSchedule;
33
  private int identifier;
34
  private int revision;
35
  private String scope;
36
    
37

    
38
  /**
39
   * Creates a new instance of HarvestDocument. Initialized with the data
40
   * that was read from a single <document> element in site document list.
41
   * 
42
   * @param harvester            the parent Harvester object
43
   * @param harvestSiteSchedule  the parent HarvestSiteSchedule object
44
   * @param scope                the value of the <scope> element
45
   * @param identifier           the value of the <identifier> element
46
   * @param revision             the value of the <revision> element
47
   * @param documentType         the value of the <documentType> element
48
   * @param documentURL          the value of the <documentURL> element
49
   */
50
  public HarvestDocument(
51
                          Harvester harvester,
52
                          HarvestSiteSchedule harvestSiteSchedule,
53
                          String scope,
54
                          int identifier,
55
                          int revision,
56
                          String documentType,
57
                          String documentURL
58
                        ) {
59
    this.harvester = harvester;
60
    this.harvestSiteSchedule = harvestSiteSchedule;
61
    this.documentType = documentType;
62
    this.documentURL = documentURL;
63
    this.scope = scope;
64
    this.identifier = identifier;
65
    this.revision = revision;
66
    
67
    this.documentName = scope + "." + identifier;
68
  }
69

    
70

    
71
  /**
72
   * Retrieve the document from the site using its <documentURL> value.
73
   * 
74
   * @return   A StringReader containing the document string.
75
   */
76
  private StringReader getSiteDocument() {
77
    String documentString;
78
    InputStream inputStream;
79
    InputStreamReader inputStreamReader;
80
    StringReader stringReader = null;
81
    URL url;
82
    
83
    try {
84
      url = new URL(documentURL);
85
      inputStream = url.openStream();
86
      inputStreamReader = new InputStreamReader(inputStream);
87
      documentString = IOUtil.getAsString(inputStreamReader, true);
88
      stringReader = new StringReader(documentString);
89
      System.out.println("  Successfully read document: " + documentURL);
90
    }
91
    catch (MalformedURLException e) {
92
      System.err.println("MalformedURLException: " + e.getMessage());
93
    }
94
    catch (IOException e) {
95
      System.err.println("IOException: " + e.getMessage());
96
    }
97
    
98
    return stringReader;
99
  }
100
    
101

    
102
  /**
103
   * Harvest the document from the site. Unless Metacat already has the
104
   * document, retrieve the document from the site and put (insert or
105
   * update) it to Metacat. If Metacat already has the document, determine
106
   * the highest revision stored in Metacat so that this can be reported
107
   * back to the user.
108
   */
109
  public void harvestDocument() {
110
    int highestRevision;
111
    String metacatReturnString;
112
    StringReader stringReader;
113

    
114
    /* If metacat already has this document, determine the highest revision in
115
     * metacat and report it to the user; else, insert or delete the document 
116
     * into metacat.
117
     */
118
    if (metacatHasDocument()) {
119
      System.out.println("  metacat has document");
120
      highestRevision = metacatHighestRevision();
121
      System.out.println("  metacatHighestRevision: " + highestRevision);
122
    }
123
    else {
124
      stringReader = getSiteDocument();
125
      if (stringReader != null) {
126
        if (parseDocument()) {
127
          metacatReturnString = putMetacatDocument(stringReader);
128
          System.out.println("  " + metacatReturnString);
129
        }
130
        else {
131
          System.out.println("Error parsing document.");
132
        }
133
      }
134
      else {
135
        System.out.print("  Error reading document at URL: ");
136
        System.out.println(documentURL);
137
      }
138
    }
139
  }
140
    
141

    
142
  /**
143
   * Boolean to determine whether Metacat already has this document.
144
   * 
145
   * @return  true if Metacat has the document, otherwise false
146
   */
147
  private boolean metacatHasDocument() {
148
    boolean     hasDocument = false;
149

    
150
    return hasDocument;
151
  }
152
    
153

    
154
  /**
155
   * Determines the highest revision that Metacat has for this document.
156
   * 
157
   * @return  int representing the highest revision for this document in Metacat
158
   */
159
  private int metacatHighestRevision() {
160
    int         highestRevision = 0;
161
        
162
    return highestRevision;
163
  }
164
  
165
  
166
  /**
167
   * Parse the document to determine whether it is valid EML prior to inserting
168
   * or updating it to Metacat. This is QA/QC measure. Currently unimplemented.
169
   * 
170
   * @return  true if the document is valid EML, otherwise false
171
   */
172
  private boolean parseDocument () {
173
    boolean success = true;
174
    
175
    return success;
176
  }
177
  
178
  
179
  /**
180
   * Print the data fields and values in this HarvestDocument object.
181
   */
182
  void printOutput() 
183
  {
184
    System.out.println("");
185
    System.out.println("  scope: " + scope);
186
    System.out.println("  identifier: " + identifier);
187
    System.out.println("  revision: " + revision);
188
    System.out.println("  documentType: " + documentType);
189
    System.out.println("  documentURL: " + documentURL);
190
    System.out.println("  documentName: " + documentName);
191
  }
192
 
193
 
194
  /**
195
   * Insert or update this document to Metacat. If revision equals 1, do an
196
   * insert; otherwise, do an update.
197
   * 
198
   * @return  the Metacat return string from the insert or update operation
199
   */
200
  private String putMetacatDocument(StringReader stringReader) {
201
    String docid = scope + "." + identifier + "." + revision;
202
    Metacat metacat = harvester.metacat;
203
    String metacatReturnString = "";
204

    
205
    if (harvester.connectToMetacat()) {
206
      try {
207
        if (revision == 1) {
208
          System.out.println("  Inserting document to metacat: " + docid);
209
          metacatReturnString = metacat.insert(docid, stringReader, null);
210
        }
211
        else {
212
          System.out.println("  Updating document to metacat: " + docid);
213
          metacatReturnString = metacat.update(docid, stringReader, null);
214
        }
215
      }
216
      catch (MetacatInaccessibleException e) {
217
        System.err.println("MetacatInaccessibleException: " + e.getMessage());
218
      }
219
      catch (InsufficientKarmaException e) {
220
        System.err.println("InsufficientKarmaException: " + e.getMessage());
221
      }
222
      catch (MetacatException e) {
223
        System.err.println("MetacatException: " + e.getMessage());
224
      }
225
      catch (IOException e) {
226
        System.err.println("IOException: " + e.getMessage());
227
      }
228
    }
229
    else {
230
      metacatReturnString = "Not putting document to metacat";
231
    }
232
        
233
    return metacatReturnString;
234
  }
235
    
236
}
(2-2/7)