Project

General

Profile

« Previous | Next » 

Revision 5568

Added by berkley about 14 years ago

Updated the MetacatPopulator to use the D1 EML parser to create system metadata in a smarter fashion.

View differences:

test/edu/ucsb/nceas/metacat/util/MetacatPopulatorTest.java
22 22
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 23
 */
24 24

  
25
package edu.ucsb.nceas.metacat.dataone;
25
package edu.ucsb.nceas.metacat.util;
26 26

  
27 27
import java.util.*;
28 28
import java.io.*;
......
99 99
        {
100 100
            MetacatPopulator mp = new MetacatPopulator(
101 101
                    "http://knb.ecoinformatics.org/knb", 
102
                    "http://knb-mn.ecoinformatics.org/knb", 
103
                    "parasitism", "uid=kepler,o=unaffiliated,dc=ecoinformatics,dc=org", "kepler");
102
                    "http://localhost:8080/knb", 
103
                    "msucci", "uid=kepler,o=unaffiliated,dc=ecoinformatics,dc=org", "kepler");
104 104
            mp.populate();
105 105
        }
106 106
        catch(Exception e)
src/edu/ucsb/nceas/metacat/util/MetacatPopulator.java
58 58
import org.dataone.service.types.SystemMetadata;
59 59
import org.dataone.service.types.Identifier;
60 60
import org.dataone.client.D1Client;
61
import org.dataone.client.MNode;
62
import org.dataone.eml.DataoneEMLParser;
63
import org.dataone.eml.EMLDocument;
64
import org.dataone.eml.EMLDocument.DistributionMetadata;
61 65

  
62 66
//import sun.tools.jstat.Identifier;
63 67

  
......
105 109
        String sourceSessionid = loginSource();
106 110
        
107 111
        //do a query
108
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
109
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&" +
110
                        "returndoctype=BIN&" +
111
                        "returndoctype=http://dataone.org/service/types/SystemMetadata/0.1&";
112
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
113
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
114
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
112 115
        params += "action=query&";
113 116
        params += "qformat=xml&";
114 117
        params += "anyfield=" + query;
......
124 127
        
125 128
        printHeader("Parsing source results");
126 129
        D1Client d1 = new D1Client(destinationUrl + "/");
130
        MNode mn = d1.getMN(destinationUrl + "/");
131
        
127 132
        printHeader("Processing " + docs.size() + " results.");
128 133
        printHeader("logging in to the destination " + destinationUrl);
129
        AuthToken authtoken = d1.login(username, password);
134
        AuthToken authtoken = mn.login(username, password);
130 135
        for(int i=0; i<docs.size(); i++)
131 136
        {
132 137
            //for each document in the query
......
137 142
            params = "action=read&qformat=xml&docid=" + docid;
138 143
            is = getResponse(sourceUrl, "/metacat", params, "POST");
139 144
            String doctext = streamToString(is);
140
            //System.out.println("Done retrieving document: " + doctext);
145
            System.out.println("doctext: " + doctext);
141 146
            is = stringToStream(doctext);
147
            //parse the document
148
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
149
            EMLDocument emld = parser.parseDocument(is);
150
            //go through the DistributionMetadata and download any described data
151
            
152
            is = stringToStream(doctext);
142 153
            doc.doctext = doctext;
143 154

  
144 155
            printHeader("creating document on destination " + destinationUrl);            
145 156
            SystemMetadata sysmeta = generateSystemMetadata(doc);
157
            for(int j=0; j<emld.distributionMetadata.size(); j++)
158
            {
159
                Identifier emlId = sysmeta.getIdentifier();
160
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
161
                String dataDocUrl = dm.url;
162
                String dataDocMimeType = dm.mimeType;
163
                String dataDocLocalId = "";
164
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
165
                { //we only handle ecogrid urls right now
166
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
167
                            "ecogrid://knb/".length(), dataDocUrl.length());
168
                    //get the file
169
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
170
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
171
                    String dataDocText = streamToString(dataDocIs);
172
                    
173
                    //set the id
174
                    Identifier did = new Identifier();
175
                    did.setValue(dataDocLocalId);
176
                    
177
                    //add the describes entry to the eml's sysmeta
178
                    System.out.println("adding describe for doc " + 
179
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
180
                    sysmeta.addDescribe(did);
181
                    
182
                    //create sysmeta for the data doc                    
183
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
184
                    //overwrite the bogus values from the last call 
185
                    dataDocSysMeta.setIdentifier(did);
186
                    dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
187
                    Checksum checksum = new Checksum();
188
                    dataDocIs = stringToStream(dataDocText);
189
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
190
                    checksum.setAlgorithm(ca);
191
                    checksum.setValue(checksum(dataDocIs));
192
                    dataDocSysMeta.setChecksum(checksum);
193
                    dataDocSysMeta.setSize(dataDocText.getBytes().length);
194
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
195
                    boolean error = false;
196
                    //create the data doc on d1
197
                    try
198
                    {
199
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
200
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
201
                    }
202
                    catch(Exception e)
203
                    {
204
                        error = true;
205
                        System.out.println("ERROR: Could not create data document with id " + 
206
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
207
                    }
208
                    finally
209
                    {
210
                        if(error)
211
                        {
212
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
213
                                    "FAILED.");
214
                        }
215
                        else
216
                        {
217
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
218
                                " which is described by " + sysmeta.getIdentifier().getValue());
219
                        }
220
                    }
221
                }
222
                else
223
                {
224
                    System.out.println("WARNING: Could not process describes url " +
225
                            dataDocUrl + " for document " + doc.docid + 
226
                    ".  Only ecogrid://knb/ urls are currently supported.");
227
                }
228
            }
229
            
146 230
            try
147 231
            {
148
              Identifier id = d1.create(authtoken, sysmeta.getIdentifier(), 
232
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
149 233
                    IOUtils.toInputStream(doc.doctext), sysmeta);
150 234
              System.out.println("Success inserting document " + id.getValue());
235
              
151 236
            }
152 237
            catch(Exception e)
153 238
            {
239
                e.printStackTrace();
154 240
                System.out.println("Could not create document with id " + 
155 241
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
242
                
156 243
            }
157 244
            finally
158 245
            {
......
163 250
        logout();
164 251
    }
165 252
    
166
    private void printHeader(String s)
167
    {
168
        System.out.println("****** " + s + " *******");
169
    }
170
    
171 253
    /**
172
     * produce an md5 checksum for item
254
     * create the documents listed by an eml document as described in the 
255
     * new system
256
     * @param doc
257
     * @param emld
173 258
     */
174
    private String checksum(InputStream is)
175
      throws Exception
176
    {        
177
        byte[] buffer = new byte[1024];
178
        MessageDigest complete = MessageDigest.getInstance("MD5");
179
        int numRead;
259
    private void createDescribedDocuments(Document doc, EMLDocument emld)
260
    {
180 261
        
181
        do 
182
        {
183
          numRead = is.read(buffer);
184
          if (numRead > 0) 
185
          {
186
            complete.update(buffer, 0, numRead);
187
          }
188
        } while (numRead != -1);
189
        
190
        
191
        return getHex(complete.digest());
192 262
    }
193 263
    
194 264
    /**
195
     * convert a byte array to a hex string
196
     */
197
    private static String getHex( byte [] raw ) 
198
    {
199
        final String HEXES = "0123456789ABCDEF";
200
        if ( raw == null ) {
201
          return null;
202
        }
203
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
204
        for ( final byte b : raw ) {
205
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
206
             .append(HEXES.charAt((b & 0x0F)));
207
        }
208
        return hex.toString();
209
    }
210
    
211
    /**
212 265
     * @param doc
213 266
     * @return
214 267
     */
......
231 284
            }
232 285
            else
233 286
            {
234
                format = ObjectFormat.convert("text/plain");
287
                format = ObjectFormat.TEXT_PLAIN;
235 288
            }
236 289
        }
237 290
        sm.setObjectFormat(format);
......
269 322
            sm.setDateSysMetadataModified(dateUpdated);
270 323
        }
271 324
        NodeReference nr = new NodeReference();
272
        nr.setValue(sourceUrl);
325
        nr.setValue("KNB");
273 326
        sm.setOriginMemberNode(nr);
274 327
        sm.setAuthoritativeMemberNode(nr);
328
        
275 329
        return sm;
276 330
    }
277 331
    
332
    private void printHeader(String s)
333
    {
334
        System.out.println("****** " + s + " *******");
335
    }
336
    
278 337
    /**
338
     * produce an md5 checksum for item
339
     */
340
    private String checksum(InputStream is)
341
      throws Exception
342
    {        
343
        byte[] buffer = new byte[1024];
344
        MessageDigest complete = MessageDigest.getInstance("MD5");
345
        int numRead;
346
        
347
        do 
348
        {
349
          numRead = is.read(buffer);
350
          if (numRead > 0) 
351
          {
352
            complete.update(buffer, 0, numRead);
353
          }
354
        } while (numRead != -1);
355
        
356
        
357
        return getHex(complete.digest());
358
    }
359
    
360
    /**
361
     * convert a byte array to a hex string
362
     */
363
    private static String getHex( byte [] raw ) 
364
    {
365
        final String HEXES = "0123456789ABCDEF";
366
        if ( raw == null ) {
367
          return null;
368
        }
369
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
370
        for ( final byte b : raw ) {
371
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
372
             .append(HEXES.charAt((b & 0x0F)));
373
        }
374
        return hex.toString();
375
    }
376
    
377
    /**
279 378
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
280 379
     * a proper date object
281 380
     * @param date

Also available in: Unified diff