Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a metacat instance from query results
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetacatHandler;
45
import edu.ucsb.nceas.metacat.MetacatResultSet;
46
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
47
import edu.ucsb.nceas.metacat.dataone.CrudService;
48
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
49

    
50
import org.dataone.service.exceptions.InvalidSystemMetadata;
51
import org.dataone.service.exceptions.ServiceFailure;
52
import org.dataone.service.types.AuthToken;
53
import org.dataone.service.types.Checksum;
54
import org.dataone.service.types.ChecksumAlgorithm;
55
import org.dataone.service.types.NodeReference;
56
import org.dataone.service.types.ObjectFormat;
57
import org.dataone.service.types.Principal;
58
import org.dataone.service.types.SystemMetadata;
59
import org.dataone.service.types.Identifier;
60
import org.dataone.client.D1Client;
61

    
62
//import sun.tools.jstat.Identifier;
63

    
64
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
65

    
66
/**
 * A class to populate a metacat instance based on documents returned from a query.
 * @author berkley
 */
70
public class MetacatPopulator
71
{
72
    // connection settings; assigned once by the constructor
    private final String sourceUrl;
    private final String destinationUrl;
    private final String query;
    private final String username;
    private final String password;
    
    /**
     * Create a populator that copies the documents matching a query from a
     * source metacat to a destination metacat.  Both urls should be of the
     * form {@code http://<url>/<metacat_instance>}.
     * The credentials are used for the login on the source as well as on
     * the destination.
     * NOTE(review): an earlier version of this comment said that a null
     * username and/or password runs the query as public; the login code in
     * this class does not special-case null - confirm before relying on it.
     * @param sourceUrl base url of the metacat to read from
     * @param destUrl base url of the metacat to write to
     * @param query value searched for in any field on the source
     * @param username name used to log in
     * @param password password used to log in
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
    }
97
    
98
    /**
99
     * populate from the source
100
     */
101
    public void populate()
102
      throws Exception
103
    {
104
        printHeader("Source login");
105
        String sourceSessionid = loginSource();
106
        
107
        //do a query
108
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
109
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&" +
110
                        "returndoctype=BIN&" +
111
                        "returndoctype=http://dataone.org/service/types/SystemMetadata/0.1&";
112
        params += "action=query&";
113
        params += "qformat=xml&";
114
        params += "anyfield=" + query;
115
        
116
        printHeader("Searching source");
117
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
118
        InputStream is = getResponse(sourceUrl, "/metacat",
119
                params, "POST");
120
        String response = streamToString(is);
121
        //System.out.println("response: " + response);
122
        Vector<Document> docs = parseResponse(response);
123
        printHeader("Parsing source results");
124
        D1Client d1 = new D1Client(destinationUrl + "/");
125
        printHeader("logging in to the destination " + destinationUrl);
126
        AuthToken authtoken = d1.login(username, password);
127
        for(int i=0; i<docs.size(); i++)
128
        {
129
            //for each document in the query
130
            Document doc = docs.get(i);
131
            String docid = doc.docid;
132
            //get the doc from source
133
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
134
            params = "action=read&qformat=xml&docid=" + docid;
135
            is = getResponse(sourceUrl, "/metacat", params, "POST");
136
            String doctext = streamToString(is);
137
            //System.out.println("Done retrieving document: " + doctext);
138
            is = stringToStream(doctext);
139
            doc.doctext = doctext;
140

    
141
            printHeader("creating document on destination " + destinationUrl);            
142
            SystemMetadata sysmeta = generateSystemMetadata(doc);
143
            try
144
            {
145
              Identifier id = d1.create(authtoken, sysmeta.getIdentifier(), 
146
                    IOUtils.toInputStream(doc.doctext), sysmeta);
147
              System.out.println("Success inserting document " + id.getValue());
148
            }
149
            catch(Exception e)
150
            {
151
                System.out.println("Could not create document with id " + 
152
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
153
            }
154
            finally
155
            {
156
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
157
            }
158
        }
159
        
160
        logout();
161
    }
162
    
163
    private void printHeader(String s)
164
    {
165
        System.out.println("****** " + s + " *******");
166
    }
167
    
168
    /**
169
     * produce an md5 checksum for item
170
     */
171
    private String checksum(InputStream is)
172
      throws Exception
173
    {        
174
        byte[] buffer = new byte[1024];
175
        MessageDigest complete = MessageDigest.getInstance("MD5");
176
        int numRead;
177
        
178
        do 
179
        {
180
          numRead = is.read(buffer);
181
          if (numRead > 0) 
182
          {
183
            complete.update(buffer, 0, numRead);
184
          }
185
        } while (numRead != -1);
186
        
187
        
188
        return getHex(complete.digest());
189
    }
190
    
191
    /**
192
     * convert a byte array to a hex string
193
     */
194
    private static String getHex( byte [] raw ) 
195
    {
196
        final String HEXES = "0123456789ABCDEF";
197
        if ( raw == null ) {
198
          return null;
199
        }
200
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
201
        for ( final byte b : raw ) {
202
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
203
             .append(HEXES.charAt((b & 0x0F)));
204
        }
205
        return hex.toString();
206
    }
207
    
208
    /**
209
     * @param doc
210
     * @return
211
     */
212
    private SystemMetadata generateSystemMetadata(Document doc)
213
      throws Exception
214
    {
215
        SystemMetadata sm = new SystemMetadata();
216
        //set the id
217
        Identifier id = new Identifier();
218
        id.setValue(doc.docid);
219
        sm.setIdentifier(id);
220
        
221
        //set the object format
222
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
223
        if(format == null)
224
        {
225
            if(doc.doctype.trim().equals("BIN"))
226
            {
227
                format = ObjectFormat.APPLICATIONOCTETSTREAM;
228
            }
229
            else
230
            {
231
                format = ObjectFormat.convert("text/plain");
232
            }
233
        }
234
        sm.setObjectFormat(format);
235
        
236
        //create the checksum
237
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes());
238
        String checksumS = checksum(bais);
239
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
240
        Checksum checksum = new Checksum();
241
        checksum.setValue(checksumS);
242
        checksum.setAlgorithm(ca);
243
        sm.setChecksum(checksum);
244
        
245
        //set the size
246
        sm.setSize(doc.doctext.getBytes().length);
247
        
248
        //submitter
249
        Principal p = new Principal();
250
        p.setValue("");
251
        sm.setSubmitter(p);
252
        sm.setRightsHolder(p);
253
        try
254
        {
255
            Date dateCreated = parseMetacatDate(doc.createDate);
256
            sm.setDateUploaded(dateCreated);
257
            Date dateUpdated = parseMetacatDate(doc.updateDate);
258
            sm.setDateSysMetadataModified(dateUpdated);
259
        }
260
        catch(Exception e)
261
        {
262
            System.out.println("couldn't parse a date: " + e.getMessage());
263
            Date dateCreated = new Date();
264
            sm.setDateUploaded(dateCreated);
265
            Date dateUpdated = new Date();
266
            sm.setDateSysMetadataModified(dateUpdated);
267
        }
268
        NodeReference nr = new NodeReference();
269
        nr.setValue(sourceUrl);
270
        sm.setOriginMemberNode(nr);
271
        sm.setAuthoritativeMemberNode(nr);
272
        return sm;
273
    }
274
    
275
    /**
276
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
277
     * a proper date object
278
     * @param date
279
     * @return
280
     */
281
    private Date parseMetacatDate(String date)
282
    {
283
        String year = date.substring(0, 4);
284
        String month = date.substring(5, 7);
285
        String day = date.substring(8, 10);
286
        Calendar c = Calendar.getInstance();
287
        c.set(new Integer(year).intValue(), 
288
              new Integer(month).intValue(), 
289
              new Integer(day).intValue());
290
        return c.getTime();
291
    }
292

    
293
    /**
294
     * send a request to the resource
295
     */
296
    private InputStream sendRequest(String contextRootUrl, String resource, 
297
            String sessionid, String method, String urlParamaters, 
298
            String contentType, InputStream dataStream) 
299
        throws Exception 
300
    {
301
        
302
        HttpURLConnection connection = null ;
303
        String restURL = contextRootUrl + resource;
304

    
305
        if (urlParamaters != null) {
306
            if (restURL.indexOf("?") == -1)             
307
                restURL += "?";
308
            restURL += urlParamaters; 
309
            if(restURL.indexOf(" ") != -1)
310
            {
311
                restURL = restURL.replaceAll("\\s", "%20");
312
            }
313
        }
314
        
315
        if(sessionid != null)
316
        {
317
            if(restURL.indexOf("?") == -1)
318
            {
319
                restURL += "?sessionid=" + sessionid;
320
            }
321
            else
322
            {
323
                restURL += "&sessionid=" + sessionid;
324
            }
325
        }
326

    
327
        URL u = null;
328
        InputStream content = null;
329
        System.out.println("url: " + restURL);
330
        System.out.println("method: " + method);
331
        u = new URL(restURL);
332
        connection = (HttpURLConnection) u.openConnection();
333
        if (contentType!=null) {
334
            connection.setRequestProperty("Content-Type",contentType);
335
        }
336

    
337
        connection.setDoOutput(true);
338
        connection.setDoInput(true);
339
        connection.setRequestMethod(method);
340

    
341
        if (!method.equals("GET")) {
342
            if (dataStream != null) {
343
                OutputStream out = connection.getOutputStream();
344
                IOUtils.copy(dataStream, out);
345
            }
346
        }
347

    
348
        return connection.getInputStream();   
349
    }
350
    
351
    /**
352
     * create a mime multipart message from object and sysmeta
353
     */
354
    private MimeMultipart createMimeMultipart(InputStream object)
355
      throws Exception
356
    {
357
        final MimeMultipart mmp = new MimeMultipart();
358
        MimeBodyPart objectPart = new MimeBodyPart();
359
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
360
        objectPart.setFileName("doctext");
361
        DataSource ds = new InputStreamDataSource("doctext", object);
362
        DataHandler dh = new DataHandler(ds);
363
        objectPart.setDataHandler(dh);
364
        mmp.addBodyPart(objectPart);
365
        return mmp;
366
    }
367
    
368
    /**
369
     * parse a metacat query response and return a vector of docids
370
     * @param response
371
     * @return
372
     */
373
    private Vector<Document> parseResponse(String response)
374
    {
375
        Vector<Document> v = new Vector<Document>();
376
        int dstart = response.indexOf("<document>");
377
        int dend = response.indexOf("</document>", dstart);
378
        while(dstart != -1)
379
        {
380
            String doc = response.substring(dstart + "<document>".length(), dend);
381
            //System.out.println("adding " + docid);
382
            Document d = new Document(getFieldFromDoc(doc, "docid"),
383
                    getFieldFromDoc(doc, "doctype"),
384
                    getFieldFromDoc(doc, "createdate"),
385
                    getFieldFromDoc(doc, "updatedate"));
386
            v.add(d);
387
            dstart = response.indexOf("<document>", dend);
388
            dend = response.indexOf("</document>", dstart);
389
        }
390
        
391
        return v;
392
    }
393
    
394
    private String getFieldFromDoc(String doc, String fieldname)
395
    {
396
        String field = "<" + fieldname + ">";
397
        String fieldend = "</" + fieldname + ">";
398
        int start = doc.indexOf(field);
399
        int end = doc.indexOf(fieldend);
400
        String s = doc.substring(start + field.length(), end);
401
        System.out.println("field: " + fieldname + " : " + s);
402
        return s;
403
    }
404
    
405
    /**
406
     * login the source
407
     * @return
408
     * @throws Exception
409
     */
410
    private String loginSource()
411
      throws Exception
412
    {
413
        return login(sourceUrl);
414
    }
415
    
416
    /**
417
     * login the destination
418
     * @return
419
     * @throws Exception
420
     */
421
    private String loginDest()
422
        throws Exception
423
    {
424
        return login(destinationUrl);
425
    }
426
    
427
    /**
428
     * returns a sessionid
429
     * @return
430
     */
431
    private String login(String sourceUrl)
432
      throws Exception
433
    {
434
        InputStream is = getResponse(sourceUrl, "/metacat", 
435
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
436
        "POST");
437
        String response = streamToString(is);
438
        //System.out.println("response: " + response);
439
        if(response.indexOf("sessionId") == -1)
440
        {
441
            throw new Exception("Error logging into " + sourceUrl);
442
        }
443
        
444
        String sessionid = response.substring(
445
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
446
                response.indexOf("</sessionId>"));
447
        System.out.println("sessionid: " + sessionid);
448
        return sessionid;
449
    }
450
    
451
    /**
452
     * logout both the source and destination
453
     * @throws Exception
454
     */
455
    private void logout()
456
        throws Exception
457
    {
458
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
459
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
460
    }
461
    
462
    /**
463
     * get an http response
464
     * @param contextRootUrl
465
     * @param resource
466
     * @param urlParameters
467
     * @param method
468
     * @return
469
     * @throws Exception
470
     */
471
    private InputStream getResponse(String contextRootUrl, String resource, 
472
            String urlParameters, String method)
473
      throws Exception
474
    {
475
        HttpURLConnection connection = null ;
476

    
477
        String restURL = contextRootUrl+resource;
478

    
479
        if (urlParameters != null) {
480
            if (restURL.indexOf("?") == -1)             
481
                restURL += "?";
482
            restURL += urlParameters; 
483
            if(restURL.indexOf(" ") != -1)
484
            {
485
                restURL = restURL.replaceAll("\\s", "%20");
486
            }
487
        }
488

    
489
        URL u = null;
490
        InputStream content = null;            
491
        System.out.println("url: " + restURL);
492
        System.out.println("method: " + method);
493
        u = new URL(restURL);
494
        connection = (HttpURLConnection) u.openConnection();
495
        connection.setDoOutput(true);
496
        connection.setDoInput(true);
497
        connection.setRequestMethod(method);
498
        content = connection.getInputStream();
499
        return content;
500
    }
501
    
502
    private String streamToString(InputStream is)
503
        throws Exception
504
    {
505
        byte b[] = new byte[1024];
506
        int numread = is.read(b, 0, 1024);
507
        String response = new String();
508
        while(numread != -1)
509
        {
510
            response += new String(b, 0, numread);
511
            numread = is.read(b, 0, 1024);
512
        }
513
        return response;
514
    }
515
    
516
    private InputStream stringToStream(String s)
517
      throws Exception
518
    {
519
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes());
520
        return bais;
521
    }
522
    
523
    private class Document
524
    {
525
        public String docid;
526
        public String doctype;
527
        public String createDate;
528
        public String updateDate;
529
        public String doctext;
530
        
531
        public Document(String docid, String doctype, String createDate, String updateDate)
532
        {
533
            this.docid = docid;
534
            this.doctype = doctype;
535
            this.createDate = createDate;
536
            this.updateDate = updateDate;
537
        }
538
    }
539
}
(8-8/15)