Project

General

Profile

1
/**
 *  '$RCSfile$'
 *    Purpose: A Class that implements administrative methods 
 *  Copyright: 2010 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Michael Daigle
 * 
 *   '$Author: berkley $'
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
 * '$Revision: 5374 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.security.MessageDigest;
import java.util.Calendar;
import java.util.Date;
import java.util.Vector;

import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMultipart;

import org.apache.commons.io.IOUtils;
import org.dataone.client.D1Client;
import org.dataone.client.MNode;
import org.dataone.eml.DataoneEMLParser;
import org.dataone.eml.EMLDocument;
import org.dataone.eml.EMLDocument.DistributionMetadata;
import org.dataone.service.types.AuthToken;
import org.dataone.service.types.Checksum;
import org.dataone.service.types.ChecksumAlgorithm;
import org.dataone.service.types.Identifier;
import org.dataone.service.types.NodeReference;
import org.dataone.service.types.ObjectFormat;
import org.dataone.service.types.Subject;
import org.dataone.service.types.SystemMetadata;

import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
61

    
62
/**
 * A class to populate a metacat instance based on documents returned from a
 * query.
 *
 * @author berkley
 */
66
public class MetacatPopulator
67
{
68
    private String sourceUrl = null;
69
    private String destinationUrl = null;
70
    private String query = null;
71
    private String username = null;
72
    private String password = null;
73
    
74
    /**
     * Builds a populator for the given source and destination urls.  Both
     * urls should be of the form "http://<url>/<metacat_instance>".
     * If username and/or password is null, the query will be run as public.
     *
     * @param sourceUrl base url of the instance to read documents from
     * @param destUrl base url of the instance to write documents to
     * @param query term to search the source for
     * @param username user name used when logging in
     * @param password password used when logging in
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query,
            String username, String password)
    {
        // assignments follow the parameter order
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
    }
93
    
94
    /**
95
     * populate from the source
96
     */
97
    public void populate()
98
      throws Exception
99
    {
100
        printHeader("Source login");
101
        String sourceSessionid = loginSource();
102
        
103
        //do a query
104
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
105
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
106
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
107
        params += "action=query&";
108
        params += "qformat=xml&";
109
        params += "anyfield=" + query;
110
        
111
        printHeader("Searching source");
112
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
113
        InputStream is = getResponse(sourceUrl, "/metacat",
114
                params, "POST");
115
        String response = streamToString(is);
116
        //System.out.println("response: " + response);
117
        Vector<Document> docs = parseResponse(response);
118
        
119
        
120
        printHeader("Parsing source results");
121
        System.out.println("creating MN with url: " + destinationUrl + "/");
122
        MNode mn = D1Client.getMN(destinationUrl + "/");
123
        
124
        printHeader("Processing " + docs.size() + " results.");
125
        printHeader("logging in to the destination " + destinationUrl);
126
        AuthToken authtoken = mn.login(username, password);
127
        System.out.println("authtoken: " + authtoken.getToken());
128
        for(int i=0; i<docs.size(); i++)
129
        {
130
            //for each document in the query
131
            Document doc = docs.get(i);
132
            String docid = doc.docid;
133
            //get the doc from source
134
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
135
            params = "action=read&qformat=xml&docid=" + docid;
136
            is = getResponse(sourceUrl, "/metacat", params, "POST");
137
            String doctext = streamToString(is);
138
            System.out.println("doctext: " + doctext);
139
            is = stringToStream(doctext);
140
            //parse the document
141
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
142
            EMLDocument emld = parser.parseDocument(is);
143
            if(emld == null)
144
            {
145
                continue;
146
            }
147
            //go through the DistributionMetadata and download any described data
148
            
149
            is = stringToStream(doctext);
150
            doc.doctext = doctext;
151

    
152
            printHeader("creating document on destination " + destinationUrl);            
153
            SystemMetadata sysmeta = generateSystemMetadata(doc);
154
            for(int j=0; j<emld.distributionMetadata.size(); j++)
155
            {
156
                Identifier emlId = sysmeta.getIdentifier();
157
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
158
                String dataDocUrl = dm.url;
159
                String dataDocMimeType = dm.mimeType;
160
                String dataDocLocalId = "";
161
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
162
                { //we only handle ecogrid urls right now
163
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
164
                            "ecogrid://knb/".length(), dataDocUrl.length());
165
                    //get the file
166
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
167
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
168
                    String dataDocText = streamToString(dataDocIs);
169
                    
170
                    //set the id
171
                    Identifier did = new Identifier();
172
                    did.setValue(dataDocLocalId);
173
                    
174
                    //add the desribeby to the eml's sysmeta
175
                    System.out.println("adding describe for doc " + 
176
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
177
                    sysmeta.addDescribe(did);
178
                    
179
                    //create sysmeta for the data doc                    
180
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
181
                    //overwrite the bogus values from the last call 
182
                    dataDocSysMeta.setIdentifier(did);
183
                    dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
184
                    Checksum checksum = new Checksum();
185
                    dataDocIs = stringToStream(dataDocText);
186
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
187
                    checksum.setAlgorithm(ca);
188
                    checksum.setValue(checksum(dataDocIs));
189
                    dataDocSysMeta.setChecksum(checksum);
190
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
191
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
192
                    boolean error = false;
193
                    //create the data doc on d1
194
                    try
195
                    {
196
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
197
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
198
                    }
199
                    catch(Exception e)
200
                    {
201
                        error = true;
202
                        System.out.println("ERROR: Could not create data document with id " + 
203
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
204
                    }
205
                    finally
206
                    {
207
                        if(error)
208
                        {
209
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
210
                                    "FAILED.");
211
                        }
212
                        else
213
                        {
214
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
215
                                " which is described by " + sysmeta.getIdentifier().getValue());
216
                        }
217
                    }
218
                }
219
                else
220
                {
221
                    System.out.println("WARNING: Could not process describes url " +
222
                            dataDocUrl + " for document " + doc.docid + 
223
                    ".  Only ecogrid://knb/ urls are currently supported.");
224
                }
225
            }
226
            
227
            try
228
            {
229
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
230
                    IOUtils.toInputStream(doc.doctext), sysmeta);
231
              System.out.println("Success inserting document " + id.getValue());
232
              
233
            }
234
            catch(Exception e)
235
            {
236
                e.printStackTrace();
237
                System.out.println("Could not create document with id " + 
238
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
239
                
240
            }
241
            finally
242
            {
243
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
244
            }
245
        }
246
        
247
        logout();
248
    }
249
    
250
    /**
251
     * create the documents listed by an eml document as described in the 
252
     * new system
253
     * @param doc
254
     * @param emld
255
     */
256
    private void createDescribedDocuments(Document doc, EMLDocument emld)
257
    {
258
        
259
    }
260
    
261
    /**
262
     * @param doc
263
     * @return
264
     */
265
    private SystemMetadata generateSystemMetadata(Document doc)
266
      throws Exception
267
    {
268
        SystemMetadata sm = new SystemMetadata();
269
        //set the id
270
        Identifier id = new Identifier();
271
        id.setValue(doc.docid.trim());
272
        sm.setIdentifier(id);
273
        
274
        //set the object format
275
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
276
        if(format == null)
277
        {
278
            if(doc.doctype.trim().equals("BIN"))
279
            {
280
                format = ObjectFormat.OCTET_STREAM;
281
            }
282
            else
283
            {
284
                format = ObjectFormat.TEXT_PLAIN;
285
            }
286
        }
287
        sm.setObjectFormat(format);
288
        
289
        //create the checksum
290
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
291
        String checksumS = checksum(bais);
292
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
293
        Checksum checksum = new Checksum();
294
        checksum.setValue(checksumS);
295
        checksum.setAlgorithm(ca);
296
        sm.setChecksum(checksum);
297
        
298
        //set the size
299
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
300
        
301
        //submitter
302
        Subject p = new Subject();
303
        p.setValue("unknown");
304
        sm.setSubmitter(p);
305
        sm.setRightsHolder(p);
306
        try
307
        {
308
            Date dateCreated = parseMetacatDate(doc.createDate);
309
            sm.setDateUploaded(dateCreated);
310
            Date dateUpdated = parseMetacatDate(doc.updateDate);
311
            sm.setDateSysMetadataModified(dateUpdated);
312
        }
313
        catch(Exception e)
314
        {
315
            System.out.println("couldn't parse a date: " + e.getMessage());
316
            Date dateCreated = new Date();
317
            sm.setDateUploaded(dateCreated);
318
            Date dateUpdated = new Date();
319
            sm.setDateSysMetadataModified(dateUpdated);
320
        }
321
        NodeReference nr = new NodeReference();
322
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
323
        sm.setOriginMemberNode(nr);
324
        sm.setAuthoritativeMemberNode(nr);
325
        
326
        return sm;
327
    }
328
    
329
    private void printHeader(String s)
330
    {
331
        System.out.println("****** " + s + " *******");
332
    }
333
    
334
    /**
335
     * produce an md5 checksum for item
336
     */
337
    private String checksum(InputStream is)
338
      throws Exception
339
    {        
340
        byte[] buffer = new byte[1024];
341
        MessageDigest complete = MessageDigest.getInstance("MD5");
342
        int numRead;
343
        
344
        do 
345
        {
346
          numRead = is.read(buffer);
347
          if (numRead > 0) 
348
          {
349
            complete.update(buffer, 0, numRead);
350
          }
351
        } while (numRead != -1);
352
        
353
        
354
        return getHex(complete.digest());
355
    }
356
    
357
    /**
358
     * convert a byte array to a hex string
359
     */
360
    private static String getHex( byte [] raw ) 
361
    {
362
        final String HEXES = "0123456789ABCDEF";
363
        if ( raw == null ) {
364
          return null;
365
        }
366
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
367
        for ( final byte b : raw ) {
368
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
369
             .append(HEXES.charAt((b & 0x0F)));
370
        }
371
        return hex.toString();
372
    }
373
    
374
    /**
375
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
376
     * a proper date object
377
     * @param date
378
     * @return
379
     */
380
    private Date parseMetacatDate(String date)
381
    {
382
        String year = date.substring(0, 4);
383
        String month = date.substring(5, 7);
384
        String day = date.substring(8, 10);
385
        Calendar c = Calendar.getInstance();
386
        c.set(new Integer(year).intValue(), 
387
              new Integer(month).intValue(), 
388
              new Integer(day).intValue());
389
        return c.getTime();
390
    }
391

    
392
    /**
393
     * send a request to the resource
394
     */
395
    private InputStream sendRequest(String contextRootUrl, String resource, 
396
            String sessionid, String method, String urlParamaters, 
397
            String contentType, InputStream dataStream) 
398
        throws Exception 
399
    {
400
        
401
        HttpURLConnection connection = null ;
402
        String restURL = contextRootUrl + resource;
403

    
404
        if (urlParamaters != null) {
405
            if (restURL.indexOf("?") == -1)             
406
                restURL += "?";
407
            restURL += urlParamaters; 
408
            if(restURL.indexOf(" ") != -1)
409
            {
410
                restURL = restURL.replaceAll("\\s", "%20");
411
            }
412
        }
413
        
414
        if(sessionid != null)
415
        {
416
            if(restURL.indexOf("?") == -1)
417
            {
418
                restURL += "?sessionid=" + sessionid;
419
            }
420
            else
421
            {
422
                restURL += "&sessionid=" + sessionid;
423
            }
424
        }
425

    
426
        URL u = null;
427
        InputStream content = null;
428
        System.out.println("url: " + restURL);
429
        System.out.println("method: " + method);
430
        u = new URL(restURL);
431
        connection = (HttpURLConnection) u.openConnection();
432
        if (contentType!=null) {
433
            connection.setRequestProperty("Content-Type",contentType);
434
        }
435

    
436
        connection.setDoOutput(true);
437
        connection.setDoInput(true);
438
        connection.setRequestMethod(method);
439

    
440
        if (!method.equals("GET")) {
441
            if (dataStream != null) {
442
                OutputStream out = connection.getOutputStream();
443
                IOUtils.copy(dataStream, out);
444
            }
445
        }
446

    
447
        return connection.getInputStream();   
448
    }
449
    
450
    /**
451
     * create a mime multipart message from object and sysmeta
452
     */
453
    private MimeMultipart createMimeMultipart(InputStream object)
454
      throws Exception
455
    {
456
        final MimeMultipart mmp = new MimeMultipart();
457
        MimeBodyPart objectPart = new MimeBodyPart();
458
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
459
        objectPart.setFileName("doctext");
460
        DataSource ds = new InputStreamDataSource("doctext", object);
461
        DataHandler dh = new DataHandler(ds);
462
        objectPart.setDataHandler(dh);
463
        mmp.addBodyPart(objectPart);
464
        return mmp;
465
    }
466
    
467
    /**
468
     * parse a metacat query response and return a vector of docids
469
     * @param response
470
     * @return
471
     */
472
    private Vector<Document> parseResponse(String response)
473
    {
474
        Vector<Document> v = new Vector<Document>();
475
        int dstart = response.indexOf("<document>");
476
        int dend = response.indexOf("</document>", dstart);
477
        while(dstart != -1)
478
        {
479
            String doc = response.substring(dstart + "<document>".length(), dend);
480
            //System.out.println("adding " + docid);
481
            Document d = new Document(getFieldFromDoc(doc, "docid"),
482
                    getFieldFromDoc(doc, "doctype"),
483
                    getFieldFromDoc(doc, "createdate"),
484
                    getFieldFromDoc(doc, "updatedate"));
485
            v.add(d);
486
            dstart = response.indexOf("<document>", dend);
487
            dend = response.indexOf("</document>", dstart);
488
        }
489
        
490
        return v;
491
    }
492
    
493
    private String getFieldFromDoc(String doc, String fieldname)
494
    {
495
        String field = "<" + fieldname + ">";
496
        String fieldend = "</" + fieldname + ">";
497
        int start = doc.indexOf(field);
498
        int end = doc.indexOf(fieldend);
499
        String s = doc.substring(start + field.length(), end);
500
        //System.out.println("field: " + fieldname + " : " + s);
501
        return s;
502
    }
503
    
504
    /**
505
     * login the source
506
     * @return
507
     * @throws Exception
508
     */
509
    private String loginSource()
510
      throws Exception
511
    {
512
        return login(sourceUrl);
513
    }
514
    
515
    /**
516
     * login the destination
517
     * @return
518
     * @throws Exception
519
     */
520
    private String loginDest()
521
        throws Exception
522
    {
523
        return login(destinationUrl);
524
    }
525
    
526
    /**
527
     * returns a sessionid
528
     * @return
529
     */
530
    private String login(String sourceUrl)
531
      throws Exception
532
    {
533
        InputStream is = getResponse(sourceUrl, "/metacat", 
534
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
535
        "POST");
536
        String response = streamToString(is);
537
        //System.out.println("response: " + response);
538
        if(response.indexOf("sessionId") == -1)
539
        {
540
            throw new Exception("Error logging into " + sourceUrl);
541
        }
542
        
543
        String sessionid = response.substring(
544
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
545
                response.indexOf("</sessionId>"));
546
        System.out.println("sessionid: " + sessionid);
547
        return sessionid;
548
    }
549
    
550
    /**
551
     * logout both the source and destination
552
     * @throws Exception
553
     */
554
    private void logout()
555
        throws Exception
556
    {
557
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
558
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
559
    }
560
    
561
    /**
562
     * get an http response
563
     * @param contextRootUrl
564
     * @param resource
565
     * @param urlParameters
566
     * @param method
567
     * @return
568
     * @throws Exception
569
     */
570
    private InputStream getResponse(String contextRootUrl, String resource, 
571
            String urlParameters, String method)
572
      throws Exception
573
    {
574
        HttpURLConnection connection = null ;
575

    
576
        String restURL = contextRootUrl+resource;
577

    
578
        if (urlParameters != null) {
579
            if (restURL.indexOf("?") == -1)             
580
                restURL += "?";
581
            restURL += urlParameters; 
582
            if(restURL.indexOf(" ") != -1)
583
            {
584
                restURL = restURL.replaceAll("\\s", "%20");
585
            }
586
        }
587

    
588
        URL u = null;
589
        InputStream content = null;            
590
        System.out.println("url: " + restURL);
591
        System.out.println("method: " + method);
592
        u = new URL(restURL);
593
        connection = (HttpURLConnection) u.openConnection();
594
        connection.setDoOutput(true);
595
        connection.setDoInput(true);
596
        connection.setRequestMethod(method);
597
        content = connection.getInputStream();
598
        return content;
599
    }
600
    
601
    private String streamToString(InputStream is)
602
        throws Exception
603
    {
604
        byte b[] = new byte[1024];
605
        int numread = is.read(b, 0, 1024);
606
        String response = new String();
607
        while(numread != -1)
608
        {
609
            response += new String(b, 0, numread);
610
            numread = is.read(b, 0, 1024);
611
        }
612
        return response;
613
    }
614
    
615
    private InputStream stringToStream(String s)
616
      throws Exception
617
    {
618
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
619
        return bais;
620
    }
621
    
622
    private class Document
623
    {
624
        public String docid;
625
        public String doctype;
626
        public String createDate;
627
        public String updateDate;
628
        public String doctext;
629
        
630
        public Document(String docid, String doctype, String createDate, String updateDate)
631
        {
632
            this.docid = docid.trim();
633
            this.doctype = doctype.trim();
634
            this.createDate = createDate.trim();
635
            this.updateDate = updateDate.trim();
636
        }
637
    }
638
}
(8-8/15)