Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements administrative methods 
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetacatHandler;
45
import edu.ucsb.nceas.metacat.MetacatResultSet;
46
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
47
import edu.ucsb.nceas.metacat.dataone.CrudService;
48
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
49

    
50
import org.dataone.service.exceptions.InvalidSystemMetadata;
51
import org.dataone.service.exceptions.ServiceFailure;
52
import org.dataone.service.types.AuthToken;
53
import org.dataone.service.types.Checksum;
54
import org.dataone.service.types.ChecksumAlgorithm;
55
import org.dataone.service.types.NodeReference;
56
import org.dataone.service.types.ObjectFormat;
57
import org.dataone.service.types.Principal;
58
import org.dataone.service.types.SystemMetadata;
59
import org.dataone.service.types.Identifier;
60
import org.dataone.client.D1Client;
61
import org.dataone.client.MNode;
62
import org.dataone.eml.DataoneEMLParser;
63
import org.dataone.eml.EMLDocument;
64
import org.dataone.eml.EMLDocument.DistributionMetadata;
65

    
66
//import sun.tools.jstat.Identifier;
67

    
68
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
69

    
70
/**
 * A class to populate a metacat instance based on documents returned from a query
 * @author berkley
 */
74
public class MetacatPopulator
75
{
76
    private String sourceUrl = null;
77
    private String destinationUrl = null;
78
    private String query = null;
79
    private String username = null;
80
    private String password = null;
81
    
82
    /**
     * Build a populator that remembers where to read from, where to write to,
     * what to search for and which credentials to use.  Nothing is contacted
     * here; the values are only stored.
     * Both urls should be of the form "http://<url>/<metacat_instance>".
     * NOTE(review): earlier documentation said a null username and/or password
     * makes the query run as public; this constructor does not special-case
     * null -- confirm against the login code.
     * @param sourceUrl url of the metacat instance to read from
     * @param destUrl url of the instance to write to
     * @param query the search term
     * @param username the user to log in as
     * @param password the password for username
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        //endpoints
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;

        //search term and credentials
        this.query = query;
        this.username = username;
        this.password = password;
    }
101
    
102
    /**
103
     * populate from the source
104
     */
105
    public void populate()
106
      throws Exception
107
    {
108
        printHeader("Source login");
109
        String sourceSessionid = loginSource();
110
        
111
        //do a query
112
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
113
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
114
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
115
        params += "action=query&";
116
        params += "qformat=xml&";
117
        params += "anyfield=" + query;
118
        
119
        printHeader("Searching source");
120
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
121
        InputStream is = getResponse(sourceUrl, "/metacat",
122
                params, "POST");
123
        String response = streamToString(is);
124
        //System.out.println("response: " + response);
125
        Vector<Document> docs = parseResponse(response);
126
        
127
        
128
        printHeader("Parsing source results");
129
        D1Client d1 = new D1Client(destinationUrl + "/");
130
        MNode mn = d1.getMN(destinationUrl + "/");
131
        
132
        printHeader("Processing " + docs.size() + " results.");
133
        printHeader("logging in to the destination " + destinationUrl);
134
        AuthToken authtoken = mn.login(username, password);
135
        for(int i=0; i<docs.size(); i++)
136
        {
137
            //for each document in the query
138
            Document doc = docs.get(i);
139
            String docid = doc.docid;
140
            //get the doc from source
141
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
142
            params = "action=read&qformat=xml&docid=" + docid;
143
            is = getResponse(sourceUrl, "/metacat", params, "POST");
144
            String doctext = streamToString(is);
145
            System.out.println("doctext: " + doctext);
146
            is = stringToStream(doctext);
147
            //parse the document
148
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
149
            EMLDocument emld = parser.parseDocument(is);
150
            if(emld == null)
151
            {
152
                continue;
153
            }
154
            //go through the DistributionMetadata and download any described data
155
            
156
            is = stringToStream(doctext);
157
            doc.doctext = doctext;
158

    
159
            printHeader("creating document on destination " + destinationUrl);            
160
            SystemMetadata sysmeta = generateSystemMetadata(doc);
161
            for(int j=0; j<emld.distributionMetadata.size(); j++)
162
            {
163
                Identifier emlId = sysmeta.getIdentifier();
164
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
165
                String dataDocUrl = dm.url;
166
                String dataDocMimeType = dm.mimeType;
167
                String dataDocLocalId = "";
168
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
169
                { //we only handle ecogrid urls right now
170
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
171
                            "ecogrid://knb/".length(), dataDocUrl.length());
172
                    //get the file
173
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
174
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
175
                    String dataDocText = streamToString(dataDocIs);
176
                    
177
                    //set the id
178
                    Identifier did = new Identifier();
179
                    did.setValue(dataDocLocalId);
180
                    
181
                    //add the desribeby to the eml's sysmeta
182
                    System.out.println("adding describe for doc " + 
183
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
184
                    sysmeta.addDescribe(did);
185
                    
186
                    //create sysmeta for the data doc                    
187
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
188
                    //overwrite the bogus values from the last call 
189
                    dataDocSysMeta.setIdentifier(did);
190
                    dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
191
                    Checksum checksum = new Checksum();
192
                    dataDocIs = stringToStream(dataDocText);
193
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
194
                    checksum.setAlgorithm(ca);
195
                    checksum.setValue(checksum(dataDocIs));
196
                    dataDocSysMeta.setChecksum(checksum);
197
                    dataDocSysMeta.setSize(dataDocText.getBytes().length);
198
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
199
                    boolean error = false;
200
                    //create the data doc on d1
201
                    try
202
                    {
203
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
204
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
205
                    }
206
                    catch(Exception e)
207
                    {
208
                        error = true;
209
                        System.out.println("ERROR: Could not create data document with id " + 
210
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
211
                    }
212
                    finally
213
                    {
214
                        if(error)
215
                        {
216
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
217
                                    "FAILED.");
218
                        }
219
                        else
220
                        {
221
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
222
                                " which is described by " + sysmeta.getIdentifier().getValue());
223
                        }
224
                    }
225
                }
226
                else
227
                {
228
                    System.out.println("WARNING: Could not process describes url " +
229
                            dataDocUrl + " for document " + doc.docid + 
230
                    ".  Only ecogrid://knb/ urls are currently supported.");
231
                }
232
            }
233
            
234
            try
235
            {
236
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
237
                    IOUtils.toInputStream(doc.doctext), sysmeta);
238
              System.out.println("Success inserting document " + id.getValue());
239
              
240
            }
241
            catch(Exception e)
242
            {
243
                e.printStackTrace();
244
                System.out.println("Could not create document with id " + 
245
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
246
                
247
            }
248
            finally
249
            {
250
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
251
            }
252
        }
253
        
254
        logout();
255
    }
256
    
257
    /**
258
     * create the documents listed by an eml document as described in the 
259
     * new system
260
     * @param doc
261
     * @param emld
262
     */
263
    private void createDescribedDocuments(Document doc, EMLDocument emld)
264
    {
265
        
266
    }
267
    
268
    /**
269
     * @param doc
270
     * @return
271
     */
272
    private SystemMetadata generateSystemMetadata(Document doc)
273
      throws Exception
274
    {
275
        SystemMetadata sm = new SystemMetadata();
276
        //set the id
277
        Identifier id = new Identifier();
278
        id.setValue(doc.docid.trim());
279
        sm.setIdentifier(id);
280
        
281
        //set the object format
282
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
283
        if(format == null)
284
        {
285
            if(doc.doctype.trim().equals("BIN"))
286
            {
287
                format = ObjectFormat.OCTET_STREAM;
288
            }
289
            else
290
            {
291
                format = ObjectFormat.TEXT_PLAIN;
292
            }
293
        }
294
        sm.setObjectFormat(format);
295
        
296
        //create the checksum
297
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes());
298
        String checksumS = checksum(bais);
299
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
300
        Checksum checksum = new Checksum();
301
        checksum.setValue(checksumS);
302
        checksum.setAlgorithm(ca);
303
        sm.setChecksum(checksum);
304
        
305
        //set the size
306
        sm.setSize(doc.doctext.getBytes().length);
307
        
308
        //submitter
309
        Principal p = new Principal();
310
        p.setValue("unknown");
311
        sm.setSubmitter(p);
312
        sm.setRightsHolder(p);
313
        try
314
        {
315
            Date dateCreated = parseMetacatDate(doc.createDate);
316
            sm.setDateUploaded(dateCreated);
317
            Date dateUpdated = parseMetacatDate(doc.updateDate);
318
            sm.setDateSysMetadataModified(dateUpdated);
319
        }
320
        catch(Exception e)
321
        {
322
            System.out.println("couldn't parse a date: " + e.getMessage());
323
            Date dateCreated = new Date();
324
            sm.setDateUploaded(dateCreated);
325
            Date dateUpdated = new Date();
326
            sm.setDateSysMetadataModified(dateUpdated);
327
        }
328
        NodeReference nr = new NodeReference();
329
        nr.setValue("KNB");
330
        sm.setOriginMemberNode(nr);
331
        sm.setAuthoritativeMemberNode(nr);
332
        
333
        return sm;
334
    }
335
    
336
    private void printHeader(String s)
337
    {
338
        System.out.println("****** " + s + " *******");
339
    }
340
    
341
    /**
342
     * produce an md5 checksum for item
343
     */
344
    private String checksum(InputStream is)
345
      throws Exception
346
    {        
347
        byte[] buffer = new byte[1024];
348
        MessageDigest complete = MessageDigest.getInstance("MD5");
349
        int numRead;
350
        
351
        do 
352
        {
353
          numRead = is.read(buffer);
354
          if (numRead > 0) 
355
          {
356
            complete.update(buffer, 0, numRead);
357
          }
358
        } while (numRead != -1);
359
        
360
        
361
        return getHex(complete.digest());
362
    }
363
    
364
    /**
365
     * convert a byte array to a hex string
366
     */
367
    private static String getHex( byte [] raw ) 
368
    {
369
        final String HEXES = "0123456789ABCDEF";
370
        if ( raw == null ) {
371
          return null;
372
        }
373
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
374
        for ( final byte b : raw ) {
375
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
376
             .append(HEXES.charAt((b & 0x0F)));
377
        }
378
        return hex.toString();
379
    }
380
    
381
    /**
382
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
383
     * a proper date object
384
     * @param date
385
     * @return
386
     */
387
    private Date parseMetacatDate(String date)
388
    {
389
        String year = date.substring(0, 4);
390
        String month = date.substring(5, 7);
391
        String day = date.substring(8, 10);
392
        Calendar c = Calendar.getInstance();
393
        c.set(new Integer(year).intValue(), 
394
              new Integer(month).intValue(), 
395
              new Integer(day).intValue());
396
        return c.getTime();
397
    }
398

    
399
    /**
400
     * send a request to the resource
401
     */
402
    private InputStream sendRequest(String contextRootUrl, String resource, 
403
            String sessionid, String method, String urlParamaters, 
404
            String contentType, InputStream dataStream) 
405
        throws Exception 
406
    {
407
        
408
        HttpURLConnection connection = null ;
409
        String restURL = contextRootUrl + resource;
410

    
411
        if (urlParamaters != null) {
412
            if (restURL.indexOf("?") == -1)             
413
                restURL += "?";
414
            restURL += urlParamaters; 
415
            if(restURL.indexOf(" ") != -1)
416
            {
417
                restURL = restURL.replaceAll("\\s", "%20");
418
            }
419
        }
420
        
421
        if(sessionid != null)
422
        {
423
            if(restURL.indexOf("?") == -1)
424
            {
425
                restURL += "?sessionid=" + sessionid;
426
            }
427
            else
428
            {
429
                restURL += "&sessionid=" + sessionid;
430
            }
431
        }
432

    
433
        URL u = null;
434
        InputStream content = null;
435
        System.out.println("url: " + restURL);
436
        System.out.println("method: " + method);
437
        u = new URL(restURL);
438
        connection = (HttpURLConnection) u.openConnection();
439
        if (contentType!=null) {
440
            connection.setRequestProperty("Content-Type",contentType);
441
        }
442

    
443
        connection.setDoOutput(true);
444
        connection.setDoInput(true);
445
        connection.setRequestMethod(method);
446

    
447
        if (!method.equals("GET")) {
448
            if (dataStream != null) {
449
                OutputStream out = connection.getOutputStream();
450
                IOUtils.copy(dataStream, out);
451
            }
452
        }
453

    
454
        return connection.getInputStream();   
455
    }
456
    
457
    /**
458
     * create a mime multipart message from object and sysmeta
459
     */
460
    private MimeMultipart createMimeMultipart(InputStream object)
461
      throws Exception
462
    {
463
        final MimeMultipart mmp = new MimeMultipart();
464
        MimeBodyPart objectPart = new MimeBodyPart();
465
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
466
        objectPart.setFileName("doctext");
467
        DataSource ds = new InputStreamDataSource("doctext", object);
468
        DataHandler dh = new DataHandler(ds);
469
        objectPart.setDataHandler(dh);
470
        mmp.addBodyPart(objectPart);
471
        return mmp;
472
    }
473
    
474
    /**
475
     * parse a metacat query response and return a vector of docids
476
     * @param response
477
     * @return
478
     */
479
    private Vector<Document> parseResponse(String response)
480
    {
481
        Vector<Document> v = new Vector<Document>();
482
        int dstart = response.indexOf("<document>");
483
        int dend = response.indexOf("</document>", dstart);
484
        while(dstart != -1)
485
        {
486
            String doc = response.substring(dstart + "<document>".length(), dend);
487
            //System.out.println("adding " + docid);
488
            Document d = new Document(getFieldFromDoc(doc, "docid"),
489
                    getFieldFromDoc(doc, "doctype"),
490
                    getFieldFromDoc(doc, "createdate"),
491
                    getFieldFromDoc(doc, "updatedate"));
492
            v.add(d);
493
            dstart = response.indexOf("<document>", dend);
494
            dend = response.indexOf("</document>", dstart);
495
        }
496
        
497
        return v;
498
    }
499
    
500
    private String getFieldFromDoc(String doc, String fieldname)
501
    {
502
        String field = "<" + fieldname + ">";
503
        String fieldend = "</" + fieldname + ">";
504
        int start = doc.indexOf(field);
505
        int end = doc.indexOf(fieldend);
506
        String s = doc.substring(start + field.length(), end);
507
        //System.out.println("field: " + fieldname + " : " + s);
508
        return s;
509
    }
510
    
511
    /**
512
     * login the source
513
     * @return
514
     * @throws Exception
515
     */
516
    private String loginSource()
517
      throws Exception
518
    {
519
        return login(sourceUrl);
520
    }
521
    
522
    /**
523
     * login the destination
524
     * @return
525
     * @throws Exception
526
     */
527
    private String loginDest()
528
        throws Exception
529
    {
530
        return login(destinationUrl);
531
    }
532
    
533
    /**
534
     * returns a sessionid
535
     * @return
536
     */
537
    private String login(String sourceUrl)
538
      throws Exception
539
    {
540
        InputStream is = getResponse(sourceUrl, "/metacat", 
541
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
542
        "POST");
543
        String response = streamToString(is);
544
        //System.out.println("response: " + response);
545
        if(response.indexOf("sessionId") == -1)
546
        {
547
            throw new Exception("Error logging into " + sourceUrl);
548
        }
549
        
550
        String sessionid = response.substring(
551
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
552
                response.indexOf("</sessionId>"));
553
        System.out.println("sessionid: " + sessionid);
554
        return sessionid;
555
    }
556
    
557
    /**
558
     * logout both the source and destination
559
     * @throws Exception
560
     */
561
    private void logout()
562
        throws Exception
563
    {
564
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
565
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
566
    }
567
    
568
    /**
569
     * get an http response
570
     * @param contextRootUrl
571
     * @param resource
572
     * @param urlParameters
573
     * @param method
574
     * @return
575
     * @throws Exception
576
     */
577
    private InputStream getResponse(String contextRootUrl, String resource, 
578
            String urlParameters, String method)
579
      throws Exception
580
    {
581
        HttpURLConnection connection = null ;
582

    
583
        String restURL = contextRootUrl+resource;
584

    
585
        if (urlParameters != null) {
586
            if (restURL.indexOf("?") == -1)             
587
                restURL += "?";
588
            restURL += urlParameters; 
589
            if(restURL.indexOf(" ") != -1)
590
            {
591
                restURL = restURL.replaceAll("\\s", "%20");
592
            }
593
        }
594

    
595
        URL u = null;
596
        InputStream content = null;            
597
        System.out.println("url: " + restURL);
598
        System.out.println("method: " + method);
599
        u = new URL(restURL);
600
        connection = (HttpURLConnection) u.openConnection();
601
        connection.setDoOutput(true);
602
        connection.setDoInput(true);
603
        connection.setRequestMethod(method);
604
        content = connection.getInputStream();
605
        return content;
606
    }
607
    
608
    private String streamToString(InputStream is)
609
        throws Exception
610
    {
611
        byte b[] = new byte[1024];
612
        int numread = is.read(b, 0, 1024);
613
        String response = new String();
614
        while(numread != -1)
615
        {
616
            response += new String(b, 0, numread);
617
            numread = is.read(b, 0, 1024);
618
        }
619
        return response;
620
    }
621
    
622
    private InputStream stringToStream(String s)
623
      throws Exception
624
    {
625
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes());
626
        return bais;
627
    }
628
    
629
    private class Document
630
    {
631
        public String docid;
632
        public String doctype;
633
        public String createDate;
634
        public String updateDate;
635
        public String doctext;
636
        
637
        public Document(String docid, String doctype, String createDate, String updateDate)
638
        {
639
            this.docid = docid.trim();
640
            this.doctype = doctype.trim();
641
            this.createDate = createDate.trim();
642
            this.updateDate = updateDate.trim();
643
        }
644
    }
645
}
(8-8/15)