Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a metacat instance with documents returned from a query
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetaCatServlet;
45
import edu.ucsb.nceas.metacat.MetacatHandler;
46
import edu.ucsb.nceas.metacat.MetacatResultSet;
47
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
48
import edu.ucsb.nceas.metacat.dataone.CrudService;
49
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
50

    
51
import org.dataone.service.exceptions.InvalidSystemMetadata;
52
import org.dataone.service.exceptions.ServiceFailure;
53
import org.dataone.service.types.AuthToken;
54
import org.dataone.service.types.Checksum;
55
import org.dataone.service.types.ChecksumAlgorithm;
56
import org.dataone.service.types.NodeReference;
57
import org.dataone.service.types.ObjectFormat;
58
import org.dataone.service.types.Principal;
59
import org.dataone.service.types.SystemMetadata;
60
import org.dataone.service.types.Identifier;
61
import org.dataone.client.D1Client;
62
import org.dataone.client.MNode;
63
import org.dataone.eml.DataoneEMLParser;
64
import org.dataone.eml.EMLDocument;
65
import org.dataone.eml.EMLDocument.DistributionMetadata;
66

    
67
//import sun.tools.jstat.Identifier;
68

    
69
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
70

    
71
/**
 * A class to populate a metacat instance based on documents returned from a query.
 *
 * @author berkley
 */
75
public class MetacatPopulator
76
{
77
    /** base url of the metacat instance that documents are read from */
    private String sourceUrl;
    /** base url of the metacat instance that documents are written to */
    private String destinationUrl;
    /** search term sent to the source as the "anyfield" query parameter */
    private String query;
    /** user name used when logging in to the source and the destination */
    private String username;
    /** password used when logging in to the source and the destination */
    private String password;

    /**
     * Create a new MetacatPopulator for the given source and destination.
     * Both urls should be of the form "http://<url>/<metacat_instance>".
     * The constructor only stores its arguments; nothing is contacted until
     * populate() is called.
     * <p>
     * NOTE(review): earlier documentation stated that a null username and/or
     * password makes the query run as public.  The login code in this class
     * does not special-case null values -- confirm before relying on it.
     *
     * @param sourceUrl base url of the metacat to copy from
     * @param destUrl base url of the metacat to copy to
     * @param query the search term used to select documents on the source
     * @param username the user to log in as
     * @param password the password for username
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
    }
102
    
103
    /**
104
     * populate from the source
105
     */
106
    public void populate()
107
      throws Exception
108
    {
109
        printHeader("Source login");
110
        String sourceSessionid = loginSource();
111
        
112
        //do a query
113
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
114
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
115
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
116
        params += "action=query&";
117
        params += "qformat=xml&";
118
        params += "anyfield=" + query;
119
        
120
        printHeader("Searching source");
121
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
122
        InputStream is = getResponse(sourceUrl, "/metacat",
123
                params, "POST");
124
        String response = streamToString(is);
125
        //System.out.println("response: " + response);
126
        Vector<Document> docs = parseResponse(response);
127
        
128
        
129
        printHeader("Parsing source results");
130
        System.out.println("creating MN with url: " + destinationUrl + "/");
131
        MNode mn = D1Client.getMN(destinationUrl + "/");
132
        
133
        printHeader("Processing " + docs.size() + " results.");
134
        printHeader("logging in to the destination " + destinationUrl);
135
        AuthToken authtoken = mn.login(username, password);
136
        System.out.println("authtoken: " + authtoken.getToken());
137
        for(int i=0; i<docs.size(); i++)
138
        {
139
            //for each document in the query
140
            Document doc = docs.get(i);
141
            String docid = doc.docid;
142
            //get the doc from source
143
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
144
            params = "action=read&qformat=xml&docid=" + docid;
145
            is = getResponse(sourceUrl, "/metacat", params, "POST");
146
            String doctext = streamToString(is);
147
            System.out.println("doctext: " + doctext);
148
            is = stringToStream(doctext);
149
            //parse the document
150
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
151
            EMLDocument emld = parser.parseDocument(is);
152
            if(emld == null)
153
            {
154
                continue;
155
            }
156
            //go through the DistributionMetadata and download any described data
157
            
158
            is = stringToStream(doctext);
159
            doc.doctext = doctext;
160

    
161
            printHeader("creating document on destination " + destinationUrl);            
162
            SystemMetadata sysmeta = generateSystemMetadata(doc);
163
            for(int j=0; j<emld.distributionMetadata.size(); j++)
164
            {
165
                Identifier emlId = sysmeta.getIdentifier();
166
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
167
                String dataDocUrl = dm.url;
168
                String dataDocMimeType = dm.mimeType;
169
                String dataDocLocalId = "";
170
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
171
                { //we only handle ecogrid urls right now
172
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
173
                            "ecogrid://knb/".length(), dataDocUrl.length());
174
                    //get the file
175
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
176
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
177
                    String dataDocText = streamToString(dataDocIs);
178
                    
179
                    //set the id
180
                    Identifier did = new Identifier();
181
                    did.setValue(dataDocLocalId);
182
                    
183
                    //add the desribeby to the eml's sysmeta
184
                    System.out.println("adding describe for doc " + 
185
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
186
                    sysmeta.addDescribe(did);
187
                    
188
                    //create sysmeta for the data doc                    
189
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
190
                    //overwrite the bogus values from the last call 
191
                    dataDocSysMeta.setIdentifier(did);
192
                    dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
193
                    Checksum checksum = new Checksum();
194
                    dataDocIs = stringToStream(dataDocText);
195
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
196
                    checksum.setAlgorithm(ca);
197
                    checksum.setValue(checksum(dataDocIs));
198
                    dataDocSysMeta.setChecksum(checksum);
199
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
200
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
201
                    boolean error = false;
202
                    //create the data doc on d1
203
                    try
204
                    {
205
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
206
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
207
                    }
208
                    catch(Exception e)
209
                    {
210
                        error = true;
211
                        System.out.println("ERROR: Could not create data document with id " + 
212
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
213
                    }
214
                    finally
215
                    {
216
                        if(error)
217
                        {
218
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
219
                                    "FAILED.");
220
                        }
221
                        else
222
                        {
223
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
224
                                " which is described by " + sysmeta.getIdentifier().getValue());
225
                        }
226
                    }
227
                }
228
                else
229
                {
230
                    System.out.println("WARNING: Could not process describes url " +
231
                            dataDocUrl + " for document " + doc.docid + 
232
                    ".  Only ecogrid://knb/ urls are currently supported.");
233
                }
234
            }
235
            
236
            try
237
            {
238
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
239
                    IOUtils.toInputStream(doc.doctext), sysmeta);
240
              System.out.println("Success inserting document " + id.getValue());
241
              
242
            }
243
            catch(Exception e)
244
            {
245
                e.printStackTrace();
246
                System.out.println("Could not create document with id " + 
247
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
248
                
249
            }
250
            finally
251
            {
252
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
253
            }
254
        }
255
        
256
        logout();
257
    }
258
    
259
    /**
260
     * create the documents listed by an eml document as described in the 
261
     * new system
262
     * @param doc
263
     * @param emld
264
     */
265
    private void createDescribedDocuments(Document doc, EMLDocument emld)
266
    {
267
        
268
    }
269
    
270
    /**
271
     * @param doc
272
     * @return
273
     */
274
    private SystemMetadata generateSystemMetadata(Document doc)
275
      throws Exception
276
    {
277
        SystemMetadata sm = new SystemMetadata();
278
        //set the id
279
        Identifier id = new Identifier();
280
        id.setValue(doc.docid.trim());
281
        sm.setIdentifier(id);
282
        
283
        //set the object format
284
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
285
        if(format == null)
286
        {
287
            if(doc.doctype.trim().equals("BIN"))
288
            {
289
                format = ObjectFormat.OCTET_STREAM;
290
            }
291
            else
292
            {
293
                format = ObjectFormat.TEXT_PLAIN;
294
            }
295
        }
296
        sm.setObjectFormat(format);
297
        
298
        //create the checksum
299
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
300
        String checksumS = checksum(bais);
301
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
302
        Checksum checksum = new Checksum();
303
        checksum.setValue(checksumS);
304
        checksum.setAlgorithm(ca);
305
        sm.setChecksum(checksum);
306
        
307
        //set the size
308
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
309
        
310
        //submitter
311
        Principal p = new Principal();
312
        p.setValue("unknown");
313
        sm.setSubmitter(p);
314
        sm.setRightsHolder(p);
315
        try
316
        {
317
            Date dateCreated = parseMetacatDate(doc.createDate);
318
            sm.setDateUploaded(dateCreated);
319
            Date dateUpdated = parseMetacatDate(doc.updateDate);
320
            sm.setDateSysMetadataModified(dateUpdated);
321
        }
322
        catch(Exception e)
323
        {
324
            System.out.println("couldn't parse a date: " + e.getMessage());
325
            Date dateCreated = new Date();
326
            sm.setDateUploaded(dateCreated);
327
            Date dateUpdated = new Date();
328
            sm.setDateSysMetadataModified(dateUpdated);
329
        }
330
        NodeReference nr = new NodeReference();
331
        nr.setValue("KNB");
332
        sm.setOriginMemberNode(nr);
333
        sm.setAuthoritativeMemberNode(nr);
334
        
335
        return sm;
336
    }
337
    
338
    private void printHeader(String s)
339
    {
340
        System.out.println("****** " + s + " *******");
341
    }
342
    
343
    /**
344
     * produce an md5 checksum for item
345
     */
346
    private String checksum(InputStream is)
347
      throws Exception
348
    {        
349
        byte[] buffer = new byte[1024];
350
        MessageDigest complete = MessageDigest.getInstance("MD5");
351
        int numRead;
352
        
353
        do 
354
        {
355
          numRead = is.read(buffer);
356
          if (numRead > 0) 
357
          {
358
            complete.update(buffer, 0, numRead);
359
          }
360
        } while (numRead != -1);
361
        
362
        
363
        return getHex(complete.digest());
364
    }
365
    
366
    /**
367
     * convert a byte array to a hex string
368
     */
369
    private static String getHex( byte [] raw ) 
370
    {
371
        final String HEXES = "0123456789ABCDEF";
372
        if ( raw == null ) {
373
          return null;
374
        }
375
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
376
        for ( final byte b : raw ) {
377
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
378
             .append(HEXES.charAt((b & 0x0F)));
379
        }
380
        return hex.toString();
381
    }
382
    
383
    /**
384
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
385
     * a proper date object
386
     * @param date
387
     * @return
388
     */
389
    private Date parseMetacatDate(String date)
390
    {
391
        String year = date.substring(0, 4);
392
        String month = date.substring(5, 7);
393
        String day = date.substring(8, 10);
394
        Calendar c = Calendar.getInstance();
395
        c.set(new Integer(year).intValue(), 
396
              new Integer(month).intValue(), 
397
              new Integer(day).intValue());
398
        return c.getTime();
399
    }
400

    
401
    /**
402
     * send a request to the resource
403
     */
404
    private InputStream sendRequest(String contextRootUrl, String resource, 
405
            String sessionid, String method, String urlParamaters, 
406
            String contentType, InputStream dataStream) 
407
        throws Exception 
408
    {
409
        
410
        HttpURLConnection connection = null ;
411
        String restURL = contextRootUrl + resource;
412

    
413
        if (urlParamaters != null) {
414
            if (restURL.indexOf("?") == -1)             
415
                restURL += "?";
416
            restURL += urlParamaters; 
417
            if(restURL.indexOf(" ") != -1)
418
            {
419
                restURL = restURL.replaceAll("\\s", "%20");
420
            }
421
        }
422
        
423
        if(sessionid != null)
424
        {
425
            if(restURL.indexOf("?") == -1)
426
            {
427
                restURL += "?sessionid=" + sessionid;
428
            }
429
            else
430
            {
431
                restURL += "&sessionid=" + sessionid;
432
            }
433
        }
434

    
435
        URL u = null;
436
        InputStream content = null;
437
        System.out.println("url: " + restURL);
438
        System.out.println("method: " + method);
439
        u = new URL(restURL);
440
        connection = (HttpURLConnection) u.openConnection();
441
        if (contentType!=null) {
442
            connection.setRequestProperty("Content-Type",contentType);
443
        }
444

    
445
        connection.setDoOutput(true);
446
        connection.setDoInput(true);
447
        connection.setRequestMethod(method);
448

    
449
        if (!method.equals("GET")) {
450
            if (dataStream != null) {
451
                OutputStream out = connection.getOutputStream();
452
                IOUtils.copy(dataStream, out);
453
            }
454
        }
455

    
456
        return connection.getInputStream();   
457
    }
458
    
459
    /**
460
     * create a mime multipart message from object and sysmeta
461
     */
462
    private MimeMultipart createMimeMultipart(InputStream object)
463
      throws Exception
464
    {
465
        final MimeMultipart mmp = new MimeMultipart();
466
        MimeBodyPart objectPart = new MimeBodyPart();
467
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
468
        objectPart.setFileName("doctext");
469
        DataSource ds = new InputStreamDataSource("doctext", object);
470
        DataHandler dh = new DataHandler(ds);
471
        objectPart.setDataHandler(dh);
472
        mmp.addBodyPart(objectPart);
473
        return mmp;
474
    }
475
    
476
    /**
477
     * parse a metacat query response and return a vector of docids
478
     * @param response
479
     * @return
480
     */
481
    private Vector<Document> parseResponse(String response)
482
    {
483
        Vector<Document> v = new Vector<Document>();
484
        int dstart = response.indexOf("<document>");
485
        int dend = response.indexOf("</document>", dstart);
486
        while(dstart != -1)
487
        {
488
            String doc = response.substring(dstart + "<document>".length(), dend);
489
            //System.out.println("adding " + docid);
490
            Document d = new Document(getFieldFromDoc(doc, "docid"),
491
                    getFieldFromDoc(doc, "doctype"),
492
                    getFieldFromDoc(doc, "createdate"),
493
                    getFieldFromDoc(doc, "updatedate"));
494
            v.add(d);
495
            dstart = response.indexOf("<document>", dend);
496
            dend = response.indexOf("</document>", dstart);
497
        }
498
        
499
        return v;
500
    }
501
    
502
    private String getFieldFromDoc(String doc, String fieldname)
503
    {
504
        String field = "<" + fieldname + ">";
505
        String fieldend = "</" + fieldname + ">";
506
        int start = doc.indexOf(field);
507
        int end = doc.indexOf(fieldend);
508
        String s = doc.substring(start + field.length(), end);
509
        //System.out.println("field: " + fieldname + " : " + s);
510
        return s;
511
    }
512
    
513
    /**
514
     * login the source
515
     * @return
516
     * @throws Exception
517
     */
518
    private String loginSource()
519
      throws Exception
520
    {
521
        return login(sourceUrl);
522
    }
523
    
524
    /**
525
     * login the destination
526
     * @return
527
     * @throws Exception
528
     */
529
    private String loginDest()
530
        throws Exception
531
    {
532
        return login(destinationUrl);
533
    }
534
    
535
    /**
536
     * returns a sessionid
537
     * @return
538
     */
539
    private String login(String sourceUrl)
540
      throws Exception
541
    {
542
        InputStream is = getResponse(sourceUrl, "/metacat", 
543
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
544
        "POST");
545
        String response = streamToString(is);
546
        //System.out.println("response: " + response);
547
        if(response.indexOf("sessionId") == -1)
548
        {
549
            throw new Exception("Error logging into " + sourceUrl);
550
        }
551
        
552
        String sessionid = response.substring(
553
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
554
                response.indexOf("</sessionId>"));
555
        System.out.println("sessionid: " + sessionid);
556
        return sessionid;
557
    }
558
    
559
    /**
560
     * logout both the source and destination
561
     * @throws Exception
562
     */
563
    private void logout()
564
        throws Exception
565
    {
566
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
567
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
568
    }
569
    
570
    /**
571
     * get an http response
572
     * @param contextRootUrl
573
     * @param resource
574
     * @param urlParameters
575
     * @param method
576
     * @return
577
     * @throws Exception
578
     */
579
    private InputStream getResponse(String contextRootUrl, String resource, 
580
            String urlParameters, String method)
581
      throws Exception
582
    {
583
        HttpURLConnection connection = null ;
584

    
585
        String restURL = contextRootUrl+resource;
586

    
587
        if (urlParameters != null) {
588
            if (restURL.indexOf("?") == -1)             
589
                restURL += "?";
590
            restURL += urlParameters; 
591
            if(restURL.indexOf(" ") != -1)
592
            {
593
                restURL = restURL.replaceAll("\\s", "%20");
594
            }
595
        }
596

    
597
        URL u = null;
598
        InputStream content = null;            
599
        System.out.println("url: " + restURL);
600
        System.out.println("method: " + method);
601
        u = new URL(restURL);
602
        connection = (HttpURLConnection) u.openConnection();
603
        connection.setDoOutput(true);
604
        connection.setDoInput(true);
605
        connection.setRequestMethod(method);
606
        content = connection.getInputStream();
607
        return content;
608
    }
609
    
610
    private String streamToString(InputStream is)
611
        throws Exception
612
    {
613
        byte b[] = new byte[1024];
614
        int numread = is.read(b, 0, 1024);
615
        String response = new String();
616
        while(numread != -1)
617
        {
618
            response += new String(b, 0, numread);
619
            numread = is.read(b, 0, 1024);
620
        }
621
        return response;
622
    }
623
    
624
    private InputStream stringToStream(String s)
625
      throws Exception
626
    {
627
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
628
        return bais;
629
    }
630
    
631
    private class Document
632
    {
633
        public String docid;
634
        public String doctype;
635
        public String createDate;
636
        public String updateDate;
637
        public String doctext;
638
        
639
        public Document(String docid, String doctype, String createDate, String updateDate)
640
        {
641
            this.docid = docid.trim();
642
            this.doctype = doctype.trim();
643
            this.createDate = createDate.trim();
644
            this.updateDate = updateDate.trim();
645
        }
646
    }
647
}
(8-8/15)