Project

General

Profile

1
/**
 *  '$RCSfile$'
 *    Purpose: A Class that implements administrative methods 
 *  Copyright: 2010 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Michael Daigle
 * 
 *   '$Author: berkley $'
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
 * '$Revision: 5374 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetaCatServlet;
45
import edu.ucsb.nceas.metacat.MetacatHandler;
46
import edu.ucsb.nceas.metacat.MetacatResultSet;
47
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
48
import edu.ucsb.nceas.metacat.dataone.CrudService;
49
import edu.ucsb.nceas.metacat.properties.PropertyService;
50
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
51
import edu.ucsb.nceas.metacat.service.ObjectFormatService;
52

    
53
import org.dataone.service.exceptions.InvalidSystemMetadata;
54
import org.dataone.service.exceptions.ServiceFailure;
55
import org.dataone.service.types.AuthToken;
56
import org.dataone.service.types.Checksum;
57
import org.dataone.service.types.ChecksumAlgorithm;
58
import org.dataone.service.types.NodeReference;
59
import org.dataone.service.types.ObjectFormat;
60
import org.dataone.service.types.Principal;
61
import org.dataone.service.types.SystemMetadata;
62
import org.dataone.service.types.Identifier;
63
import org.dataone.client.D1Client;
64
import org.dataone.client.MNode;
65
import org.dataone.eml.DataoneEMLParser;
66
import org.dataone.eml.EMLDocument;
67
import org.dataone.eml.EMLDocument.DistributionMetadata;
68

    
69
//import sun.tools.jstat.Identifier;
70

    
71
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
72

    
73
/**
 * A class to populate a metacat instance based on documents returned from a query
 * @author berkley
 */
77
public class MetacatPopulator
78
{
79
    /** base url of the metacat instance documents are read from */
    private final String sourceUrl;
    /** base url of the instance documents are written to */
    private final String destinationUrl;
    /** search term sent to the source as the "anyfield" query parameter */
    private final String query;
    /** user name used for the source and destination logins */
    private final String username;
    /** password used for the source and destination logins */
    private final String password;
    
    /**
     * create a new MetacatPopulator with given source and destination urls.  
     * These should be
     * of the form "http://<url>/<metacat_instance>"
     * The values are stored as given; nothing is validated here.
     * NOTE(review): an earlier comment said a null username and/or password
     * makes the query run as public.  Nothing in this class special-cases
     * null, so confirm that behaviour against the server before relying on it.
     * @param sourceUrl base url of the instance to read from
     * @param destUrl base url of the instance to write to
     * @param query the search term used to select documents on the source
     * @param username the user to log in as
     * @param password the password for username
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
    }
104
    
105
    /**
106
     * populate from the source
107
     */
108
    public void populate()
109
      throws Exception
110
    {
111
        printHeader("Source login");
112
        String sourceSessionid = loginSource();
113
        
114
        //do a query
115
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
116
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
117
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
118
        params += "action=query&";
119
        params += "qformat=xml&";
120
        params += "anyfield=" + query;
121
        
122
        printHeader("Searching source");
123
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
124
        InputStream is = getResponse(sourceUrl, "/metacat",
125
                params, "POST");
126
        String response = streamToString(is);
127
        //System.out.println("response: " + response);
128
        Vector<Document> docs = parseResponse(response);
129
        
130
        
131
        printHeader("Parsing source results");
132
        System.out.println("creating MN with url: " + destinationUrl + "/");
133
        MNode mn = D1Client.getMN(destinationUrl + "/");
134
        
135
        printHeader("Processing " + docs.size() + " results.");
136
        printHeader("logging in to the destination " + destinationUrl);
137
        AuthToken authtoken = mn.login(username, password);
138
        System.out.println("authtoken: " + authtoken.getToken());
139
        for(int i=0; i<docs.size(); i++)
140
        {
141
            //for each document in the query
142
            Document doc = docs.get(i);
143
            String docid = doc.docid;
144
            //get the doc from source
145
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
146
            params = "action=read&qformat=xml&docid=" + docid;
147
            is = getResponse(sourceUrl, "/metacat", params, "POST");
148
            String doctext = streamToString(is);
149
            System.out.println("doctext: " + doctext);
150
            is = stringToStream(doctext);
151
            //parse the document
152
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
153
            EMLDocument emld = parser.parseDocument(is);
154
            if(emld == null)
155
            {
156
                continue;
157
            }
158
            //go through the DistributionMetadata and download any described data
159
            
160
            is = stringToStream(doctext);
161
            doc.doctext = doctext;
162

    
163
            printHeader("creating document on destination " + destinationUrl);            
164
            SystemMetadata sysmeta = generateSystemMetadata(doc);
165
            for(int j=0; j<emld.distributionMetadata.size(); j++)
166
            {
167
                Identifier emlId = sysmeta.getIdentifier();
168
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
169
                String dataDocUrl = dm.url;
170
                String dataDocMimeType = dm.mimeType;
171
                String dataDocLocalId = "";
172
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
173
                { //we only handle ecogrid urls right now
174
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
175
                            "ecogrid://knb/".length(), dataDocUrl.length());
176
                    //get the file
177
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
178
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
179
                    String dataDocText = streamToString(dataDocIs);
180
                    
181
                    //set the id
182
                    Identifier did = new Identifier();
183
                    did.setValue(dataDocLocalId);
184
                    
185
                    //add the desribeby to the eml's sysmeta
186
                    System.out.println("adding describe for doc " + 
187
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
188
                    sysmeta.addDescribe(did);
189
                    
190
                    //create sysmeta for the data doc                    
191
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
192
                    //overwrite the bogus values from the last call 
193
                    dataDocSysMeta.setIdentifier(did);
194
                    dataDocSysMeta.setObjectFormat(ObjectFormatService.getFormat(dataDocMimeType));
195
                    Checksum checksum = new Checksum();
196
                    dataDocIs = stringToStream(dataDocText);
197
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
198
                    checksum.setAlgorithm(ca);
199
                    checksum.setValue(checksum(dataDocIs));
200
                    dataDocSysMeta.setChecksum(checksum);
201
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
202
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
203
                    boolean error = false;
204
                    //create the data doc on d1
205
                    try
206
                    {
207
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
208
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
209
                    }
210
                    catch(Exception e)
211
                    {
212
                        error = true;
213
                        System.out.println("ERROR: Could not create data document with id " + 
214
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
215
                    }
216
                    finally
217
                    {
218
                        if(error)
219
                        {
220
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
221
                                    "FAILED.");
222
                        }
223
                        else
224
                        {
225
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
226
                                " which is described by " + sysmeta.getIdentifier().getValue());
227
                        }
228
                    }
229
                }
230
                else
231
                {
232
                    System.out.println("WARNING: Could not process describes url " +
233
                            dataDocUrl + " for document " + doc.docid + 
234
                    ".  Only ecogrid://knb/ urls are currently supported.");
235
                }
236
            }
237
            
238
            try
239
            {
240
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
241
                    IOUtils.toInputStream(doc.doctext), sysmeta);
242
              System.out.println("Success inserting document " + id.getValue());
243
              
244
            }
245
            catch(Exception e)
246
            {
247
                e.printStackTrace();
248
                System.out.println("Could not create document with id " + 
249
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
250
                
251
            }
252
            finally
253
            {
254
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
255
            }
256
        }
257
        
258
        logout();
259
    }
260
    
261
    /**
262
     * create the documents listed by an eml document as described in the 
263
     * new system
264
     * @param doc
265
     * @param emld
266
     */
267
    private void createDescribedDocuments(Document doc, EMLDocument emld)
268
    {
269
        
270
    }
271
    
272
    /**
273
     * @param doc
274
     * @return
275
     */
276
    private SystemMetadata generateSystemMetadata(Document doc)
277
      throws Exception
278
    {
279
        SystemMetadata sm = new SystemMetadata();
280
        //set the id
281
        Identifier id = new Identifier();
282
        id.setValue(doc.docid.trim());
283
        sm.setIdentifier(id);
284
        
285
        //set the object format
286
        ObjectFormat format = ObjectFormatService.getFormat(doc.doctype);
287
        if(format == null)
288
        {
289
            if(doc.doctype.trim().equals("BIN"))
290
            {
291
                format = ObjectFormatService.getFormat("application/octet-stream");
292
            }
293
            else
294
            {
295
                format = ObjectFormatService.getFormat("text/plain");
296
            }
297
        }
298
        sm.setObjectFormat(format);
299
        
300
        //create the checksum
301
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
302
        String checksumS = checksum(bais);
303
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
304
        Checksum checksum = new Checksum();
305
        checksum.setValue(checksumS);
306
        checksum.setAlgorithm(ca);
307
        sm.setChecksum(checksum);
308
        
309
        //set the size
310
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
311
        
312
        //submitter
313
        Principal p = new Principal();
314
        p.setValue("unknown");
315
        sm.setSubmitter(p);
316
        sm.setRightsHolder(p);
317
        try
318
        {
319
            Date dateCreated = parseMetacatDate(doc.createDate);
320
            sm.setDateUploaded(dateCreated);
321
            Date dateUpdated = parseMetacatDate(doc.updateDate);
322
            sm.setDateSysMetadataModified(dateUpdated);
323
        }
324
        catch(Exception e)
325
        {
326
            System.out.println("couldn't parse a date: " + e.getMessage());
327
            Date dateCreated = new Date();
328
            sm.setDateUploaded(dateCreated);
329
            Date dateUpdated = new Date();
330
            sm.setDateSysMetadataModified(dateUpdated);
331
        }
332
        NodeReference nr = new NodeReference();
333
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
334
        sm.setOriginMemberNode(nr);
335
        sm.setAuthoritativeMemberNode(nr);
336
        
337
        return sm;
338
    }
339
    
340
    private void printHeader(String s)
341
    {
342
        System.out.println("****** " + s + " *******");
343
    }
344
    
345
    /**
346
     * produce an md5 checksum for item
347
     */
348
    private String checksum(InputStream is)
349
      throws Exception
350
    {        
351
        byte[] buffer = new byte[1024];
352
        MessageDigest complete = MessageDigest.getInstance("MD5");
353
        int numRead;
354
        
355
        do 
356
        {
357
          numRead = is.read(buffer);
358
          if (numRead > 0) 
359
          {
360
            complete.update(buffer, 0, numRead);
361
          }
362
        } while (numRead != -1);
363
        
364
        
365
        return getHex(complete.digest());
366
    }
367
    
368
    /**
369
     * convert a byte array to a hex string
370
     */
371
    private static String getHex( byte [] raw ) 
372
    {
373
        final String HEXES = "0123456789ABCDEF";
374
        if ( raw == null ) {
375
          return null;
376
        }
377
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
378
        for ( final byte b : raw ) {
379
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
380
             .append(HEXES.charAt((b & 0x0F)));
381
        }
382
        return hex.toString();
383
    }
384
    
385
    /**
386
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
387
     * a proper date object
388
     * @param date
389
     * @return
390
     */
391
    private Date parseMetacatDate(String date)
392
    {
393
        String year = date.substring(0, 4);
394
        String month = date.substring(5, 7);
395
        String day = date.substring(8, 10);
396
        Calendar c = Calendar.getInstance();
397
        c.set(new Integer(year).intValue(), 
398
              new Integer(month).intValue(), 
399
              new Integer(day).intValue());
400
        return c.getTime();
401
    }
402

    
403
    /**
404
     * send a request to the resource
405
     */
406
    private InputStream sendRequest(String contextRootUrl, String resource, 
407
            String sessionid, String method, String urlParamaters, 
408
            String contentType, InputStream dataStream) 
409
        throws Exception 
410
    {
411
        
412
        HttpURLConnection connection = null ;
413
        String restURL = contextRootUrl + resource;
414

    
415
        if (urlParamaters != null) {
416
            if (restURL.indexOf("?") == -1)             
417
                restURL += "?";
418
            restURL += urlParamaters; 
419
            if(restURL.indexOf(" ") != -1)
420
            {
421
                restURL = restURL.replaceAll("\\s", "%20");
422
            }
423
        }
424
        
425
        if(sessionid != null)
426
        {
427
            if(restURL.indexOf("?") == -1)
428
            {
429
                restURL += "?sessionid=" + sessionid;
430
            }
431
            else
432
            {
433
                restURL += "&sessionid=" + sessionid;
434
            }
435
        }
436

    
437
        URL u = null;
438
        InputStream content = null;
439
        System.out.println("url: " + restURL);
440
        System.out.println("method: " + method);
441
        u = new URL(restURL);
442
        connection = (HttpURLConnection) u.openConnection();
443
        if (contentType!=null) {
444
            connection.setRequestProperty("Content-Type",contentType);
445
        }
446

    
447
        connection.setDoOutput(true);
448
        connection.setDoInput(true);
449
        connection.setRequestMethod(method);
450

    
451
        if (!method.equals("GET")) {
452
            if (dataStream != null) {
453
                OutputStream out = connection.getOutputStream();
454
                IOUtils.copy(dataStream, out);
455
            }
456
        }
457

    
458
        return connection.getInputStream();   
459
    }
460
    
461
    /**
462
     * create a mime multipart message from object and sysmeta
463
     */
464
    private MimeMultipart createMimeMultipart(InputStream object)
465
      throws Exception
466
    {
467
        final MimeMultipart mmp = new MimeMultipart();
468
        MimeBodyPart objectPart = new MimeBodyPart();
469
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
470
        objectPart.setFileName("doctext");
471
        DataSource ds = new InputStreamDataSource("doctext", object);
472
        DataHandler dh = new DataHandler(ds);
473
        objectPart.setDataHandler(dh);
474
        mmp.addBodyPart(objectPart);
475
        return mmp;
476
    }
477
    
478
    /**
479
     * parse a metacat query response and return a vector of docids
480
     * @param response
481
     * @return
482
     */
483
    private Vector<Document> parseResponse(String response)
484
    {
485
        Vector<Document> v = new Vector<Document>();
486
        int dstart = response.indexOf("<document>");
487
        int dend = response.indexOf("</document>", dstart);
488
        while(dstart != -1)
489
        {
490
            String doc = response.substring(dstart + "<document>".length(), dend);
491
            //System.out.println("adding " + docid);
492
            Document d = new Document(getFieldFromDoc(doc, "docid"),
493
                    getFieldFromDoc(doc, "doctype"),
494
                    getFieldFromDoc(doc, "createdate"),
495
                    getFieldFromDoc(doc, "updatedate"));
496
            v.add(d);
497
            dstart = response.indexOf("<document>", dend);
498
            dend = response.indexOf("</document>", dstart);
499
        }
500
        
501
        return v;
502
    }
503
    
504
    private String getFieldFromDoc(String doc, String fieldname)
505
    {
506
        String field = "<" + fieldname + ">";
507
        String fieldend = "</" + fieldname + ">";
508
        int start = doc.indexOf(field);
509
        int end = doc.indexOf(fieldend);
510
        String s = doc.substring(start + field.length(), end);
511
        //System.out.println("field: " + fieldname + " : " + s);
512
        return s;
513
    }
514
    
515
    /**
516
     * login the source
517
     * @return
518
     * @throws Exception
519
     */
520
    private String loginSource()
521
      throws Exception
522
    {
523
        return login(sourceUrl);
524
    }
525
    
526
    /**
527
     * login the destination
528
     * @return
529
     * @throws Exception
530
     */
531
    private String loginDest()
532
        throws Exception
533
    {
534
        return login(destinationUrl);
535
    }
536
    
537
    /**
538
     * returns a sessionid
539
     * @return
540
     */
541
    private String login(String sourceUrl)
542
      throws Exception
543
    {
544
        InputStream is = getResponse(sourceUrl, "/metacat", 
545
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
546
        "POST");
547
        String response = streamToString(is);
548
        //System.out.println("response: " + response);
549
        if(response.indexOf("sessionId") == -1)
550
        {
551
            throw new Exception("Error logging into " + sourceUrl);
552
        }
553
        
554
        String sessionid = response.substring(
555
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
556
                response.indexOf("</sessionId>"));
557
        System.out.println("sessionid: " + sessionid);
558
        return sessionid;
559
    }
560
    
561
    /**
562
     * logout both the source and destination
563
     * @throws Exception
564
     */
565
    private void logout()
566
        throws Exception
567
    {
568
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
569
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
570
    }
571
    
572
    /**
573
     * get an http response
574
     * @param contextRootUrl
575
     * @param resource
576
     * @param urlParameters
577
     * @param method
578
     * @return
579
     * @throws Exception
580
     */
581
    private InputStream getResponse(String contextRootUrl, String resource, 
582
            String urlParameters, String method)
583
      throws Exception
584
    {
585
        HttpURLConnection connection = null ;
586

    
587
        String restURL = contextRootUrl+resource;
588

    
589
        if (urlParameters != null) {
590
            if (restURL.indexOf("?") == -1)             
591
                restURL += "?";
592
            restURL += urlParameters; 
593
            if(restURL.indexOf(" ") != -1)
594
            {
595
                restURL = restURL.replaceAll("\\s", "%20");
596
            }
597
        }
598

    
599
        URL u = null;
600
        InputStream content = null;            
601
        System.out.println("url: " + restURL);
602
        System.out.println("method: " + method);
603
        u = new URL(restURL);
604
        connection = (HttpURLConnection) u.openConnection();
605
        connection.setDoOutput(true);
606
        connection.setDoInput(true);
607
        connection.setRequestMethod(method);
608
        content = connection.getInputStream();
609
        return content;
610
    }
611
    
612
    private String streamToString(InputStream is)
613
        throws Exception
614
    {
615
        byte b[] = new byte[1024];
616
        int numread = is.read(b, 0, 1024);
617
        String response = new String();
618
        while(numread != -1)
619
        {
620
            response += new String(b, 0, numread);
621
            numread = is.read(b, 0, 1024);
622
        }
623
        return response;
624
    }
625
    
626
    private InputStream stringToStream(String s)
627
      throws Exception
628
    {
629
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
630
        return bais;
631
    }
632
    
633
    private class Document
634
    {
635
        public String docid;
636
        public String doctype;
637
        public String createDate;
638
        public String updateDate;
639
        public String doctext;
640
        
641
        public Document(String docid, String doctype, String createDate, String updateDate)
642
        {
643
            this.docid = docid.trim();
644
            this.doctype = doctype.trim();
645
            this.createDate = createDate.trim();
646
            this.updateDate = updateDate.trim();
647
        }
648
    }
649
}
(8-8/15)