Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements administrative methods 
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetaCatServlet;
45
import edu.ucsb.nceas.metacat.MetacatHandler;
46
import edu.ucsb.nceas.metacat.MetacatResultSet;
47
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
48
import edu.ucsb.nceas.metacat.dataone.CrudService;
49
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
50

    
51
import org.dataone.service.exceptions.InvalidSystemMetadata;
52
import org.dataone.service.exceptions.ServiceFailure;
53
import org.dataone.service.types.AuthToken;
54
import org.dataone.service.types.Checksum;
55
import org.dataone.service.types.ChecksumAlgorithm;
56
import org.dataone.service.types.NodeReference;
57
import org.dataone.service.types.ObjectFormat;
58
import org.dataone.service.types.Principal;
59
import org.dataone.service.types.SystemMetadata;
60
import org.dataone.service.types.Identifier;
61
import org.dataone.client.D1Client;
62
import org.dataone.client.MNode;
63
import org.dataone.eml.DataoneEMLParser;
64
import org.dataone.eml.EMLDocument;
65
import org.dataone.eml.EMLDocument.DistributionMetadata;
66

    
67
//import sun.tools.jstat.Identifier;
68

    
69
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
70

    
71
/**
 * A class to populate a metacat instance based on documents returned from a query
 * @author berkley
 */
75
public class MetacatPopulator
76
{
77
    private String sourceUrl = null;
78
    private String destinationUrl = null;
79
    private String query = null;
80
    private String username = null;
81
    private String password = null;
82
    
83
    /**
     * Build a populator that copies the documents matching a query from a
     * source metacat to a destination.  Both urls should be of the form
     * "http://<url>/<metacat_instance>".
     * NOTE(review): earlier documentation said that a null username and/or
     * password runs the query as public; the login code in this class does
     * not special-case null, so confirm before relying on that.
     * @param sourceUrl url of the instance to read documents from
     * @param destUrl url of the instance to create documents on
     * @param query the term to search the source for
     * @param username the user to log in as on source and destination
     * @param password the password for username
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        // where to read from and where to write to
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        // what to look for
        this.query = query;
        // who to log in as
        this.username = username;
        this.password = password;
    }
102
    
103
    /**
104
     * populate from the source
105
     */
106
    public void populate()
107
      throws Exception
108
    {
109
        printHeader("Source login");
110
        String sourceSessionid = loginSource();
111
        
112
        //do a query
113
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
114
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
115
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
116
        params += "action=query&";
117
        params += "qformat=xml&";
118
        params += "anyfield=" + query;
119
        
120
        printHeader("Searching source");
121
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
122
        InputStream is = getResponse(sourceUrl, "/metacat",
123
                params, "POST");
124
        String response = streamToString(is);
125
        //System.out.println("response: " + response);
126
        Vector<Document> docs = parseResponse(response);
127
        
128
        
129
        printHeader("Parsing source results");
130
        System.out.println("creating MN with url: " + destinationUrl + "/");
131
        MNode mn = D1Client.getMN(destinationUrl + "/");
132
        
133
        printHeader("Processing " + docs.size() + " results.");
134
        printHeader("logging in to the destination " + destinationUrl);
135
        AuthToken authtoken = mn.login(username, password);
136
        for(int i=0; i<docs.size(); i++)
137
        {
138
            //for each document in the query
139
            Document doc = docs.get(i);
140
            String docid = doc.docid;
141
            //get the doc from source
142
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
143
            params = "action=read&qformat=xml&docid=" + docid;
144
            is = getResponse(sourceUrl, "/metacat", params, "POST");
145
            String doctext = streamToString(is);
146
            System.out.println("doctext: " + doctext);
147
            is = stringToStream(doctext);
148
            //parse the document
149
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
150
            EMLDocument emld = parser.parseDocument(is);
151
            if(emld == null)
152
            {
153
                continue;
154
            }
155
            //go through the DistributionMetadata and download any described data
156
            
157
            is = stringToStream(doctext);
158
            doc.doctext = doctext;
159

    
160
            printHeader("creating document on destination " + destinationUrl);            
161
            SystemMetadata sysmeta = generateSystemMetadata(doc);
162
            for(int j=0; j<emld.distributionMetadata.size(); j++)
163
            {
164
                Identifier emlId = sysmeta.getIdentifier();
165
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
166
                String dataDocUrl = dm.url;
167
                String dataDocMimeType = dm.mimeType;
168
                String dataDocLocalId = "";
169
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
170
                { //we only handle ecogrid urls right now
171
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
172
                            "ecogrid://knb/".length(), dataDocUrl.length());
173
                    //get the file
174
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
175
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
176
                    String dataDocText = streamToString(dataDocIs);
177
                    
178
                    //set the id
179
                    Identifier did = new Identifier();
180
                    did.setValue(dataDocLocalId);
181
                    
182
                    //add the desribeby to the eml's sysmeta
183
                    System.out.println("adding describe for doc " + 
184
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
185
                    sysmeta.addDescribe(did);
186
                    
187
                    //create sysmeta for the data doc                    
188
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
189
                    //overwrite the bogus values from the last call 
190
                    dataDocSysMeta.setIdentifier(did);
191
                    dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
192
                    Checksum checksum = new Checksum();
193
                    dataDocIs = stringToStream(dataDocText);
194
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
195
                    checksum.setAlgorithm(ca);
196
                    checksum.setValue(checksum(dataDocIs));
197
                    dataDocSysMeta.setChecksum(checksum);
198
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
199
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
200
                    boolean error = false;
201
                    //create the data doc on d1
202
                    try
203
                    {
204
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
205
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
206
                    }
207
                    catch(Exception e)
208
                    {
209
                        error = true;
210
                        System.out.println("ERROR: Could not create data document with id " + 
211
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
212
                    }
213
                    finally
214
                    {
215
                        if(error)
216
                        {
217
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
218
                                    "FAILED.");
219
                        }
220
                        else
221
                        {
222
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
223
                                " which is described by " + sysmeta.getIdentifier().getValue());
224
                        }
225
                    }
226
                }
227
                else
228
                {
229
                    System.out.println("WARNING: Could not process describes url " +
230
                            dataDocUrl + " for document " + doc.docid + 
231
                    ".  Only ecogrid://knb/ urls are currently supported.");
232
                }
233
            }
234
            
235
            try
236
            {
237
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
238
                    IOUtils.toInputStream(doc.doctext), sysmeta);
239
              System.out.println("Success inserting document " + id.getValue());
240
              
241
            }
242
            catch(Exception e)
243
            {
244
                e.printStackTrace();
245
                System.out.println("Could not create document with id " + 
246
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
247
                
248
            }
249
            finally
250
            {
251
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
252
            }
253
        }
254
        
255
        logout();
256
    }
257
    
258
    /**
259
     * create the documents listed by an eml document as described in the 
260
     * new system
261
     * @param doc
262
     * @param emld
263
     */
264
    private void createDescribedDocuments(Document doc, EMLDocument emld)
265
    {
266
        
267
    }
268
    
269
    /**
270
     * @param doc
271
     * @return
272
     */
273
    private SystemMetadata generateSystemMetadata(Document doc)
274
      throws Exception
275
    {
276
        SystemMetadata sm = new SystemMetadata();
277
        //set the id
278
        Identifier id = new Identifier();
279
        id.setValue(doc.docid.trim());
280
        sm.setIdentifier(id);
281
        
282
        //set the object format
283
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
284
        if(format == null)
285
        {
286
            if(doc.doctype.trim().equals("BIN"))
287
            {
288
                format = ObjectFormat.OCTET_STREAM;
289
            }
290
            else
291
            {
292
                format = ObjectFormat.TEXT_PLAIN;
293
            }
294
        }
295
        sm.setObjectFormat(format);
296
        
297
        //create the checksum
298
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
299
        String checksumS = checksum(bais);
300
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
301
        Checksum checksum = new Checksum();
302
        checksum.setValue(checksumS);
303
        checksum.setAlgorithm(ca);
304
        sm.setChecksum(checksum);
305
        
306
        //set the size
307
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
308
        
309
        //submitter
310
        Principal p = new Principal();
311
        p.setValue("unknown");
312
        sm.setSubmitter(p);
313
        sm.setRightsHolder(p);
314
        try
315
        {
316
            Date dateCreated = parseMetacatDate(doc.createDate);
317
            sm.setDateUploaded(dateCreated);
318
            Date dateUpdated = parseMetacatDate(doc.updateDate);
319
            sm.setDateSysMetadataModified(dateUpdated);
320
        }
321
        catch(Exception e)
322
        {
323
            System.out.println("couldn't parse a date: " + e.getMessage());
324
            Date dateCreated = new Date();
325
            sm.setDateUploaded(dateCreated);
326
            Date dateUpdated = new Date();
327
            sm.setDateSysMetadataModified(dateUpdated);
328
        }
329
        NodeReference nr = new NodeReference();
330
        nr.setValue("KNB");
331
        sm.setOriginMemberNode(nr);
332
        sm.setAuthoritativeMemberNode(nr);
333
        
334
        return sm;
335
    }
336
    
337
    private void printHeader(String s)
338
    {
339
        System.out.println("****** " + s + " *******");
340
    }
341
    
342
    /**
343
     * produce an md5 checksum for item
344
     */
345
    private String checksum(InputStream is)
346
      throws Exception
347
    {        
348
        byte[] buffer = new byte[1024];
349
        MessageDigest complete = MessageDigest.getInstance("MD5");
350
        int numRead;
351
        
352
        do 
353
        {
354
          numRead = is.read(buffer);
355
          if (numRead > 0) 
356
          {
357
            complete.update(buffer, 0, numRead);
358
          }
359
        } while (numRead != -1);
360
        
361
        
362
        return getHex(complete.digest());
363
    }
364
    
365
    /**
366
     * convert a byte array to a hex string
367
     */
368
    private static String getHex( byte [] raw ) 
369
    {
370
        final String HEXES = "0123456789ABCDEF";
371
        if ( raw == null ) {
372
          return null;
373
        }
374
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
375
        for ( final byte b : raw ) {
376
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
377
             .append(HEXES.charAt((b & 0x0F)));
378
        }
379
        return hex.toString();
380
    }
381
    
382
    /**
383
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
384
     * a proper date object
385
     * @param date
386
     * @return
387
     */
388
    private Date parseMetacatDate(String date)
389
    {
390
        String year = date.substring(0, 4);
391
        String month = date.substring(5, 7);
392
        String day = date.substring(8, 10);
393
        Calendar c = Calendar.getInstance();
394
        c.set(new Integer(year).intValue(), 
395
              new Integer(month).intValue(), 
396
              new Integer(day).intValue());
397
        return c.getTime();
398
    }
399

    
400
    /**
401
     * send a request to the resource
402
     */
403
    private InputStream sendRequest(String contextRootUrl, String resource, 
404
            String sessionid, String method, String urlParamaters, 
405
            String contentType, InputStream dataStream) 
406
        throws Exception 
407
    {
408
        
409
        HttpURLConnection connection = null ;
410
        String restURL = contextRootUrl + resource;
411

    
412
        if (urlParamaters != null) {
413
            if (restURL.indexOf("?") == -1)             
414
                restURL += "?";
415
            restURL += urlParamaters; 
416
            if(restURL.indexOf(" ") != -1)
417
            {
418
                restURL = restURL.replaceAll("\\s", "%20");
419
            }
420
        }
421
        
422
        if(sessionid != null)
423
        {
424
            if(restURL.indexOf("?") == -1)
425
            {
426
                restURL += "?sessionid=" + sessionid;
427
            }
428
            else
429
            {
430
                restURL += "&sessionid=" + sessionid;
431
            }
432
        }
433

    
434
        URL u = null;
435
        InputStream content = null;
436
        System.out.println("url: " + restURL);
437
        System.out.println("method: " + method);
438
        u = new URL(restURL);
439
        connection = (HttpURLConnection) u.openConnection();
440
        if (contentType!=null) {
441
            connection.setRequestProperty("Content-Type",contentType);
442
        }
443

    
444
        connection.setDoOutput(true);
445
        connection.setDoInput(true);
446
        connection.setRequestMethod(method);
447

    
448
        if (!method.equals("GET")) {
449
            if (dataStream != null) {
450
                OutputStream out = connection.getOutputStream();
451
                IOUtils.copy(dataStream, out);
452
            }
453
        }
454

    
455
        return connection.getInputStream();   
456
    }
457
    
458
    /**
459
     * create a mime multipart message from object and sysmeta
460
     */
461
    private MimeMultipart createMimeMultipart(InputStream object)
462
      throws Exception
463
    {
464
        final MimeMultipart mmp = new MimeMultipart();
465
        MimeBodyPart objectPart = new MimeBodyPart();
466
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
467
        objectPart.setFileName("doctext");
468
        DataSource ds = new InputStreamDataSource("doctext", object);
469
        DataHandler dh = new DataHandler(ds);
470
        objectPart.setDataHandler(dh);
471
        mmp.addBodyPart(objectPart);
472
        return mmp;
473
    }
474
    
475
    /**
476
     * parse a metacat query response and return a vector of docids
477
     * @param response
478
     * @return
479
     */
480
    private Vector<Document> parseResponse(String response)
481
    {
482
        Vector<Document> v = new Vector<Document>();
483
        int dstart = response.indexOf("<document>");
484
        int dend = response.indexOf("</document>", dstart);
485
        while(dstart != -1)
486
        {
487
            String doc = response.substring(dstart + "<document>".length(), dend);
488
            //System.out.println("adding " + docid);
489
            Document d = new Document(getFieldFromDoc(doc, "docid"),
490
                    getFieldFromDoc(doc, "doctype"),
491
                    getFieldFromDoc(doc, "createdate"),
492
                    getFieldFromDoc(doc, "updatedate"));
493
            v.add(d);
494
            dstart = response.indexOf("<document>", dend);
495
            dend = response.indexOf("</document>", dstart);
496
        }
497
        
498
        return v;
499
    }
500
    
501
    private String getFieldFromDoc(String doc, String fieldname)
502
    {
503
        String field = "<" + fieldname + ">";
504
        String fieldend = "</" + fieldname + ">";
505
        int start = doc.indexOf(field);
506
        int end = doc.indexOf(fieldend);
507
        String s = doc.substring(start + field.length(), end);
508
        //System.out.println("field: " + fieldname + " : " + s);
509
        return s;
510
    }
511
    
512
    /**
513
     * login the source
514
     * @return
515
     * @throws Exception
516
     */
517
    private String loginSource()
518
      throws Exception
519
    {
520
        return login(sourceUrl);
521
    }
522
    
523
    /**
524
     * login the destination
525
     * @return
526
     * @throws Exception
527
     */
528
    private String loginDest()
529
        throws Exception
530
    {
531
        return login(destinationUrl);
532
    }
533
    
534
    /**
535
     * returns a sessionid
536
     * @return
537
     */
538
    private String login(String sourceUrl)
539
      throws Exception
540
    {
541
        InputStream is = getResponse(sourceUrl, "/metacat", 
542
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
543
        "POST");
544
        String response = streamToString(is);
545
        //System.out.println("response: " + response);
546
        if(response.indexOf("sessionId") == -1)
547
        {
548
            throw new Exception("Error logging into " + sourceUrl);
549
        }
550
        
551
        String sessionid = response.substring(
552
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
553
                response.indexOf("</sessionId>"));
554
        System.out.println("sessionid: " + sessionid);
555
        return sessionid;
556
    }
557
    
558
    /**
559
     * logout both the source and destination
560
     * @throws Exception
561
     */
562
    private void logout()
563
        throws Exception
564
    {
565
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
566
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
567
    }
568
    
569
    /**
570
     * get an http response
571
     * @param contextRootUrl
572
     * @param resource
573
     * @param urlParameters
574
     * @param method
575
     * @return
576
     * @throws Exception
577
     */
578
    private InputStream getResponse(String contextRootUrl, String resource, 
579
            String urlParameters, String method)
580
      throws Exception
581
    {
582
        HttpURLConnection connection = null ;
583

    
584
        String restURL = contextRootUrl+resource;
585

    
586
        if (urlParameters != null) {
587
            if (restURL.indexOf("?") == -1)             
588
                restURL += "?";
589
            restURL += urlParameters; 
590
            if(restURL.indexOf(" ") != -1)
591
            {
592
                restURL = restURL.replaceAll("\\s", "%20");
593
            }
594
        }
595

    
596
        URL u = null;
597
        InputStream content = null;            
598
        System.out.println("url: " + restURL);
599
        System.out.println("method: " + method);
600
        u = new URL(restURL);
601
        connection = (HttpURLConnection) u.openConnection();
602
        connection.setDoOutput(true);
603
        connection.setDoInput(true);
604
        connection.setRequestMethod(method);
605
        content = connection.getInputStream();
606
        return content;
607
    }
608
    
609
    private String streamToString(InputStream is)
610
        throws Exception
611
    {
612
        byte b[] = new byte[1024];
613
        int numread = is.read(b, 0, 1024);
614
        String response = new String();
615
        while(numread != -1)
616
        {
617
            response += new String(b, 0, numread);
618
            numread = is.read(b, 0, 1024);
619
        }
620
        return response;
621
    }
622
    
623
    private InputStream stringToStream(String s)
624
      throws Exception
625
    {
626
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
627
        return bais;
628
    }
629
    
630
    private class Document
631
    {
632
        public String docid;
633
        public String doctype;
634
        public String createDate;
635
        public String updateDate;
636
        public String doctext;
637
        
638
        public Document(String docid, String doctype, String createDate, String updateDate)
639
        {
640
            this.docid = docid.trim();
641
            this.doctype = doctype.trim();
642
            this.createDate = createDate.trim();
643
            this.updateDate = updateDate.trim();
644
        }
645
    }
646
}
(8-8/15)