Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a Metacat instance from documents returned by a query
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetaCatServlet;
45
import edu.ucsb.nceas.metacat.MetacatHandler;
46
import edu.ucsb.nceas.metacat.MetacatResultSet;
47
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
48
import edu.ucsb.nceas.metacat.dataone.CrudService;
49
import edu.ucsb.nceas.metacat.properties.PropertyService;
50
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
51

    
52
import org.dataone.service.exceptions.InvalidSystemMetadata;
53
import org.dataone.service.exceptions.ServiceFailure;
54
import org.dataone.service.types.AuthToken;
55
import org.dataone.service.types.Checksum;
56
import org.dataone.service.types.ChecksumAlgorithm;
57
import org.dataone.service.types.NodeReference;
58
import org.dataone.service.types.ObjectFormat;
59
import org.dataone.service.types.Principal;
60
import org.dataone.service.types.SystemMetadata;
61
import org.dataone.service.types.Identifier;
62
import org.dataone.client.D1Client;
63
import org.dataone.client.MNode;
64
import org.dataone.eml.DataoneEMLParser;
65
import org.dataone.eml.EMLDocument;
66
import org.dataone.eml.EMLDocument.DistributionMetadata;
67

    
68
//import sun.tools.jstat.Identifier;
69

    
70
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
71

    
72
/**
73
 * @author berkley
74
 * A class to populate a metacat instance based on documents returned from a query
75
 */
76
public class MetacatPopulator
77
{
78
    private String sourceUrl = null;
79
    private String destinationUrl = null;
80
    private String query = null;
81
    private String username = null;
82
    private String password = null;
83
    
84
    /**
     * Builds a populator that copies the documents matching a query from one
     * Metacat instance to another.
     * Both urls should be of the form "http://<url>/<metacat_instance>".
     * The values are stored exactly as given; if username and/or password is
     * null, the query will be run as public.
     * @param sourceUrl context url of the instance that is searched and read
     * @param destUrl context url of the instance the documents are created on
     * @param query text used for the "anyfield" search on the source
     * @param username name used when logging in
     * @param password password used when logging in
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        // where to read from and where to write to
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;

        // who to log in as
        this.username = username;
        this.password = password;

        // what to look for
        this.query = query;
    }
103
    
104
    /**
105
     * populate from the source
106
     */
107
    public void populate()
108
      throws Exception
109
    {
110
        printHeader("Source login");
111
        String sourceSessionid = loginSource();
112
        
113
        //do a query
114
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
115
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
116
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
117
        params += "action=query&";
118
        params += "qformat=xml&";
119
        params += "anyfield=" + query;
120
        
121
        printHeader("Searching source");
122
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
123
        InputStream is = getResponse(sourceUrl, "/metacat",
124
                params, "POST");
125
        String response = streamToString(is);
126
        //System.out.println("response: " + response);
127
        Vector<Document> docs = parseResponse(response);
128
        
129
        
130
        printHeader("Parsing source results");
131
        System.out.println("creating MN with url: " + destinationUrl + "/");
132
        MNode mn = D1Client.getMN(destinationUrl + "/");
133
        
134
        printHeader("Processing " + docs.size() + " results.");
135
        printHeader("logging in to the destination " + destinationUrl);
136
        AuthToken authtoken = mn.login(username, password);
137
        System.out.println("authtoken: " + authtoken.getToken());
138
        for(int i=0; i<docs.size(); i++)
139
        {
140
            //for each document in the query
141
            Document doc = docs.get(i);
142
            String docid = doc.docid;
143
            //get the doc from source
144
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
145
            params = "action=read&qformat=xml&docid=" + docid;
146
            is = getResponse(sourceUrl, "/metacat", params, "POST");
147
            String doctext = streamToString(is);
148
            System.out.println("doctext: " + doctext);
149
            is = stringToStream(doctext);
150
            //parse the document
151
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
152
            EMLDocument emld = parser.parseDocument(is);
153
            if(emld == null)
154
            {
155
                continue;
156
            }
157
            //go through the DistributionMetadata and download any described data
158
            
159
            is = stringToStream(doctext);
160
            doc.doctext = doctext;
161

    
162
            printHeader("creating document on destination " + destinationUrl);            
163
            SystemMetadata sysmeta = generateSystemMetadata(doc);
164
            for(int j=0; j<emld.distributionMetadata.size(); j++)
165
            {
166
                Identifier emlId = sysmeta.getIdentifier();
167
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
168
                String dataDocUrl = dm.url;
169
                String dataDocMimeType = dm.mimeType;
170
                String dataDocLocalId = "";
171
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
172
                { //we only handle ecogrid urls right now
173
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
174
                            "ecogrid://knb/".length(), dataDocUrl.length());
175
                    //get the file
176
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
177
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
178
                    String dataDocText = streamToString(dataDocIs);
179
                    
180
                    //set the id
181
                    Identifier did = new Identifier();
182
                    did.setValue(dataDocLocalId);
183
                    
184
                    //add the desribeby to the eml's sysmeta
185
                    System.out.println("adding describe for doc " + 
186
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
187
                    sysmeta.addDescribe(did);
188
                    
189
                    //create sysmeta for the data doc                    
190
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
191
                    //overwrite the bogus values from the last call 
192
                    dataDocSysMeta.setIdentifier(did);
193
                    dataDocSysMeta.setObjectFormat(ObjectFormat.convert(dataDocMimeType));
194
                    Checksum checksum = new Checksum();
195
                    dataDocIs = stringToStream(dataDocText);
196
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
197
                    checksum.setAlgorithm(ca);
198
                    checksum.setValue(checksum(dataDocIs));
199
                    dataDocSysMeta.setChecksum(checksum);
200
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
201
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
202
                    boolean error = false;
203
                    //create the data doc on d1
204
                    try
205
                    {
206
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
207
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
208
                    }
209
                    catch(Exception e)
210
                    {
211
                        error = true;
212
                        System.out.println("ERROR: Could not create data document with id " + 
213
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
214
                    }
215
                    finally
216
                    {
217
                        if(error)
218
                        {
219
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
220
                                    "FAILED.");
221
                        }
222
                        else
223
                        {
224
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
225
                                " which is described by " + sysmeta.getIdentifier().getValue());
226
                        }
227
                    }
228
                }
229
                else
230
                {
231
                    System.out.println("WARNING: Could not process describes url " +
232
                            dataDocUrl + " for document " + doc.docid + 
233
                    ".  Only ecogrid://knb/ urls are currently supported.");
234
                }
235
            }
236
            
237
            try
238
            {
239
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
240
                    IOUtils.toInputStream(doc.doctext), sysmeta);
241
              System.out.println("Success inserting document " + id.getValue());
242
              
243
            }
244
            catch(Exception e)
245
            {
246
                e.printStackTrace();
247
                System.out.println("Could not create document with id " + 
248
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
249
                
250
            }
251
            finally
252
            {
253
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
254
            }
255
        }
256
        
257
        logout();
258
    }
259
    
260
    /**
261
     * create the documents listed by an eml document as described in the 
262
     * new system
263
     * @param doc
264
     * @param emld
265
     */
266
    private void createDescribedDocuments(Document doc, EMLDocument emld)
267
    {
268
        
269
    }
270
    
271
    /**
272
     * @param doc
273
     * @return
274
     */
275
    private SystemMetadata generateSystemMetadata(Document doc)
276
      throws Exception
277
    {
278
        SystemMetadata sm = new SystemMetadata();
279
        //set the id
280
        Identifier id = new Identifier();
281
        id.setValue(doc.docid.trim());
282
        sm.setIdentifier(id);
283
        
284
        //set the object format
285
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
286
        if(format == null)
287
        {
288
            if(doc.doctype.trim().equals("BIN"))
289
            {
290
                format = ObjectFormat.OCTET_STREAM;
291
            }
292
            else
293
            {
294
                format = ObjectFormat.TEXT_PLAIN;
295
            }
296
        }
297
        sm.setObjectFormat(format);
298
        
299
        //create the checksum
300
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
301
        String checksumS = checksum(bais);
302
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
303
        Checksum checksum = new Checksum();
304
        checksum.setValue(checksumS);
305
        checksum.setAlgorithm(ca);
306
        sm.setChecksum(checksum);
307
        
308
        //set the size
309
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
310
        
311
        //submitter
312
        Principal p = new Principal();
313
        p.setValue("unknown");
314
        sm.setSubmitter(p);
315
        sm.setRightsHolder(p);
316
        try
317
        {
318
            Date dateCreated = parseMetacatDate(doc.createDate);
319
            sm.setDateUploaded(dateCreated);
320
            Date dateUpdated = parseMetacatDate(doc.updateDate);
321
            sm.setDateSysMetadataModified(dateUpdated);
322
        }
323
        catch(Exception e)
324
        {
325
            System.out.println("couldn't parse a date: " + e.getMessage());
326
            Date dateCreated = new Date();
327
            sm.setDateUploaded(dateCreated);
328
            Date dateUpdated = new Date();
329
            sm.setDateSysMetadataModified(dateUpdated);
330
        }
331
        NodeReference nr = new NodeReference();
332
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
333
        sm.setOriginMemberNode(nr);
334
        sm.setAuthoritativeMemberNode(nr);
335
        
336
        return sm;
337
    }
338
    
339
    private void printHeader(String s)
340
    {
341
        System.out.println("****** " + s + " *******");
342
    }
343
    
344
    /**
345
     * produce an md5 checksum for item
346
     */
347
    private String checksum(InputStream is)
348
      throws Exception
349
    {        
350
        byte[] buffer = new byte[1024];
351
        MessageDigest complete = MessageDigest.getInstance("MD5");
352
        int numRead;
353
        
354
        do 
355
        {
356
          numRead = is.read(buffer);
357
          if (numRead > 0) 
358
          {
359
            complete.update(buffer, 0, numRead);
360
          }
361
        } while (numRead != -1);
362
        
363
        
364
        return getHex(complete.digest());
365
    }
366
    
367
    /**
368
     * convert a byte array to a hex string
369
     */
370
    private static String getHex( byte [] raw ) 
371
    {
372
        final String HEXES = "0123456789ABCDEF";
373
        if ( raw == null ) {
374
          return null;
375
        }
376
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
377
        for ( final byte b : raw ) {
378
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
379
             .append(HEXES.charAt((b & 0x0F)));
380
        }
381
        return hex.toString();
382
    }
383
    
384
    /**
385
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
386
     * a proper date object
387
     * @param date
388
     * @return
389
     */
390
    private Date parseMetacatDate(String date)
391
    {
392
        String year = date.substring(0, 4);
393
        String month = date.substring(5, 7);
394
        String day = date.substring(8, 10);
395
        Calendar c = Calendar.getInstance();
396
        c.set(new Integer(year).intValue(), 
397
              new Integer(month).intValue(), 
398
              new Integer(day).intValue());
399
        return c.getTime();
400
    }
401

    
402
    /**
403
     * send a request to the resource
404
     */
405
    private InputStream sendRequest(String contextRootUrl, String resource, 
406
            String sessionid, String method, String urlParamaters, 
407
            String contentType, InputStream dataStream) 
408
        throws Exception 
409
    {
410
        
411
        HttpURLConnection connection = null ;
412
        String restURL = contextRootUrl + resource;
413

    
414
        if (urlParamaters != null) {
415
            if (restURL.indexOf("?") == -1)             
416
                restURL += "?";
417
            restURL += urlParamaters; 
418
            if(restURL.indexOf(" ") != -1)
419
            {
420
                restURL = restURL.replaceAll("\\s", "%20");
421
            }
422
        }
423
        
424
        if(sessionid != null)
425
        {
426
            if(restURL.indexOf("?") == -1)
427
            {
428
                restURL += "?sessionid=" + sessionid;
429
            }
430
            else
431
            {
432
                restURL += "&sessionid=" + sessionid;
433
            }
434
        }
435

    
436
        URL u = null;
437
        InputStream content = null;
438
        System.out.println("url: " + restURL);
439
        System.out.println("method: " + method);
440
        u = new URL(restURL);
441
        connection = (HttpURLConnection) u.openConnection();
442
        if (contentType!=null) {
443
            connection.setRequestProperty("Content-Type",contentType);
444
        }
445

    
446
        connection.setDoOutput(true);
447
        connection.setDoInput(true);
448
        connection.setRequestMethod(method);
449

    
450
        if (!method.equals("GET")) {
451
            if (dataStream != null) {
452
                OutputStream out = connection.getOutputStream();
453
                IOUtils.copy(dataStream, out);
454
            }
455
        }
456

    
457
        return connection.getInputStream();   
458
    }
459
    
460
    /**
461
     * create a mime multipart message from object and sysmeta
462
     */
463
    private MimeMultipart createMimeMultipart(InputStream object)
464
      throws Exception
465
    {
466
        final MimeMultipart mmp = new MimeMultipart();
467
        MimeBodyPart objectPart = new MimeBodyPart();
468
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
469
        objectPart.setFileName("doctext");
470
        DataSource ds = new InputStreamDataSource("doctext", object);
471
        DataHandler dh = new DataHandler(ds);
472
        objectPart.setDataHandler(dh);
473
        mmp.addBodyPart(objectPart);
474
        return mmp;
475
    }
476
    
477
    /**
478
     * parse a metacat query response and return a vector of docids
479
     * @param response
480
     * @return
481
     */
482
    private Vector<Document> parseResponse(String response)
483
    {
484
        Vector<Document> v = new Vector<Document>();
485
        int dstart = response.indexOf("<document>");
486
        int dend = response.indexOf("</document>", dstart);
487
        while(dstart != -1)
488
        {
489
            String doc = response.substring(dstart + "<document>".length(), dend);
490
            //System.out.println("adding " + docid);
491
            Document d = new Document(getFieldFromDoc(doc, "docid"),
492
                    getFieldFromDoc(doc, "doctype"),
493
                    getFieldFromDoc(doc, "createdate"),
494
                    getFieldFromDoc(doc, "updatedate"));
495
            v.add(d);
496
            dstart = response.indexOf("<document>", dend);
497
            dend = response.indexOf("</document>", dstart);
498
        }
499
        
500
        return v;
501
    }
502
    
503
    private String getFieldFromDoc(String doc, String fieldname)
504
    {
505
        String field = "<" + fieldname + ">";
506
        String fieldend = "</" + fieldname + ">";
507
        int start = doc.indexOf(field);
508
        int end = doc.indexOf(fieldend);
509
        String s = doc.substring(start + field.length(), end);
510
        //System.out.println("field: " + fieldname + " : " + s);
511
        return s;
512
    }
513
    
514
    /**
515
     * login the source
516
     * @return
517
     * @throws Exception
518
     */
519
    private String loginSource()
520
      throws Exception
521
    {
522
        return login(sourceUrl);
523
    }
524
    
525
    /**
526
     * login the destination
527
     * @return
528
     * @throws Exception
529
     */
530
    private String loginDest()
531
        throws Exception
532
    {
533
        return login(destinationUrl);
534
    }
535
    
536
    /**
537
     * returns a sessionid
538
     * @return
539
     */
540
    private String login(String sourceUrl)
541
      throws Exception
542
    {
543
        InputStream is = getResponse(sourceUrl, "/metacat", 
544
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
545
        "POST");
546
        String response = streamToString(is);
547
        //System.out.println("response: " + response);
548
        if(response.indexOf("sessionId") == -1)
549
        {
550
            throw new Exception("Error logging into " + sourceUrl);
551
        }
552
        
553
        String sessionid = response.substring(
554
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
555
                response.indexOf("</sessionId>"));
556
        System.out.println("sessionid: " + sessionid);
557
        return sessionid;
558
    }
559
    
560
    /**
561
     * logout both the source and destination
562
     * @throws Exception
563
     */
564
    private void logout()
565
        throws Exception
566
    {
567
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
568
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
569
    }
570
    
571
    /**
572
     * get an http response
573
     * @param contextRootUrl
574
     * @param resource
575
     * @param urlParameters
576
     * @param method
577
     * @return
578
     * @throws Exception
579
     */
580
    private InputStream getResponse(String contextRootUrl, String resource, 
581
            String urlParameters, String method)
582
      throws Exception
583
    {
584
        HttpURLConnection connection = null ;
585

    
586
        String restURL = contextRootUrl+resource;
587

    
588
        if (urlParameters != null) {
589
            if (restURL.indexOf("?") == -1)             
590
                restURL += "?";
591
            restURL += urlParameters; 
592
            if(restURL.indexOf(" ") != -1)
593
            {
594
                restURL = restURL.replaceAll("\\s", "%20");
595
            }
596
        }
597

    
598
        URL u = null;
599
        InputStream content = null;            
600
        System.out.println("url: " + restURL);
601
        System.out.println("method: " + method);
602
        u = new URL(restURL);
603
        connection = (HttpURLConnection) u.openConnection();
604
        connection.setDoOutput(true);
605
        connection.setDoInput(true);
606
        connection.setRequestMethod(method);
607
        content = connection.getInputStream();
608
        return content;
609
    }
610
    
611
    private String streamToString(InputStream is)
612
        throws Exception
613
    {
614
        byte b[] = new byte[1024];
615
        int numread = is.read(b, 0, 1024);
616
        String response = new String();
617
        while(numread != -1)
618
        {
619
            response += new String(b, 0, numread);
620
            numread = is.read(b, 0, 1024);
621
        }
622
        return response;
623
    }
624
    
625
    private InputStream stringToStream(String s)
626
      throws Exception
627
    {
628
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
629
        return bais;
630
    }
631
    
632
    private class Document
633
    {
634
        public String docid;
635
        public String doctype;
636
        public String createDate;
637
        public String updateDate;
638
        public String doctext;
639
        
640
        public Document(String docid, String doctype, String createDate, String updateDate)
641
        {
642
            this.docid = docid.trim();
643
            this.doctype = doctype.trim();
644
            this.createDate = createDate.trim();
645
            this.updateDate = updateDate.trim();
646
        }
647
    }
648
}
(8-8/15)