Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a metacat instance with documents returned from a query
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.io.ByteArrayInputStream;
29
import java.io.InputStream;
30
import java.io.OutputStream;
31
import java.net.HttpURLConnection;
32
import java.net.URL;
33
import java.security.MessageDigest;
34
import java.util.Calendar;
35
import java.util.Date;
36
import java.util.Vector;
37

    
38
import javax.activation.DataHandler;
39
import javax.activation.DataSource;
40
import javax.mail.internet.MimeBodyPart;
41
import javax.mail.internet.MimeMultipart;
42

    
43
import org.apache.commons.io.IOUtils;
44
import org.dataone.client.D1Client;
45
import org.dataone.client.MNode;
46
import org.dataone.client.ObjectFormatCache;
47
import org.dataone.eml.DataoneEMLParser;
48
import org.dataone.eml.EMLDocument;
49
import org.dataone.eml.EMLDocument.DistributionMetadata;
50
import org.dataone.service.types.AuthToken;
51
import org.dataone.service.types.Checksum;
52
import org.dataone.service.types.ChecksumAlgorithm;
53
import org.dataone.service.types.Identifier;
54
import org.dataone.service.types.NodeReference;
55
import org.dataone.service.types.ObjectFormat;
56
import org.dataone.service.types.Subject;
57
import org.dataone.service.types.SystemMetadata;
58

    
59
import edu.ucsb.nceas.metacat.MetaCatServlet;
60
import edu.ucsb.nceas.metacat.properties.PropertyService;
61
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
62

    
63
/**
 * A class to populate a metacat instance based on documents returned from a query
 * @author berkley
 */
67
public class MetacatPopulator
68
{
69
    private String sourceUrl = null;
70
    private String destinationUrl = null;
71
    private String query = null;
72
    private String username = null;
73
    private String password = null;
74
    
75
    /**
     * Build a populator for the given source and destination urls.
     * Both urls should be of the form "http://<url>/<metacat_instance>".
     * The original author documents that the query runs as public when
     * username and/or password is null.
     * NOTE(review): confirm that the login code honors this.
     * @param sourceUrl base url of the metacat instance to read from
     * @param destUrl base url of the metacat instance to write to
     * @param query the term to search the source for
     * @param username the user to log in as
     * @param password the password of that user
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.username = username;
        this.password = password;
        this.query = query;
    }
94
    
95
    /**
96
     * populate from the source
97
     */
98
    public void populate()
99
      throws Exception
100
    {
101
        printHeader("Source login");
102
        String sourceSessionid = loginSource();
103
        
104
        //do a query
105
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
106
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
107
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
108
        params += "action=query&";
109
        params += "qformat=xml&";
110
        params += "anyfield=" + query;
111
        
112
        printHeader("Searching source");
113
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
114
        InputStream is = getResponse(sourceUrl, "/metacat",
115
                params, "POST");
116
        String response = streamToString(is);
117
        //System.out.println("response: " + response);
118
        Vector<Document> docs = parseResponse(response);
119
        
120
        
121
        printHeader("Parsing source results");
122
        System.out.println("creating MN with url: " + destinationUrl + "/");
123
        MNode mn = D1Client.getMN(destinationUrl + "/");
124
        
125
        printHeader("Processing " + docs.size() + " results.");
126
        printHeader("logging in to the destination " + destinationUrl);
127
        AuthToken authtoken = mn.login(username, password);
128
        System.out.println("authtoken: " + authtoken.getToken());
129
        for(int i=0; i<docs.size(); i++)
130
        {
131
            //for each document in the query
132
            Document doc = docs.get(i);
133
            String docid = doc.docid;
134
            //get the doc from source
135
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
136
            params = "action=read&qformat=xml&docid=" + docid;
137
            is = getResponse(sourceUrl, "/metacat", params, "POST");
138
            String doctext = streamToString(is);
139
            System.out.println("doctext: " + doctext);
140
            is = stringToStream(doctext);
141
            //parse the document
142
            DataoneEMLParser parser = DataoneEMLParser.getInstance();
143
            EMLDocument emld = parser.parseDocument(is);
144
            if(emld == null)
145
            {
146
                continue;
147
            }
148
            //go through the DistributionMetadata and download any described data
149
            
150
            is = stringToStream(doctext);
151
            doc.doctext = doctext;
152

    
153
            printHeader("creating document on destination " + destinationUrl);            
154
            SystemMetadata sysmeta = generateSystemMetadata(doc);
155
            for(int j=0; j<emld.distributionMetadata.size(); j++)
156
            {
157
                Identifier emlId = sysmeta.getIdentifier();
158
                DistributionMetadata dm = emld.distributionMetadata.elementAt(j);
159
                String dataDocUrl = dm.url;
160
                String dataDocMimeType = dm.mimeType;
161
                String dataDocLocalId = "";
162
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
163
                { //we only handle ecogrid urls right now
164
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
165
                            "ecogrid://knb/".length(), dataDocUrl.length());
166
                    //get the file
167
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
168
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
169
                    String dataDocText = streamToString(dataDocIs);
170
                    
171
                    //set the id
172
                    Identifier did = new Identifier();
173
                    did.setValue(dataDocLocalId);
174
                    
175
                    //add the desribeby to the eml's sysmeta
176
                    System.out.println("adding describe for doc " + 
177
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
178
                    sysmeta.addDescribe(did);
179
                    
180
                    //create sysmeta for the data doc                    
181
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
182
                    //overwrite the bogus values from the last call 
183
                    dataDocSysMeta.setIdentifier(did);
184
                    dataDocSysMeta.setObjectFormat(ObjectFormatCache.getFormat(dataDocMimeType));
185
                    Checksum checksum = new Checksum();
186
                    dataDocIs = stringToStream(dataDocText);
187
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
188
                    checksum.setAlgorithm(ca);
189
                    checksum.setValue(checksum(dataDocIs));
190
                    dataDocSysMeta.setChecksum(checksum);
191
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
192
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
193
                    boolean error = false;
194
                    //create the data doc on d1
195
                    try
196
                    {
197
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
198
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
199
                    }
200
                    catch(Exception e)
201
                    {
202
                        error = true;
203
                        System.out.println("ERROR: Could not create data document with id " + 
204
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
205
                    }
206
                    finally
207
                    {
208
                        if(error)
209
                        {
210
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
211
                                    "FAILED.");
212
                        }
213
                        else
214
                        {
215
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
216
                                " which is described by " + sysmeta.getIdentifier().getValue());
217
                        }
218
                    }
219
                }
220
                else
221
                {
222
                    System.out.println("WARNING: Could not process describes url " +
223
                            dataDocUrl + " for document " + doc.docid + 
224
                    ".  Only ecogrid://knb/ urls are currently supported.");
225
                }
226
            }
227
            
228
            try
229
            {
230
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
231
                    IOUtils.toInputStream(doc.doctext), sysmeta);
232
              System.out.println("Success inserting document " + id.getValue());
233
              
234
            }
235
            catch(Exception e)
236
            {
237
                e.printStackTrace();
238
                System.out.println("Could not create document with id " + 
239
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
240
                
241
            }
242
            finally
243
            {
244
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
245
            }
246
        }
247
        
248
        logout();
249
    }
250
    
251
    /**
252
     * create the documents listed by an eml document as described in the 
253
     * new system
254
     * @param doc
255
     * @param emld
256
     */
257
    private void createDescribedDocuments(Document doc, EMLDocument emld)
258
    {
259
        
260
    }
261
    
262
    /**
263
     * @param doc
264
     * @return
265
     */
266
    private SystemMetadata generateSystemMetadata(Document doc)
267
      throws Exception
268
    {
269
        SystemMetadata sm = new SystemMetadata();
270
        //set the id
271
        Identifier id = new Identifier();
272
        id.setValue(doc.docid.trim());
273
        sm.setIdentifier(id);
274
        
275
        //set the object format
276
        ObjectFormat format = ObjectFormatCache.getFormat(doc.doctype);
277
        if(format == null)
278
        {
279
            if(doc.doctype.trim().equals("BIN"))
280
            {
281
                format = ObjectFormatCache.getFormat("application/octet-stream");
282
            }
283
            else
284
            {
285
                format = ObjectFormatCache.getFormat("text/plain");
286
            }
287
        }
288
        sm.setObjectFormat(format);
289
        
290
        //create the checksum
291
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
292
        String checksumS = checksum(bais);
293
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
294
        Checksum checksum = new Checksum();
295
        checksum.setValue(checksumS);
296
        checksum.setAlgorithm(ca);
297
        sm.setChecksum(checksum);
298
        
299
        //set the size
300
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
301
        
302
        //submitter
303
        Subject p = new Subject();
304
        p.setValue("unknown");
305
        sm.setSubmitter(p);
306
        sm.setRightsHolder(p);
307
        try
308
        {
309
            Date dateCreated = parseMetacatDate(doc.createDate);
310
            sm.setDateUploaded(dateCreated);
311
            Date dateUpdated = parseMetacatDate(doc.updateDate);
312
            sm.setDateSysMetadataModified(dateUpdated);
313
        }
314
        catch(Exception e)
315
        {
316
            System.out.println("couldn't parse a date: " + e.getMessage());
317
            Date dateCreated = new Date();
318
            sm.setDateUploaded(dateCreated);
319
            Date dateUpdated = new Date();
320
            sm.setDateSysMetadataModified(dateUpdated);
321
        }
322
        NodeReference nr = new NodeReference();
323
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
324
        sm.setOriginMemberNode(nr);
325
        sm.setAuthoritativeMemberNode(nr);
326
        
327
        return sm;
328
    }
329
    
330
    private void printHeader(String s)
331
    {
332
        System.out.println("****** " + s + " *******");
333
    }
334
    
335
    /**
336
     * produce an md5 checksum for item
337
     */
338
    private String checksum(InputStream is)
339
      throws Exception
340
    {        
341
        byte[] buffer = new byte[1024];
342
        MessageDigest complete = MessageDigest.getInstance("MD5");
343
        int numRead;
344
        
345
        do 
346
        {
347
          numRead = is.read(buffer);
348
          if (numRead > 0) 
349
          {
350
            complete.update(buffer, 0, numRead);
351
          }
352
        } while (numRead != -1);
353
        
354
        
355
        return getHex(complete.digest());
356
    }
357
    
358
    /**
359
     * convert a byte array to a hex string
360
     */
361
    private static String getHex( byte [] raw ) 
362
    {
363
        final String HEXES = "0123456789ABCDEF";
364
        if ( raw == null ) {
365
          return null;
366
        }
367
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
368
        for ( final byte b : raw ) {
369
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
370
             .append(HEXES.charAt((b & 0x0F)));
371
        }
372
        return hex.toString();
373
    }
374
    
375
    /**
376
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
377
     * a proper date object
378
     * @param date
379
     * @return
380
     */
381
    private Date parseMetacatDate(String date)
382
    {
383
        String year = date.substring(0, 4);
384
        String month = date.substring(5, 7);
385
        String day = date.substring(8, 10);
386
        Calendar c = Calendar.getInstance();
387
        c.set(new Integer(year).intValue(), 
388
              new Integer(month).intValue(), 
389
              new Integer(day).intValue());
390
        return c.getTime();
391
    }
392

    
393
    /**
394
     * send a request to the resource
395
     */
396
    private InputStream sendRequest(String contextRootUrl, String resource, 
397
            String sessionid, String method, String urlParamaters, 
398
            String contentType, InputStream dataStream) 
399
        throws Exception 
400
    {
401
        
402
        HttpURLConnection connection = null ;
403
        String restURL = contextRootUrl + resource;
404

    
405
        if (urlParamaters != null) {
406
            if (restURL.indexOf("?") == -1)             
407
                restURL += "?";
408
            restURL += urlParamaters; 
409
            if(restURL.indexOf(" ") != -1)
410
            {
411
                restURL = restURL.replaceAll("\\s", "%20");
412
            }
413
        }
414
        
415
        if(sessionid != null)
416
        {
417
            if(restURL.indexOf("?") == -1)
418
            {
419
                restURL += "?sessionid=" + sessionid;
420
            }
421
            else
422
            {
423
                restURL += "&sessionid=" + sessionid;
424
            }
425
        }
426

    
427
        URL u = null;
428
        InputStream content = null;
429
        System.out.println("url: " + restURL);
430
        System.out.println("method: " + method);
431
        u = new URL(restURL);
432
        connection = (HttpURLConnection) u.openConnection();
433
        if (contentType!=null) {
434
            connection.setRequestProperty("Content-Type",contentType);
435
        }
436

    
437
        connection.setDoOutput(true);
438
        connection.setDoInput(true);
439
        connection.setRequestMethod(method);
440

    
441
        if (!method.equals("GET")) {
442
            if (dataStream != null) {
443
                OutputStream out = connection.getOutputStream();
444
                IOUtils.copy(dataStream, out);
445
            }
446
        }
447

    
448
        return connection.getInputStream();   
449
    }
450
    
451
    /**
452
     * create a mime multipart message from object and sysmeta
453
     */
454
    private MimeMultipart createMimeMultipart(InputStream object)
455
      throws Exception
456
    {
457
        final MimeMultipart mmp = new MimeMultipart();
458
        MimeBodyPart objectPart = new MimeBodyPart();
459
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
460
        objectPart.setFileName("doctext");
461
        DataSource ds = new InputStreamDataSource("doctext", object);
462
        DataHandler dh = new DataHandler(ds);
463
        objectPart.setDataHandler(dh);
464
        mmp.addBodyPart(objectPart);
465
        return mmp;
466
    }
467
    
468
    /**
469
     * parse a metacat query response and return a vector of docids
470
     * @param response
471
     * @return
472
     */
473
    private Vector<Document> parseResponse(String response)
474
    {
475
        Vector<Document> v = new Vector<Document>();
476
        int dstart = response.indexOf("<document>");
477
        int dend = response.indexOf("</document>", dstart);
478
        while(dstart != -1)
479
        {
480
            String doc = response.substring(dstart + "<document>".length(), dend);
481
            //System.out.println("adding " + docid);
482
            Document d = new Document(getFieldFromDoc(doc, "docid"),
483
                    getFieldFromDoc(doc, "doctype"),
484
                    getFieldFromDoc(doc, "createdate"),
485
                    getFieldFromDoc(doc, "updatedate"));
486
            v.add(d);
487
            dstart = response.indexOf("<document>", dend);
488
            dend = response.indexOf("</document>", dstart);
489
        }
490
        
491
        return v;
492
    }
493
    
494
    private String getFieldFromDoc(String doc, String fieldname)
495
    {
496
        String field = "<" + fieldname + ">";
497
        String fieldend = "</" + fieldname + ">";
498
        int start = doc.indexOf(field);
499
        int end = doc.indexOf(fieldend);
500
        String s = doc.substring(start + field.length(), end);
501
        //System.out.println("field: " + fieldname + " : " + s);
502
        return s;
503
    }
504
    
505
    /**
506
     * login the source
507
     * @return
508
     * @throws Exception
509
     */
510
    private String loginSource()
511
      throws Exception
512
    {
513
        return login(sourceUrl);
514
    }
515
    
516
    /**
517
     * login the destination
518
     * @return
519
     * @throws Exception
520
     */
521
    private String loginDest()
522
        throws Exception
523
    {
524
        return login(destinationUrl);
525
    }
526
    
527
    /**
528
     * returns a sessionid
529
     * @return
530
     */
531
    private String login(String sourceUrl)
532
      throws Exception
533
    {
534
        InputStream is = getResponse(sourceUrl, "/metacat", 
535
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
536
        "POST");
537
        String response = streamToString(is);
538
        //System.out.println("response: " + response);
539
        if(response.indexOf("sessionId") == -1)
540
        {
541
            throw new Exception("Error logging into " + sourceUrl);
542
        }
543
        
544
        String sessionid = response.substring(
545
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
546
                response.indexOf("</sessionId>"));
547
        System.out.println("sessionid: " + sessionid);
548
        return sessionid;
549
    }
550
    
551
    /**
552
     * logout both the source and destination
553
     * @throws Exception
554
     */
555
    private void logout()
556
        throws Exception
557
    {
558
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
559
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
560
    }
561
    
562
    /**
563
     * get an http response
564
     * @param contextRootUrl
565
     * @param resource
566
     * @param urlParameters
567
     * @param method
568
     * @return
569
     * @throws Exception
570
     */
571
    private InputStream getResponse(String contextRootUrl, String resource, 
572
            String urlParameters, String method)
573
      throws Exception
574
    {
575
        HttpURLConnection connection = null ;
576

    
577
        String restURL = contextRootUrl+resource;
578

    
579
        if (urlParameters != null) {
580
            if (restURL.indexOf("?") == -1)             
581
                restURL += "?";
582
            restURL += urlParameters; 
583
            if(restURL.indexOf(" ") != -1)
584
            {
585
                restURL = restURL.replaceAll("\\s", "%20");
586
            }
587
        }
588

    
589
        URL u = null;
590
        InputStream content = null;            
591
        System.out.println("url: " + restURL);
592
        System.out.println("method: " + method);
593
        u = new URL(restURL);
594
        connection = (HttpURLConnection) u.openConnection();
595
        connection.setDoOutput(true);
596
        connection.setDoInput(true);
597
        connection.setRequestMethod(method);
598
        content = connection.getInputStream();
599
        return content;
600
    }
601
    
602
    private String streamToString(InputStream is)
603
        throws Exception
604
    {
605
        byte b[] = new byte[1024];
606
        int numread = is.read(b, 0, 1024);
607
        String response = new String();
608
        while(numread != -1)
609
        {
610
            response += new String(b, 0, numread);
611
            numread = is.read(b, 0, 1024);
612
        }
613
        return response;
614
    }
615
    
616
    private InputStream stringToStream(String s)
617
      throws Exception
618
    {
619
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
620
        return bais;
621
    }
622
    
623
    private class Document
624
    {
625
        public String docid;
626
        public String doctype;
627
        public String createDate;
628
        public String updateDate;
629
        public String doctext;
630
        
631
        public Document(String docid, String doctype, String createDate, String updateDate)
632
        {
633
            this.docid = docid.trim();
634
            this.doctype = doctype.trim();
635
            this.createDate = createDate.trim();
636
            this.updateDate = updateDate.trim();
637
        }
638
    }
639
}
(8-8/16)