Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements administrative methods 
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.io.ByteArrayInputStream;
29
import java.io.InputStream;
30
import java.io.OutputStream;
31
import java.net.HttpURLConnection;
32
import java.net.URL;
33
import java.security.MessageDigest;
34
import java.util.Calendar;
35
import java.util.Date;
36
import java.util.Vector;
37

    
38
import javax.activation.DataHandler;
39
import javax.activation.DataSource;
40
import javax.mail.internet.MimeBodyPart;
41
import javax.mail.internet.MimeMultipart;
42

    
43
import org.apache.commons.io.IOUtils;
44
import org.dataone.client.D1Client;
45
import org.dataone.client.MNode;
46
import org.dataone.client.ObjectFormatCache;
47
import org.dataone.service.types.AuthToken;
48
import org.dataone.service.types.Checksum;
49
import org.dataone.service.types.ChecksumAlgorithm;
50
import org.dataone.service.types.Identifier;
51
import org.dataone.service.types.NodeReference;
52
import org.dataone.service.types.ObjectFormat;
53
import org.dataone.service.types.Subject;
54
import org.dataone.service.types.SystemMetadata;
55
import org.ecoinformatics.datamanager.DataManager;
56
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
57
import org.ecoinformatics.datamanager.parser.DataPackage;
58

    
59
import edu.ucsb.nceas.metacat.MetaCatServlet;
60
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
61
import edu.ucsb.nceas.metacat.properties.PropertyService;
62
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
63

    
64
/**
65
 * @author berkley
66
 * A class to populate a metacat instance based on documents returned from a query
67
 */
68
public class MetacatPopulator
69
{
70
    private String sourceUrl = null;
71
    private String destinationUrl = null;
72
    private String query = null;
73
    private String username = null;
74
    private String password = null;
75
    
76
    /**
     * Creates a populator that remembers where to read from, where to write
     * to, what to search for and which credentials to use.
     * Both urls should be of the form {@code http://<url>/<metacat_instance>}.
     * The arguments are stored as given; no validation happens here.
     * NOTE(review): earlier documentation stated that a null username or
     * password makes the query run as public -- confirm against the server,
     * the login request is built from these values unconditionally.
     *
     * @param sourceUrl base url of the metacat instance to copy from
     * @param destUrl base url of the instance to copy to
     * @param query text used for the anyfield search on the source
     * @param username account used for both logins
     * @param password password used for both logins
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        // endpoints
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;

        // what to look for on the source
        this.query = query;

        // credentials shared by the source and destination logins
        this.username = username;
        this.password = password;
    }
95
    
96
    /**
97
     * populate from the source
98
     */
99
    public void populate()
100
      throws Exception
101
    {
102
        printHeader("Source login");
103
        String sourceSessionid = loginSource();
104
        
105
        //do a query
106
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
107
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
108
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
109
        params += "action=query&";
110
        params += "qformat=xml&";
111
        params += "anyfield=" + query;
112
        
113
        printHeader("Searching source");
114
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
115
        InputStream is = getResponse(sourceUrl, "/metacat",
116
                params, "POST");
117
        String response = streamToString(is);
118
        //System.out.println("response: " + response);
119
        Vector<Document> docs = parseResponse(response);
120
        
121
        
122
        printHeader("Parsing source results");
123
        System.out.println("creating MN with url: " + destinationUrl + "/");
124
        MNode mn = D1Client.getMN(destinationUrl + "/");
125
        
126
        printHeader("Processing " + docs.size() + " results.");
127
        printHeader("logging in to the destination " + destinationUrl);
128
        AuthToken authtoken = mn.login(username, password);
129
        System.out.println("authtoken: " + authtoken.getToken());
130
        for(int i=0; i<docs.size(); i++)
131
        {
132
            //for each document in the query
133
            Document doc = docs.get(i);
134
            String docid = doc.docid;
135
            //get the doc from source
136
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
137
            params = "action=read&qformat=xml&docid=" + docid;
138
            is = getResponse(sourceUrl, "/metacat", params, "POST");
139
            String doctext = streamToString(is);
140
            System.out.println("doctext: " + doctext);
141
            is = stringToStream(doctext);
142
            //parse the document
143
            DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
144
        	DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
145
        	DataPackage dataPackage = dataManager.parseMetadata(is);
146
        	
147
            if(dataPackage == null)
148
            {
149
                continue;
150
            }
151
            //go through the DistributionMetadata and download any described data
152
            
153
            is = stringToStream(doctext);
154
            doc.doctext = doctext;
155

    
156
            printHeader("creating document on destination " + destinationUrl);            
157
            SystemMetadata sysmeta = generateSystemMetadata(doc);
158
            for(int j=0; j < dataPackage.getEntityList().length; j++)
159
            {
160
                String dataDocUrl = dataPackage.getEntityList()[j].getURL();
161
                String dataDocMimeType = 
162
                	dataPackage.getEntityList()[j].getDataFormat();
163
                if (dataDocMimeType == null) {
164
	                dataDocMimeType = 
165
	                	ObjectFormatCache.getFormat("application/octet-stream").getFmtid().getValue();
166
                }
167
                String dataDocLocalId = "";
168
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
169
                { //we only handle ecogrid urls right now
170
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
171
                            "ecogrid://knb/".length(), dataDocUrl.length());
172
                    //get the file
173
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
174
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
175
                    String dataDocText = streamToString(dataDocIs);
176
                    
177
                    //set the id
178
                    Identifier did = new Identifier();
179
                    did.setValue(dataDocLocalId);
180
                    
181
                    //add the desribeby to the eml's sysmeta
182
                    System.out.println("adding describe for doc " + 
183
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
184
                    sysmeta.addDescribe(did);
185
                    
186
                    //create sysmeta for the data doc                    
187
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
188
                    //overwrite the bogus values from the last call 
189
                    dataDocSysMeta.setIdentifier(did);
190
                    dataDocSysMeta.setObjectFormat(ObjectFormatCache.getFormat(dataDocMimeType));
191
                    Checksum checksum = new Checksum();
192
                    dataDocIs = stringToStream(dataDocText);
193
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
194
                    checksum.setAlgorithm(ca);
195
                    checksum.setValue(checksum(dataDocIs));
196
                    dataDocSysMeta.setChecksum(checksum);
197
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
198
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
199
                    boolean error = false;
200
                    //create the data doc on d1
201
                    try
202
                    {
203
                        mn.create(authtoken, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
204
                        mn.setAccess(authtoken, dataDocSysMeta.getIdentifier(), "public", "read", "allow", "allowFirst");
205
                    }
206
                    catch(Exception e)
207
                    {
208
                        error = true;
209
                        System.out.println("ERROR: Could not create data document with id " + 
210
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
211
                    }
212
                    finally
213
                    {
214
                        if(error)
215
                        {
216
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
217
                                    "FAILED.");
218
                        }
219
                        else
220
                        {
221
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
222
                                " which is described by " + sysmeta.getIdentifier().getValue());
223
                        }
224
                    }
225
                }
226
                else
227
                {
228
                    System.out.println("WARNING: Could not process describes url " +
229
                            dataDocUrl + " for document " + doc.docid + 
230
                    ".  Only ecogrid://knb/ urls are currently supported.");
231
                }
232
            }
233
            
234
            try
235
            {
236
              Identifier id = mn.create(authtoken, sysmeta.getIdentifier(), 
237
                    IOUtils.toInputStream(doc.doctext), sysmeta);
238
              System.out.println("Success inserting document " + id.getValue());
239
              
240
            }
241
            catch(Exception e)
242
            {
243
                e.printStackTrace();
244
                System.out.println("Could not create document with id " + 
245
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
246
                
247
            }
248
            finally
249
            {
250
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
251
            }
252
        }
253
        
254
        logout();
255
    }
256
    
257

    
258
    
259
    /**
260
     * @param doc
261
     * @return
262
     */
263
    private SystemMetadata generateSystemMetadata(Document doc)
264
      throws Exception
265
    {
266
        SystemMetadata sm = new SystemMetadata();
267
        //set the id
268
        Identifier id = new Identifier();
269
        id.setValue(doc.docid.trim());
270
        sm.setIdentifier(id);
271
        
272
        //set the object format
273
        ObjectFormat format = ObjectFormatCache.getFormat(doc.doctype);
274
        if(format == null)
275
        {
276
            if(doc.doctype.trim().equals("BIN"))
277
            {
278
                format = ObjectFormatCache.getFormat("application/octet-stream");
279
            }
280
            else
281
            {
282
                format = ObjectFormatCache.getFormat("text/plain");
283
            }
284
        }
285
        sm.setObjectFormat(format);
286
        
287
        //create the checksum
288
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
289
        String checksumS = checksum(bais);
290
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
291
        Checksum checksum = new Checksum();
292
        checksum.setValue(checksumS);
293
        checksum.setAlgorithm(ca);
294
        sm.setChecksum(checksum);
295
        
296
        //set the size
297
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
298
        
299
        //submitter
300
        Subject p = new Subject();
301
        p.setValue("unknown");
302
        sm.setSubmitter(p);
303
        sm.setRightsHolder(p);
304
        try
305
        {
306
            Date dateCreated = parseMetacatDate(doc.createDate);
307
            sm.setDateUploaded(dateCreated);
308
            Date dateUpdated = parseMetacatDate(doc.updateDate);
309
            sm.setDateSysMetadataModified(dateUpdated);
310
        }
311
        catch(Exception e)
312
        {
313
            System.out.println("couldn't parse a date: " + e.getMessage());
314
            Date dateCreated = new Date();
315
            sm.setDateUploaded(dateCreated);
316
            Date dateUpdated = new Date();
317
            sm.setDateSysMetadataModified(dateUpdated);
318
        }
319
        NodeReference nr = new NodeReference();
320
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
321
        sm.setOriginMemberNode(nr);
322
        sm.setAuthoritativeMemberNode(nr);
323
        
324
        return sm;
325
    }
326
    
327
    private void printHeader(String s)
328
    {
329
        System.out.println("****** " + s + " *******");
330
    }
331
    
332
    /**
333
     * produce an md5 checksum for item
334
     */
335
    private String checksum(InputStream is)
336
      throws Exception
337
    {        
338
        byte[] buffer = new byte[1024];
339
        MessageDigest complete = MessageDigest.getInstance("MD5");
340
        int numRead;
341
        
342
        do 
343
        {
344
          numRead = is.read(buffer);
345
          if (numRead > 0) 
346
          {
347
            complete.update(buffer, 0, numRead);
348
          }
349
        } while (numRead != -1);
350
        
351
        
352
        return getHex(complete.digest());
353
    }
354
    
355
    /**
356
     * convert a byte array to a hex string
357
     */
358
    private static String getHex( byte [] raw ) 
359
    {
360
        final String HEXES = "0123456789ABCDEF";
361
        if ( raw == null ) {
362
          return null;
363
        }
364
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
365
        for ( final byte b : raw ) {
366
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
367
             .append(HEXES.charAt((b & 0x0F)));
368
        }
369
        return hex.toString();
370
    }
371
    
372
    /**
373
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
374
     * a proper date object
375
     * @param date
376
     * @return
377
     */
378
    private Date parseMetacatDate(String date)
379
    {
380
        String year = date.substring(0, 4);
381
        String month = date.substring(5, 7);
382
        String day = date.substring(8, 10);
383
        Calendar c = Calendar.getInstance();
384
        c.set(new Integer(year).intValue(), 
385
              new Integer(month).intValue(), 
386
              new Integer(day).intValue());
387
        return c.getTime();
388
    }
389

    
390
    /**
391
     * send a request to the resource
392
     */
393
    private InputStream sendRequest(String contextRootUrl, String resource, 
394
            String sessionid, String method, String urlParamaters, 
395
            String contentType, InputStream dataStream) 
396
        throws Exception 
397
    {
398
        
399
        HttpURLConnection connection = null ;
400
        String restURL = contextRootUrl + resource;
401

    
402
        if (urlParamaters != null) {
403
            if (restURL.indexOf("?") == -1)             
404
                restURL += "?";
405
            restURL += urlParamaters; 
406
            if(restURL.indexOf(" ") != -1)
407
            {
408
                restURL = restURL.replaceAll("\\s", "%20");
409
            }
410
        }
411
        
412
        if(sessionid != null)
413
        {
414
            if(restURL.indexOf("?") == -1)
415
            {
416
                restURL += "?sessionid=" + sessionid;
417
            }
418
            else
419
            {
420
                restURL += "&sessionid=" + sessionid;
421
            }
422
        }
423

    
424
        URL u = null;
425
        InputStream content = null;
426
        System.out.println("url: " + restURL);
427
        System.out.println("method: " + method);
428
        u = new URL(restURL);
429
        connection = (HttpURLConnection) u.openConnection();
430
        if (contentType!=null) {
431
            connection.setRequestProperty("Content-Type",contentType);
432
        }
433

    
434
        connection.setDoOutput(true);
435
        connection.setDoInput(true);
436
        connection.setRequestMethod(method);
437

    
438
        if (!method.equals("GET")) {
439
            if (dataStream != null) {
440
                OutputStream out = connection.getOutputStream();
441
                IOUtils.copy(dataStream, out);
442
            }
443
        }
444

    
445
        return connection.getInputStream();   
446
    }
447
    
448
    /**
449
     * create a mime multipart message from object and sysmeta
450
     */
451
    private MimeMultipart createMimeMultipart(InputStream object)
452
      throws Exception
453
    {
454
        final MimeMultipart mmp = new MimeMultipart();
455
        MimeBodyPart objectPart = new MimeBodyPart();
456
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
457
        objectPart.setFileName("doctext");
458
        DataSource ds = new InputStreamDataSource("doctext", object);
459
        DataHandler dh = new DataHandler(ds);
460
        objectPart.setDataHandler(dh);
461
        mmp.addBodyPart(objectPart);
462
        return mmp;
463
    }
464
    
465
    /**
466
     * parse a metacat query response and return a vector of docids
467
     * @param response
468
     * @return
469
     */
470
    private Vector<Document> parseResponse(String response)
471
    {
472
        Vector<Document> v = new Vector<Document>();
473
        int dstart = response.indexOf("<document>");
474
        int dend = response.indexOf("</document>", dstart);
475
        while(dstart != -1)
476
        {
477
            String doc = response.substring(dstart + "<document>".length(), dend);
478
            //System.out.println("adding " + docid);
479
            Document d = new Document(getFieldFromDoc(doc, "docid"),
480
                    getFieldFromDoc(doc, "doctype"),
481
                    getFieldFromDoc(doc, "createdate"),
482
                    getFieldFromDoc(doc, "updatedate"));
483
            v.add(d);
484
            dstart = response.indexOf("<document>", dend);
485
            dend = response.indexOf("</document>", dstart);
486
        }
487
        
488
        return v;
489
    }
490
    
491
    private String getFieldFromDoc(String doc, String fieldname)
492
    {
493
        String field = "<" + fieldname + ">";
494
        String fieldend = "</" + fieldname + ">";
495
        int start = doc.indexOf(field);
496
        int end = doc.indexOf(fieldend);
497
        String s = doc.substring(start + field.length(), end);
498
        //System.out.println("field: " + fieldname + " : " + s);
499
        return s;
500
    }
501
    
502
    /**
503
     * login the source
504
     * @return
505
     * @throws Exception
506
     */
507
    private String loginSource()
508
      throws Exception
509
    {
510
        return login(sourceUrl);
511
    }
512
    
513
    /**
514
     * login the destination
515
     * @return
516
     * @throws Exception
517
     */
518
    private String loginDest()
519
        throws Exception
520
    {
521
        return login(destinationUrl);
522
    }
523
    
524
    /**
525
     * returns a sessionid
526
     * @return
527
     */
528
    private String login(String sourceUrl)
529
      throws Exception
530
    {
531
        InputStream is = getResponse(sourceUrl, "/metacat", 
532
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
533
        "POST");
534
        String response = streamToString(is);
535
        //System.out.println("response: " + response);
536
        if(response.indexOf("sessionId") == -1)
537
        {
538
            throw new Exception("Error logging into " + sourceUrl);
539
        }
540
        
541
        String sessionid = response.substring(
542
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
543
                response.indexOf("</sessionId>"));
544
        System.out.println("sessionid: " + sessionid);
545
        return sessionid;
546
    }
547
    
548
    /**
549
     * logout both the source and destination
550
     * @throws Exception
551
     */
552
    private void logout()
553
        throws Exception
554
    {
555
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
556
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
557
    }
558
    
559
    /**
560
     * get an http response
561
     * @param contextRootUrl
562
     * @param resource
563
     * @param urlParameters
564
     * @param method
565
     * @return
566
     * @throws Exception
567
     */
568
    private InputStream getResponse(String contextRootUrl, String resource, 
569
            String urlParameters, String method)
570
      throws Exception
571
    {
572
        HttpURLConnection connection = null ;
573

    
574
        String restURL = contextRootUrl+resource;
575

    
576
        if (urlParameters != null) {
577
            if (restURL.indexOf("?") == -1)             
578
                restURL += "?";
579
            restURL += urlParameters; 
580
            if(restURL.indexOf(" ") != -1)
581
            {
582
                restURL = restURL.replaceAll("\\s", "%20");
583
            }
584
        }
585

    
586
        URL u = null;
587
        InputStream content = null;            
588
        System.out.println("url: " + restURL);
589
        System.out.println("method: " + method);
590
        u = new URL(restURL);
591
        connection = (HttpURLConnection) u.openConnection();
592
        connection.setDoOutput(true);
593
        connection.setDoInput(true);
594
        connection.setRequestMethod(method);
595
        content = connection.getInputStream();
596
        return content;
597
    }
598
    
599
    private String streamToString(InputStream is)
600
        throws Exception
601
    {
602
        byte b[] = new byte[1024];
603
        int numread = is.read(b, 0, 1024);
604
        String response = new String();
605
        while(numread != -1)
606
        {
607
            response += new String(b, 0, numread);
608
            numread = is.read(b, 0, 1024);
609
        }
610
        return response;
611
    }
612
    
613
    private InputStream stringToStream(String s)
614
      throws Exception
615
    {
616
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
617
        return bais;
618
    }
619
    
620
    private class Document
621
    {
622
        public String docid;
623
        public String doctype;
624
        public String createDate;
625
        public String updateDate;
626
        public String doctext;
627
        
628
        public Document(String docid, String doctype, String createDate, String updateDate)
629
        {
630
            this.docid = docid.trim();
631
            this.doctype = doctype.trim();
632
            this.createDate = createDate.trim();
633
            this.updateDate = updateDate.trim();
634
        }
635
    }
636
}
(8-8/16)