Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a metacat instance with documents returned from a query
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.io.ByteArrayInputStream;
29
import java.io.InputStream;
30
import java.io.OutputStream;
31
import java.net.HttpURLConnection;
32
import java.net.URL;
33
import java.security.MessageDigest;
34
import java.util.Calendar;
35
import java.util.Date;
36
import java.util.Vector;
37

    
38
import javax.activation.DataHandler;
39
import javax.activation.DataSource;
40
import javax.mail.internet.MimeBodyPart;
41
import javax.mail.internet.MimeMultipart;
42

    
43
import org.apache.commons.io.IOUtils;
44
import org.dataone.client.D1Client;
45
import org.dataone.client.MNode;
46
import org.dataone.client.ObjectFormatCache;
47
import org.dataone.client.auth.CertificateManager;
48
import org.dataone.service.types.AccessPolicy;
49
import org.dataone.service.types.AccessRule;
50
import org.dataone.service.types.Checksum;
51
import org.dataone.service.types.ChecksumAlgorithm;
52
import org.dataone.service.types.Identifier;
53
import org.dataone.service.types.NodeReference;
54
import org.dataone.service.types.ObjectFormat;
55
import org.dataone.service.types.Permission;
56
import org.dataone.service.types.Session;
57
import org.dataone.service.types.Subject;
58
import org.dataone.service.types.SystemMetadata;
59
import org.ecoinformatics.datamanager.DataManager;
60
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
61
import org.ecoinformatics.datamanager.parser.DataPackage;
62

    
63
import edu.ucsb.nceas.metacat.MetaCatServlet;
64
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
65
import edu.ucsb.nceas.metacat.properties.PropertyService;
66
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
67

    
68
/**
69
 * @author berkley
70
 * A class to populate a metacat instance based on documents returned from a query
71
 */
72
public class MetacatPopulator
73
{
74
    /** Base url of the metacat instance documents are read from. */
    private String sourceUrl = null;
    /** Base url of the member node documents are created on. */
    private String destinationUrl = null;
    /** Search term sent as the "anyfield" parameter of the source query. */
    private String query = null;
    /** Username sent with the login and logout requests. */
    private String username = null;
    /** Password sent with the login request. */
    private String password = null;
    /** Session passed to every member node call. */
    private Session session = null;
    
    /**
     * create a new MetacatPopulator with given source and destination urls.  
     * These should be
     * of the form "http://<url>/<metacat_instance>"
     * If username and/or password is null, the query will be run as public
     * @param sourceUrl base url of the metacat to copy documents from
     * @param destUrl base url of the member node to copy documents to
     * @param query the search term used to select documents on the source
     * @param username the user to log in as; also used as the session subject
     * @param password the password for username
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
        // TODO: set up certificate for D1 interaction
        CertificateManager.getInstance();
        Subject subject = new Subject();
        subject.setValue(username);
        this.session = new Session();
        // The subject used to be built and then thrown away, which left the
        // session without any subject; attach it so member node calls (and
        // the "session: ..." line printed by populate) carry the user.
        this.session.setSubject(subject);
    }
105
    
106
    /**
107
     * populate from the source
108
     */
109
    public void populate()
110
      throws Exception
111
    {
112
        printHeader("Source login");
113
        String sourceSessionid = loginSource();
114
        
115
        //do a query
116
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
117
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
118
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
119
        params += "action=query&";
120
        params += "qformat=xml&";
121
        params += "anyfield=" + query;
122
        
123
        printHeader("Searching source");
124
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
125
        InputStream is = getResponse(sourceUrl, "/metacat",
126
                params, "POST");
127
        String response = streamToString(is);
128
        //System.out.println("response: " + response);
129
        Vector<Document> docs = parseResponse(response);
130
        
131
        
132
        printHeader("Parsing source results");
133
        System.out.println("creating MN with url: " + destinationUrl + "/");
134
        MNode mn = D1Client.getMN(destinationUrl + "/");
135
        
136
        printHeader("Processing " + docs.size() + " results.");
137
        printHeader("logging in to the destination " + destinationUrl);
138
        
139
        System.out.println("session: " + session.getSubject());
140
        for(int i=0; i<docs.size(); i++)
141
        {
142
            //for each document in the query
143
            Document doc = docs.get(i);
144
            String docid = doc.docid;
145
            //get the doc from source
146
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
147
            params = "action=read&qformat=xml&docid=" + docid;
148
            is = getResponse(sourceUrl, "/metacat", params, "POST");
149
            String doctext = streamToString(is);
150
            System.out.println("doctext: " + doctext);
151
            is = stringToStream(doctext);
152
            //parse the document
153
            DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
154
        	DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
155
        	DataPackage dataPackage = dataManager.parseMetadata(is);
156
        	
157
            if(dataPackage == null)
158
            {
159
                continue;
160
            }
161
            //go through the DistributionMetadata and download any described data
162
            
163
            is = stringToStream(doctext);
164
            doc.doctext = doctext;
165

    
166
            printHeader("creating document on destination " + destinationUrl);            
167
            SystemMetadata sysmeta = generateSystemMetadata(doc);
168
            for(int j=0; j < dataPackage.getEntityList().length; j++)
169
            {
170
                String dataDocUrl = dataPackage.getEntityList()[j].getURL();
171
                String dataDocMimeType = 
172
                	dataPackage.getEntityList()[j].getDataFormat();
173
                if (dataDocMimeType == null) {
174
	                dataDocMimeType = 
175
	                	ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue();
176
                }
177
                String dataDocLocalId = "";
178
                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
179
                { //we only handle ecogrid urls right now
180
                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
181
                            "ecogrid://knb/".length(), dataDocUrl.length());
182
                    //get the file
183
                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
184
                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
185
                    String dataDocText = streamToString(dataDocIs);
186
                    
187
                    //set the id
188
                    Identifier did = new Identifier();
189
                    did.setValue(dataDocLocalId);
190
                    
191
                    //add the desribeby to the eml's sysmeta
192
                    System.out.println("adding describe for doc " + 
193
                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
194
                    sysmeta.addDescribe(did);
195
                    
196
                    //create sysmeta for the data doc                    
197
                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
198
                    //overwrite the bogus values from the last call 
199
                    dataDocSysMeta.setIdentifier(did);
200
                    dataDocSysMeta.setObjectFormat(ObjectFormatCache.getInstance().getFormat(dataDocMimeType));
201
                    Checksum checksum = new Checksum();
202
                    dataDocIs = stringToStream(dataDocText);
203
                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
204
                    checksum.setAlgorithm(ca);
205
                    checksum.setValue(checksum(dataDocIs));
206
                    dataDocSysMeta.setChecksum(checksum);
207
                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
208
                    dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
209
                    boolean error = false;
210
                    
211
                    // create access policy
212
                    //"public", "read", "allow", "allowFirst"
213
                    AccessPolicy accessPolicy = new AccessPolicy();
214
                    AccessRule accessRule = new AccessRule();
215
					accessRule.addPermission(Permission.READ);
216
                    Subject subject = new Subject();
217
                    subject.setValue("public");
218
					accessRule.addSubject(subject );
219
					accessPolicy.addAllow(accessRule );
220
                    //create the data doc on d1
221
                    try
222
                    {
223
                        mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
224
						mn.setAccessPolicy(session, dataDocSysMeta.getIdentifier(), accessPolicy);
225
                    }
226
                    catch(Exception e)
227
                    {
228
                        error = true;
229
                        System.out.println("ERROR: Could not create data document with id " + 
230
                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
231
                    }
232
                    finally
233
                    {
234
                        if(error)
235
                        {
236
                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
237
                                    "FAILED.");
238
                        }
239
                        else
240
                        {
241
                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
242
                                " which is described by " + sysmeta.getIdentifier().getValue());
243
                        }
244
                    }
245
                }
246
                else
247
                {
248
                    System.out.println("WARNING: Could not process describes url " +
249
                            dataDocUrl + " for document " + doc.docid + 
250
                    ".  Only ecogrid://knb/ urls are currently supported.");
251
                }
252
            }
253
            
254
            try
255
            {
256
              Identifier id = mn.create(session, sysmeta.getIdentifier(), 
257
                    IOUtils.toInputStream(doc.doctext), sysmeta);
258
              System.out.println("Success inserting document " + id.getValue());
259
              
260
            }
261
            catch(Exception e)
262
            {
263
                e.printStackTrace();
264
                System.out.println("Could not create document with id " + 
265
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
266
                
267
            }
268
            finally
269
            {
270
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
271
            }
272
        }
273
        
274
        logout();
275
    }
276
    
277

    
278
    
279
    /**
280
     * @param doc
281
     * @return
282
     */
283
    private SystemMetadata generateSystemMetadata(Document doc)
284
      throws Exception
285
    {
286
        SystemMetadata sm = new SystemMetadata();
287
        //set the id
288
        Identifier id = new Identifier();
289
        id.setValue(doc.docid.trim());
290
        sm.setIdentifier(id);
291
        
292
        //set the object format
293
        ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
294
        if(format == null)
295
        {
296
            if(doc.doctype.trim().equals("BIN"))
297
            {
298
                format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
299
            }
300
            else
301
            {
302
                format = ObjectFormatCache.getInstance().getFormat("text/plain");
303
            }
304
        }
305
        sm.setObjectFormat(format);
306
        
307
        //create the checksum
308
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
309
        String checksumS = checksum(bais);
310
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
311
        Checksum checksum = new Checksum();
312
        checksum.setValue(checksumS);
313
        checksum.setAlgorithm(ca);
314
        sm.setChecksum(checksum);
315
        
316
        //set the size
317
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
318
        
319
        //submitter
320
        Subject p = new Subject();
321
        p.setValue("unknown");
322
        sm.setSubmitter(p);
323
        sm.setRightsHolder(p);
324
        try
325
        {
326
            Date dateCreated = parseMetacatDate(doc.createDate);
327
            sm.setDateUploaded(dateCreated);
328
            Date dateUpdated = parseMetacatDate(doc.updateDate);
329
            sm.setDateSysMetadataModified(dateUpdated);
330
        }
331
        catch(Exception e)
332
        {
333
            System.out.println("couldn't parse a date: " + e.getMessage());
334
            Date dateCreated = new Date();
335
            sm.setDateUploaded(dateCreated);
336
            Date dateUpdated = new Date();
337
            sm.setDateSysMetadataModified(dateUpdated);
338
        }
339
        NodeReference nr = new NodeReference();
340
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
341
        sm.setOriginMemberNode(nr);
342
        sm.setAuthoritativeMemberNode(nr);
343
        
344
        return sm;
345
    }
346
    
347
    private void printHeader(String s)
348
    {
349
        System.out.println("****** " + s + " *******");
350
    }
351
    
352
    /**
353
     * produce an md5 checksum for item
354
     */
355
    private String checksum(InputStream is)
356
      throws Exception
357
    {        
358
        byte[] buffer = new byte[1024];
359
        MessageDigest complete = MessageDigest.getInstance("MD5");
360
        int numRead;
361
        
362
        do 
363
        {
364
          numRead = is.read(buffer);
365
          if (numRead > 0) 
366
          {
367
            complete.update(buffer, 0, numRead);
368
          }
369
        } while (numRead != -1);
370
        
371
        
372
        return getHex(complete.digest());
373
    }
374
    
375
    /**
376
     * convert a byte array to a hex string
377
     */
378
    private static String getHex( byte [] raw ) 
379
    {
380
        final String HEXES = "0123456789ABCDEF";
381
        if ( raw == null ) {
382
          return null;
383
        }
384
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
385
        for ( final byte b : raw ) {
386
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
387
             .append(HEXES.charAt((b & 0x0F)));
388
        }
389
        return hex.toString();
390
    }
391
    
392
    /**
393
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
394
     * a proper date object
395
     * @param date
396
     * @return
397
     */
398
    private Date parseMetacatDate(String date)
399
    {
400
        String year = date.substring(0, 4);
401
        String month = date.substring(5, 7);
402
        String day = date.substring(8, 10);
403
        Calendar c = Calendar.getInstance();
404
        c.set(new Integer(year).intValue(), 
405
              new Integer(month).intValue(), 
406
              new Integer(day).intValue());
407
        return c.getTime();
408
    }
409

    
410
    /**
411
     * send a request to the resource
412
     */
413
    private InputStream sendRequest(String contextRootUrl, String resource, 
414
            String sessionid, String method, String urlParamaters, 
415
            String contentType, InputStream dataStream) 
416
        throws Exception 
417
    {
418
        
419
        HttpURLConnection connection = null ;
420
        String restURL = contextRootUrl + resource;
421

    
422
        if (urlParamaters != null) {
423
            if (restURL.indexOf("?") == -1)             
424
                restURL += "?";
425
            restURL += urlParamaters; 
426
            if(restURL.indexOf(" ") != -1)
427
            {
428
                restURL = restURL.replaceAll("\\s", "%20");
429
            }
430
        }
431
        
432
        if(sessionid != null)
433
        {
434
            if(restURL.indexOf("?") == -1)
435
            {
436
                restURL += "?sessionid=" + sessionid;
437
            }
438
            else
439
            {
440
                restURL += "&sessionid=" + sessionid;
441
            }
442
        }
443

    
444
        URL u = null;
445
        InputStream content = null;
446
        System.out.println("url: " + restURL);
447
        System.out.println("method: " + method);
448
        u = new URL(restURL);
449
        connection = (HttpURLConnection) u.openConnection();
450
        if (contentType!=null) {
451
            connection.setRequestProperty("Content-Type",contentType);
452
        }
453

    
454
        connection.setDoOutput(true);
455
        connection.setDoInput(true);
456
        connection.setRequestMethod(method);
457

    
458
        if (!method.equals("GET")) {
459
            if (dataStream != null) {
460
                OutputStream out = connection.getOutputStream();
461
                IOUtils.copy(dataStream, out);
462
            }
463
        }
464

    
465
        return connection.getInputStream();   
466
    }
467
    
468
    /**
469
     * create a mime multipart message from object and sysmeta
470
     */
471
    private MimeMultipart createMimeMultipart(InputStream object)
472
      throws Exception
473
    {
474
        final MimeMultipart mmp = new MimeMultipart();
475
        MimeBodyPart objectPart = new MimeBodyPart();
476
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
477
        objectPart.setFileName("doctext");
478
        DataSource ds = new InputStreamDataSource("doctext", object);
479
        DataHandler dh = new DataHandler(ds);
480
        objectPart.setDataHandler(dh);
481
        mmp.addBodyPart(objectPart);
482
        return mmp;
483
    }
484
    
485
    /**
486
     * parse a metacat query response and return a vector of docids
487
     * @param response
488
     * @return
489
     */
490
    private Vector<Document> parseResponse(String response)
491
    {
492
        Vector<Document> v = new Vector<Document>();
493
        int dstart = response.indexOf("<document>");
494
        int dend = response.indexOf("</document>", dstart);
495
        while(dstart != -1)
496
        {
497
            String doc = response.substring(dstart + "<document>".length(), dend);
498
            //System.out.println("adding " + docid);
499
            Document d = new Document(getFieldFromDoc(doc, "docid"),
500
                    getFieldFromDoc(doc, "doctype"),
501
                    getFieldFromDoc(doc, "createdate"),
502
                    getFieldFromDoc(doc, "updatedate"));
503
            v.add(d);
504
            dstart = response.indexOf("<document>", dend);
505
            dend = response.indexOf("</document>", dstart);
506
        }
507
        
508
        return v;
509
    }
510
    
511
    private String getFieldFromDoc(String doc, String fieldname)
512
    {
513
        String field = "<" + fieldname + ">";
514
        String fieldend = "</" + fieldname + ">";
515
        int start = doc.indexOf(field);
516
        int end = doc.indexOf(fieldend);
517
        String s = doc.substring(start + field.length(), end);
518
        //System.out.println("field: " + fieldname + " : " + s);
519
        return s;
520
    }
521
    
522
    /**
523
     * login the source
524
     * @return
525
     * @throws Exception
526
     */
527
    private String loginSource()
528
      throws Exception
529
    {
530
        return login(sourceUrl);
531
    }
532
    
533
    
534
    /**
535
     * returns a sessionid
536
     * @return
537
     */
538
    private String login(String sourceUrl)
539
      throws Exception
540
    {
541
        InputStream is = getResponse(sourceUrl, "/metacat", 
542
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", "POST");
543
        String response = streamToString(is);
544
        //System.out.println("response: " + response);
545
        if(response.indexOf("sessionId") == -1)
546
        {
547
            throw new Exception("Error logging into " + sourceUrl);
548
        }
549
        
550
        String sessionid = response.substring(
551
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
552
                response.indexOf("</sessionId>"));
553
        System.out.println("sessionid: " + sessionid);
554
        return sessionid;
555
    }
556
    
557
    /**
558
     * logout both the source and destination
559
     * @throws Exception
560
     */
561
    private void logout()
562
        throws Exception
563
    {
564
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
565
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
566
    }
567
    
568
    /**
569
     * get an http response
570
     * @param contextRootUrl
571
     * @param resource
572
     * @param urlParameters
573
     * @param method
574
     * @return
575
     * @throws Exception
576
     */
577
    private InputStream getResponse(String contextRootUrl, String resource, 
578
            String urlParameters, String method)
579
      throws Exception
580
    {
581
        HttpURLConnection connection = null ;
582

    
583
        String restURL = contextRootUrl+resource;
584

    
585
        if (urlParameters != null) {
586
            if (restURL.indexOf("?") == -1)             
587
                restURL += "?";
588
            restURL += urlParameters; 
589
            if(restURL.indexOf(" ") != -1)
590
            {
591
                restURL = restURL.replaceAll("\\s", "%20");
592
            }
593
        }
594

    
595
        URL u = null;
596
        InputStream content = null;            
597
        System.out.println("url: " + restURL);
598
        System.out.println("method: " + method);
599
        u = new URL(restURL);
600
        connection = (HttpURLConnection) u.openConnection();
601
        connection.setDoOutput(true);
602
        connection.setDoInput(true);
603
        connection.setRequestMethod(method);
604
        content = connection.getInputStream();
605
        return content;
606
    }
607
    
608
    private String streamToString(InputStream is)
609
        throws Exception
610
    {
611
        byte b[] = new byte[1024];
612
        int numread = is.read(b, 0, 1024);
613
        String response = new String();
614
        while(numread != -1)
615
        {
616
            response += new String(b, 0, numread);
617
            numread = is.read(b, 0, 1024);
618
        }
619
        return response;
620
    }
621
    
622
    private InputStream stringToStream(String s)
623
      throws Exception
624
    {
625
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
626
        return bais;
627
    }
628
    
629
    private class Document
630
    {
631
        public String docid;
632
        public String doctype;
633
        public String createDate;
634
        public String updateDate;
635
        public String doctext;
636
        
637
        public Document(String docid, String doctype, String createDate, String updateDate)
638
        {
639
            this.docid = docid.trim();
640
            this.doctype = doctype.trim();
641
            this.createDate = createDate.trim();
642
            this.updateDate = updateDate.trim();
643
        }
644
    }
645
}
(8-8/16)