Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a Metacat instance with documents returned from a query
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.io.ByteArrayInputStream;
29
import java.io.InputStream;
30
import java.io.OutputStream;
31
import java.net.HttpURLConnection;
32
import java.net.URL;
33
import java.security.MessageDigest;
34
import java.util.Calendar;
35
import java.util.Date;
36
import java.util.Vector;
37

    
38
import javax.activation.DataHandler;
39
import javax.activation.DataSource;
40
import javax.mail.internet.MimeBodyPart;
41
import javax.mail.internet.MimeMultipart;
42

    
43
import org.apache.commons.io.IOUtils;
44
import org.dataone.client.D1Client;
45
import org.dataone.client.MNode;
46
import org.dataone.client.ObjectFormatCache;
47
import org.dataone.client.auth.CertificateManager;
48
import org.dataone.service.exceptions.NotFound;
49
import org.dataone.service.types.v1.AccessPolicy;
50
import org.dataone.service.types.v1.AccessRule;
51
import org.dataone.service.types.v1.Checksum;
52
import org.dataone.service.types.v1.ChecksumAlgorithm;
53
import org.dataone.service.types.v1.Identifier;
54
import org.dataone.service.types.v1.NodeReference;
55
import org.dataone.service.types.v1.ObjectFormat;
56
import org.dataone.service.types.v1.Permission;
57
import org.dataone.service.types.v1.Session;
58
import org.dataone.service.types.v1.Subject;
59
import org.dataone.service.types.v1.SystemMetadata;
60
import org.ecoinformatics.datamanager.DataManager;
61
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
62
import org.ecoinformatics.datamanager.parser.DataPackage;
63

    
64
import edu.ucsb.nceas.metacat.MetaCatServlet;
65
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
66
import edu.ucsb.nceas.metacat.properties.PropertyService;
67
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
68

    
69
/**
70
 * @author berkley
71
 * A class to populate a metacat instance based on documents returned from a query
72
 */
73
public class MetacatPopulator
74
{
75
    private String sourceUrl = null;
76
    private String destinationUrl = null;
77
    private String query = null;
78
    private String username = null;
79
    private String password = null;
80
    private Session session = null;
81
    
82
    /**
83
     * create a new MetacatPopulator with given source and destination urls.  
84
     * These should be
85
     * of the form "http://<url>/<metacat_instance>"
86
     * If username and/or password is null, the query will be run as public
87
     * @param sourceUrl
88
     * @param destUrl
89
     * @param query
90
     * @param username
91
     * @param password
92
     */
93
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
94
    {
95
        this.sourceUrl = sourceUrl;
96
        this.query = query;
97
        this.username = username;
98
        this.password = password;
99
        this.destinationUrl = destUrl;
100
        // TODO: set up certificate for D1 interaction
101
        CertificateManager.getInstance();
102
        this.session = new Session();
103
        Subject subject = new Subject();
104
        subject.setValue(username);
105
    }
106
    
107
    /**
108
     * populate from the source
109
     */
110
    public void populate()
111
      throws Exception
112
    {
113
        printHeader("Source login");
114
        String sourceSessionid = loginSource();
115
        
116
        //do a query
117
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
118
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
119
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
120
        params += "action=query&";
121
        params += "qformat=xml&";
122
        params += "anyfield=" + query;
123
        
124
        printHeader("Searching source");
125
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
126
        InputStream is = getResponse(sourceUrl, "/metacat",
127
                params, "POST");
128
        String response = streamToString(is);
129
        //System.out.println("response: " + response);
130
        Vector<Document> docs = parseResponse(response);
131
        
132
        
133
        printHeader("Parsing source results");
134
        System.out.println("creating MN with url: " + destinationUrl + "/");
135
        MNode mn = D1Client.getMN(destinationUrl + "/");
136
        
137
        printHeader("Processing " + docs.size() + " results.");
138
        printHeader("logging in to the destination " + destinationUrl);
139
        
140
        System.out.println("session: " + session.getSubject());
141
        for(int i=0; i<docs.size(); i++)
142
        {
143
            //for each document in the query
144
            Document doc = docs.get(i);
145
            String docid = doc.docid;
146
            //get the doc from source
147
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
148
            params = "action=read&qformat=xml&docid=" + docid;
149
            is = getResponse(sourceUrl, "/metacat", params, "POST");
150
            String doctext = streamToString(is);
151
            System.out.println("doctext: " + doctext);
152
            is = stringToStream(doctext);
153
            //parse the document
154
            DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
155
        	DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
156
        	DataPackage dataPackage = dataManager.parseMetadata(is);
157
        	
158
            if(dataPackage == null)
159
            {
160
                continue;
161
            }
162
            //go through the DistributionMetadata and download any described data
163
            
164
            is = stringToStream(doctext);
165
            doc.doctext = doctext;
166

    
167
            printHeader("creating document on destination " + destinationUrl);            
168
            SystemMetadata sysmeta = generateSystemMetadata(doc);
169
            if (dataPackage.getEntityList() != null) {
170
	            for(int j=0; j < dataPackage.getEntityList().length; j++)
171
	            {
172
	                String dataDocUrl = dataPackage.getEntityList()[j].getURL();
173
	                String dataDocMimeType = 
174
	                	dataPackage.getEntityList()[j].getDataFormat();
175
	                if (dataDocMimeType == null) {
176
		                dataDocMimeType = 
177
		                	ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue();
178
	                }
179
	                String dataDocLocalId = "";
180
	                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
181
	                { //we only handle ecogrid urls right now
182
	                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
183
	                            "ecogrid://knb/".length(), dataDocUrl.length());
184
	                    //get the file
185
	                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
186
	                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
187
	                    String dataDocText = streamToString(dataDocIs);
188
	                    
189
	                    //set the id
190
	                    Identifier did = new Identifier();
191
	                    did.setValue(dataDocLocalId);
192
	                    
193
	                    //add the desribeby to the eml's sysmeta
194
	                    // TODO Use ORE
195
//	                    System.out.println("adding describe for doc " + 
196
//	                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
197
//	                    sysmeta.addDescribe(did);
198
	                    
199
	                    //create sysmeta for the data doc                    
200
	                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
201
	                    //overwrite the bogus values from the last call 
202
	                    dataDocSysMeta.setIdentifier(did);
203
	                    ObjectFormat format = null;
204
	                    try {
205
	                    	format = ObjectFormatCache.getInstance().getFormat(dataDocMimeType);
206
	                    } catch (NotFound e) {
207
							System.out.println(e.getMessage());
208
						}
209
						dataDocSysMeta.setObjectFormat(format);
210
	                    Checksum checksum = new Checksum();
211
	                    dataDocIs = stringToStream(dataDocText);
212
	                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
213
	                    checksum.setAlgorithm(ca);
214
	                    checksum.setValue(checksum(dataDocIs));
215
	                    dataDocSysMeta.setChecksum(checksum);
216
	                    dataDocSysMeta.setSize(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
217
	                    // TODO use ORE map
218
	                    //dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
219
	                    boolean error = false;
220
	                    
221
	                    // create access policy
222
	                    //"public", "read", "allow", "allowFirst"
223
	                    AccessPolicy accessPolicy = new AccessPolicy();
224
	                    AccessRule accessRule = new AccessRule();
225
						accessRule.addPermission(Permission.READ);
226
	                    Subject subject = new Subject();
227
	                    subject.setValue("public");
228
						accessRule.addSubject(subject );
229
						accessPolicy.addAllow(accessRule );
230
	                    //create the data doc on d1
231
	                    try
232
	                    {
233
	                        mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
234
							mn.setAccessPolicy(session, dataDocSysMeta.getIdentifier(), accessPolicy);
235
	                    }
236
	                    catch(Exception e)
237
	                    {
238
	                        error = true;
239
	                        System.out.println("ERROR: Could not create data document with id " + 
240
	                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
241
	                    }
242
	                    finally
243
	                    {
244
	                        if(error)
245
	                        {
246
	                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
247
	                                    "FAILED.");
248
	                        }
249
	                        else
250
	                        {
251
	                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
252
	                                " which is described by " + sysmeta.getIdentifier().getValue());
253
	                        }
254
	                    }
255
	                }
256
	                else
257
	                {
258
	                    System.out.println("WARNING: Could not process describes url " +
259
	                            dataDocUrl + " for document " + doc.docid + 
260
	                    ".  Only ecogrid://knb/ urls are currently supported.");
261
	                }
262
	            }
263
            }
264
            
265
            try
266
            {
267
              Identifier id = mn.create(session, sysmeta.getIdentifier(), 
268
                    IOUtils.toInputStream(doc.doctext), sysmeta);
269
              System.out.println("Success inserting document " + id.getValue());
270
              
271
            }
272
            catch(Exception e)
273
            {
274
                e.printStackTrace();
275
                System.out.println("Could not create document with id " + 
276
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
277
                
278
            }
279
            finally
280
            {
281
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
282
            }
283
        }
284
        
285
        logout();
286
    }
287
    
288

    
289
    
290
    /**
291
     * @param doc
292
     * @return
293
     */
294
    private SystemMetadata generateSystemMetadata(Document doc)
295
      throws Exception
296
    {
297
        SystemMetadata sm = new SystemMetadata();
298
        //set the id
299
        Identifier id = new Identifier();
300
        id.setValue(doc.docid.trim());
301
        sm.setIdentifier(id);
302
        
303
        //set the object format
304
        ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
305
        if(format == null)
306
        {
307
            if(doc.doctype.trim().equals("BIN"))
308
            {
309
                format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
310
            }
311
            else
312
            {
313
                format = ObjectFormatCache.getInstance().getFormat("text/plain");
314
            }
315
        }
316
        sm.setObjectFormat(format);
317
        
318
        //create the checksum
319
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
320
        String checksumS = checksum(bais);
321
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
322
        Checksum checksum = new Checksum();
323
        checksum.setValue(checksumS);
324
        checksum.setAlgorithm(ca);
325
        sm.setChecksum(checksum);
326
        
327
        //set the size
328
        sm.setSize(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
329
        
330
        //submitter
331
        Subject p = new Subject();
332
        p.setValue("unknown");
333
        sm.setSubmitter(p);
334
        sm.setRightsHolder(p);
335
        try
336
        {
337
            Date dateCreated = parseMetacatDate(doc.createDate);
338
            sm.setDateUploaded(dateCreated);
339
            Date dateUpdated = parseMetacatDate(doc.updateDate);
340
            sm.setDateSysMetadataModified(dateUpdated);
341
        }
342
        catch(Exception e)
343
        {
344
            System.out.println("couldn't parse a date: " + e.getMessage());
345
            Date dateCreated = new Date();
346
            sm.setDateUploaded(dateCreated);
347
            Date dateUpdated = new Date();
348
            sm.setDateSysMetadataModified(dateUpdated);
349
        }
350
        NodeReference nr = new NodeReference();
351
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
352
        sm.setOriginMemberNode(nr);
353
        sm.setAuthoritativeMemberNode(nr);
354
        
355
        return sm;
356
    }
357
    
358
    private void printHeader(String s)
359
    {
360
        System.out.println("****** " + s + " *******");
361
    }
362
    
363
    /**
364
     * produce an md5 checksum for item
365
     */
366
    private String checksum(InputStream is)
367
      throws Exception
368
    {        
369
        byte[] buffer = new byte[1024];
370
        MessageDigest complete = MessageDigest.getInstance("MD5");
371
        int numRead;
372
        
373
        do 
374
        {
375
          numRead = is.read(buffer);
376
          if (numRead > 0) 
377
          {
378
            complete.update(buffer, 0, numRead);
379
          }
380
        } while (numRead != -1);
381
        
382
        
383
        return getHex(complete.digest());
384
    }
385
    
386
    /**
387
     * convert a byte array to a hex string
388
     */
389
    private static String getHex( byte [] raw ) 
390
    {
391
        final String HEXES = "0123456789ABCDEF";
392
        if ( raw == null ) {
393
          return null;
394
        }
395
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
396
        for ( final byte b : raw ) {
397
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
398
             .append(HEXES.charAt((b & 0x0F)));
399
        }
400
        return hex.toString();
401
    }
402
    
403
    /**
404
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
405
     * a proper date object
406
     * @param date
407
     * @return
408
     */
409
    private Date parseMetacatDate(String date)
410
    {
411
        String year = date.substring(0, 4);
412
        String month = date.substring(5, 7);
413
        String day = date.substring(8, 10);
414
        Calendar c = Calendar.getInstance();
415
        c.set(new Integer(year).intValue(), 
416
              new Integer(month).intValue(), 
417
              new Integer(day).intValue());
418
        return c.getTime();
419
    }
420

    
421
    /**
422
     * send a request to the resource
423
     */
424
    private InputStream sendRequest(String contextRootUrl, String resource, 
425
            String sessionid, String method, String urlParamaters, 
426
            String contentType, InputStream dataStream) 
427
        throws Exception 
428
    {
429
        
430
        HttpURLConnection connection = null ;
431
        String restURL = contextRootUrl + resource;
432

    
433
        if (urlParamaters != null) {
434
            if (restURL.indexOf("?") == -1)             
435
                restURL += "?";
436
            restURL += urlParamaters; 
437
            if(restURL.indexOf(" ") != -1)
438
            {
439
                restURL = restURL.replaceAll("\\s", "%20");
440
            }
441
        }
442
        
443
        if(sessionid != null)
444
        {
445
            if(restURL.indexOf("?") == -1)
446
            {
447
                restURL += "?sessionid=" + sessionid;
448
            }
449
            else
450
            {
451
                restURL += "&sessionid=" + sessionid;
452
            }
453
        }
454

    
455
        URL u = null;
456
        InputStream content = null;
457
        System.out.println("url: " + restURL);
458
        System.out.println("method: " + method);
459
        u = new URL(restURL);
460
        connection = (HttpURLConnection) u.openConnection();
461
        if (contentType!=null) {
462
            connection.setRequestProperty("Content-Type",contentType);
463
        }
464

    
465
        connection.setDoOutput(true);
466
        connection.setDoInput(true);
467
        connection.setRequestMethod(method);
468

    
469
        if (!method.equals("GET")) {
470
            if (dataStream != null) {
471
                OutputStream out = connection.getOutputStream();
472
                IOUtils.copy(dataStream, out);
473
            }
474
        }
475

    
476
        return connection.getInputStream();   
477
    }
478
    
479
    /**
480
     * create a mime multipart message from object and sysmeta
481
     */
482
    private MimeMultipart createMimeMultipart(InputStream object)
483
      throws Exception
484
    {
485
        final MimeMultipart mmp = new MimeMultipart();
486
        MimeBodyPart objectPart = new MimeBodyPart();
487
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
488
        objectPart.setFileName("doctext");
489
        DataSource ds = new InputStreamDataSource("doctext", object);
490
        DataHandler dh = new DataHandler(ds);
491
        objectPart.setDataHandler(dh);
492
        mmp.addBodyPart(objectPart);
493
        return mmp;
494
    }
495
    
496
    /**
497
     * parse a metacat query response and return a vector of docids
498
     * @param response
499
     * @return
500
     */
501
    private Vector<Document> parseResponse(String response)
502
    {
503
        Vector<Document> v = new Vector<Document>();
504
        int dstart = response.indexOf("<document>");
505
        int dend = response.indexOf("</document>", dstart);
506
        while(dstart != -1)
507
        {
508
            String doc = response.substring(dstart + "<document>".length(), dend);
509
            //System.out.println("adding " + docid);
510
            Document d = new Document(getFieldFromDoc(doc, "docid"),
511
                    getFieldFromDoc(doc, "doctype"),
512
                    getFieldFromDoc(doc, "createdate"),
513
                    getFieldFromDoc(doc, "updatedate"));
514
            v.add(d);
515
            dstart = response.indexOf("<document>", dend);
516
            dend = response.indexOf("</document>", dstart);
517
        }
518
        
519
        return v;
520
    }
521
    
522
    private String getFieldFromDoc(String doc, String fieldname)
523
    {
524
        String field = "<" + fieldname + ">";
525
        String fieldend = "</" + fieldname + ">";
526
        int start = doc.indexOf(field);
527
        int end = doc.indexOf(fieldend);
528
        String s = doc.substring(start + field.length(), end);
529
        //System.out.println("field: " + fieldname + " : " + s);
530
        return s;
531
    }
532
    
533
    /**
534
     * login the source
535
     * @return
536
     * @throws Exception
537
     */
538
    private String loginSource()
539
      throws Exception
540
    {
541
        return login(sourceUrl);
542
    }
543
    
544
    
545
    /**
546
     * returns a sessionid
547
     * @return
548
     */
549
    private String login(String sourceUrl)
550
      throws Exception
551
    {
552
        InputStream is = getResponse(sourceUrl, "/metacat", 
553
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", "POST");
554
        String response = streamToString(is);
555
        //System.out.println("response: " + response);
556
        if(response.indexOf("sessionId") == -1)
557
        {
558
            throw new Exception("Error logging into " + sourceUrl);
559
        }
560
        
561
        String sessionid = response.substring(
562
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
563
                response.indexOf("</sessionId>"));
564
        System.out.println("sessionid: " + sessionid);
565
        return sessionid;
566
    }
567
    
568
    /**
569
     * logout both the source and destination
570
     * @throws Exception
571
     */
572
    private void logout()
573
        throws Exception
574
    {
575
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
576
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
577
    }
578
    
579
    /**
580
     * get an http response
581
     * @param contextRootUrl
582
     * @param resource
583
     * @param urlParameters
584
     * @param method
585
     * @return
586
     * @throws Exception
587
     */
588
    private InputStream getResponse(String contextRootUrl, String resource, 
589
            String urlParameters, String method)
590
      throws Exception
591
    {
592
        HttpURLConnection connection = null ;
593

    
594
        String restURL = contextRootUrl+resource;
595

    
596
        if (urlParameters != null) {
597
            if (restURL.indexOf("?") == -1)             
598
                restURL += "?";
599
            restURL += urlParameters; 
600
            if(restURL.indexOf(" ") != -1)
601
            {
602
                restURL = restURL.replaceAll("\\s", "%20");
603
            }
604
        }
605

    
606
        URL u = null;
607
        InputStream content = null;            
608
        System.out.println("url: " + restURL);
609
        System.out.println("method: " + method);
610
        u = new URL(restURL);
611
        connection = (HttpURLConnection) u.openConnection();
612
        connection.setDoOutput(true);
613
        connection.setDoInput(true);
614
        connection.setRequestMethod(method);
615
        content = connection.getInputStream();
616
        return content;
617
    }
618
    
619
    private String streamToString(InputStream is)
620
        throws Exception
621
    {
622
        byte b[] = new byte[1024];
623
        int numread = is.read(b, 0, 1024);
624
        String response = new String();
625
        while(numread != -1)
626
        {
627
            response += new String(b, 0, numread);
628
            numread = is.read(b, 0, 1024);
629
        }
630
        return response;
631
    }
632
    
633
    private InputStream stringToStream(String s)
634
      throws Exception
635
    {
636
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
637
        return bais;
638
    }
639
    
640
    private class Document
641
    {
642
        public String docid;
643
        public String doctype;
644
        public String createDate;
645
        public String updateDate;
646
        public String doctext;
647
        
648
        public Document(String docid, String doctype, String createDate, String updateDate)
649
        {
650
            this.docid = docid.trim();
651
            this.doctype = doctype.trim();
652
            this.createDate = createDate.trim();
653
            this.updateDate = updateDate.trim();
654
        }
655
    }
656
}
(8-8/16)