/**
 *  '$RCSfile$'
 *    Purpose: A class that populates a metacat instance with the documents
 *             returned by a query against another metacat instance
 *  Copyright: 2010 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Michael Daigle
 * 
 *   '$Author: berkley $'
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
 * '$Revision: 5374 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package edu.ucsb.nceas.metacat.util;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.security.MessageDigest;
import java.util.Calendar;
import java.util.Date;
import java.util.Vector;

import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMultipart;

import org.apache.commons.io.IOUtils;
import org.dataone.client.D1Client;
import org.dataone.client.MNode;
import org.dataone.client.ObjectFormatCache;
import org.dataone.client.auth.CertificateManager;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.types.v1.AccessPolicy;
import org.dataone.service.types.v1.AccessRule;
import org.dataone.service.types.v1.Checksum;
import org.dataone.service.types.v1.ChecksumAlgorithm;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.NodeReference;
import org.dataone.service.types.v1.ObjectFormat;
import org.dataone.service.types.v1.Permission;
import org.dataone.service.types.v1.Session;
import org.dataone.service.types.v1.Subject;
import org.dataone.service.types.v1.SystemMetadata;
import org.ecoinformatics.datamanager.DataManager;
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
import org.ecoinformatics.datamanager.parser.DataPackage;

import edu.ucsb.nceas.metacat.MetaCatServlet;
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
import edu.ucsb.nceas.metacat.properties.PropertyService;
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;

/**
71
 * @author berkley
72
 * A class to populate a metacat instance based on documents returned from a query
73
 */
74
public class MetacatPopulator
75
{
76
    private String sourceUrl = null;
77
    private String destinationUrl = null;
78
    private String query = null;
79
    private String username = null;
80
    private String password = null;
81
    private Session session = null;
82
    
83
    /**
84
     * create a new MetacatPopulator with given source and destination urls.  
85
     * These should be
86
     * of the form "http://<url>/<metacat_instance>"
87
     * If username and/or password is null, the query will be run as public
88
     * @param sourceUrl
89
     * @param destUrl
90
     * @param query
91
     * @param username
92
     * @param password
93
     */
94
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
95
    {
96
        this.sourceUrl = sourceUrl;
97
        this.query = query;
98
        this.username = username;
99
        this.password = password;
100
        this.destinationUrl = destUrl;
101
        // TODO: set up certificate for D1 interaction
102
        CertificateManager.getInstance();
103
        this.session = new Session();
104
        Subject subject = new Subject();
105
        subject.setValue(username);
106
    }
107
    
108
    /**
109
     * populate from the source
110
     */
111
    public void populate()
112
      throws Exception
113
    {
114
        printHeader("Source login");
115
        String sourceSessionid = loginSource();
116
        
117
        //do a query
118
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
119
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
120
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
121
        params += "action=query&";
122
        params += "qformat=xml&";
123
        params += "anyfield=" + query;
124
        
125
        printHeader("Searching source");
126
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
127
        InputStream is = getResponse(sourceUrl, "/metacat",
128
                params, "POST");
129
        String response = streamToString(is);
130
        //System.out.println("response: " + response);
131
        Vector<Document> docs = parseResponse(response);
132
        
133
        
134
        printHeader("Parsing source results");
135
        System.out.println("creating MN with url: " + destinationUrl + "/");
136
        MNode mn = D1Client.getMN(destinationUrl + "/");
137
        
138
        printHeader("Processing " + docs.size() + " results.");
139
        printHeader("logging in to the destination " + destinationUrl);
140
        
141
        System.out.println("session: " + session.getSubject());
142
        for(int i=0; i<docs.size(); i++)
143
        {
144
            //for each document in the query
145
            Document doc = docs.get(i);
146
            String docid = doc.docid;
147
            //get the doc from source
148
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
149
            params = "action=read&qformat=xml&docid=" + docid;
150
            is = getResponse(sourceUrl, "/metacat", params, "POST");
151
            String doctext = streamToString(is);
152
            System.out.println("doctext: " + doctext);
153
            is = stringToStream(doctext);
154
            //parse the document
155
            DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
156
        	DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
157
        	DataPackage dataPackage = dataManager.parseMetadata(is);
158
        	
159
            if(dataPackage == null)
160
            {
161
                continue;
162
            }
163
            //go through the DistributionMetadata and download any described data
164
            
165
            is = stringToStream(doctext);
166
            doc.doctext = doctext;
167

    
168
            printHeader("creating document on destination " + destinationUrl);            
169
            SystemMetadata sysmeta = generateSystemMetadata(doc);
170
            if (dataPackage.getEntityList() != null) {
171
	            for(int j=0; j < dataPackage.getEntityList().length; j++)
172
	            {
173
	                String dataDocUrl = dataPackage.getEntityList()[j].getURL();
174
	                String dataDocMimeType = 
175
	                	dataPackage.getEntityList()[j].getDataFormat();
176
	                if (dataDocMimeType == null) {
177
		                dataDocMimeType = 
178
		                	ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue();
179
	                }
180
	                String dataDocLocalId = "";
181
	                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
182
	                { //we only handle ecogrid urls right now
183
	                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
184
	                            "ecogrid://knb/".length(), dataDocUrl.length());
185
	                    //get the file
186
	                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
187
	                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
188
	                    String dataDocText = streamToString(dataDocIs);
189
	                    
190
	                    //set the id
191
	                    Identifier did = new Identifier();
192
	                    did.setValue(dataDocLocalId);
193
	                    
194
	                    //add the desribeby to the eml's sysmeta
195
	                    // TODO Use ORE
196
//	                    System.out.println("adding describe for doc " + 
197
//	                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
198
//	                    sysmeta.addDescribe(did);
199
	                    
200
	                    //create sysmeta for the data doc                    
201
	                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
202
	                    //overwrite the bogus values from the last call 
203
	                    dataDocSysMeta.setIdentifier(did);
204
	                    ObjectFormat format = null;
205
	                    try {
206
	                    	format = ObjectFormatCache.getInstance().getFormat(dataDocMimeType);
207
	                    } catch (NotFound e) {
208
							System.out.println(e.getMessage());
209
						}
210
						dataDocSysMeta.setFmtid(format.getFmtid());
211
	                    Checksum checksum = new Checksum();
212
	                    dataDocIs = stringToStream(dataDocText);
213
	                    ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
214
	                    checksum.setAlgorithm(ca);
215
	                    checksum.setValue(checksum(dataDocIs));
216
	                    dataDocSysMeta.setChecksum(checksum);
217
	                    String sizeStr = 
218
	                    	Long.toString(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
219
	                    dataDocSysMeta.setSize(new BigInteger(sizeStr));
220
	                    // TODO use ORE map
221
	                    //dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
222
	                    boolean error = false;
223
	                    
224
	                    // create access policy
225
	                    //"public", "read", "allow", "allowFirst"
226
	                    AccessPolicy accessPolicy = new AccessPolicy();
227
	                    AccessRule accessRule = new AccessRule();
228
						accessRule.addPermission(Permission.READ);
229
	                    Subject subject = new Subject();
230
	                    subject.setValue("public");
231
						accessRule.addSubject(subject );
232
						accessPolicy.addAllow(accessRule );
233
	                    //create the data doc on d1
234
	                    try
235
	                    {
236
	                        mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
237
							mn.setAccessPolicy(session, dataDocSysMeta.getIdentifier(), accessPolicy);
238
	                    }
239
	                    catch(Exception e)
240
	                    {
241
	                        error = true;
242
	                        System.out.println("ERROR: Could not create data document with id " + 
243
	                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
244
	                    }
245
	                    finally
246
	                    {
247
	                        if(error)
248
	                        {
249
	                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
250
	                                    "FAILED.");
251
	                        }
252
	                        else
253
	                        {
254
	                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
255
	                                " which is described by " + sysmeta.getIdentifier().getValue());
256
	                        }
257
	                    }
258
	                }
259
	                else
260
	                {
261
	                    System.out.println("WARNING: Could not process describes url " +
262
	                            dataDocUrl + " for document " + doc.docid + 
263
	                    ".  Only ecogrid://knb/ urls are currently supported.");
264
	                }
265
	            }
266
            }
267
            
268
            try
269
            {
270
              Identifier id = mn.create(session, sysmeta.getIdentifier(), 
271
                    IOUtils.toInputStream(doc.doctext), sysmeta);
272
              System.out.println("Success inserting document " + id.getValue());
273
              
274
            }
275
            catch(Exception e)
276
            {
277
                e.printStackTrace();
278
                System.out.println("Could not create document with id " + 
279
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
280
                
281
            }
282
            finally
283
            {
284
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
285
            }
286
        }
287
        
288
        logout();
289
    }
290
    
291

    
292
    
293
    /**
294
     * @param doc
295
     * @return
296
     */
297
    private SystemMetadata generateSystemMetadata(Document doc)
298
      throws Exception
299
    {
300
        SystemMetadata sm = new SystemMetadata();
301
        //set the id
302
        Identifier id = new Identifier();
303
        id.setValue(doc.docid.trim());
304
        sm.setIdentifier(id);
305
        
306
        //set the object format
307
        ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
308
        if(format == null)
309
        {
310
            if(doc.doctype.trim().equals("BIN"))
311
            {
312
                format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
313
            }
314
            else
315
            {
316
                format = ObjectFormatCache.getInstance().getFormat("text/plain");
317
            }
318
        }
319
        sm.setFmtid(format.getFmtid());
320
        
321
        //create the checksum
322
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
323
        String checksumS = checksum(bais);
324
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
325
        Checksum checksum = new Checksum();
326
        checksum.setValue(checksumS);
327
        checksum.setAlgorithm(ca);
328
        sm.setChecksum(checksum);
329
        
330
        //set the size
331
        String sizeStr = Long.toString(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
332
        sm.setSize(new BigInteger(sizeStr));
333
        
334
        //submitter
335
        Subject p = new Subject();
336
        p.setValue("unknown");
337
        sm.setSubmitter(p);
338
        sm.setRightsHolder(p);
339
        try
340
        {
341
            Date dateCreated = parseMetacatDate(doc.createDate);
342
            sm.setDateUploaded(dateCreated);
343
            Date dateUpdated = parseMetacatDate(doc.updateDate);
344
            sm.setDateSysMetadataModified(dateUpdated);
345
        }
346
        catch(Exception e)
347
        {
348
            System.out.println("couldn't parse a date: " + e.getMessage());
349
            Date dateCreated = new Date();
350
            sm.setDateUploaded(dateCreated);
351
            Date dateUpdated = new Date();
352
            sm.setDateSysMetadataModified(dateUpdated);
353
        }
354
        NodeReference nr = new NodeReference();
355
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
356
        sm.setOriginMemberNode(nr);
357
        sm.setAuthoritativeMemberNode(nr);
358
        
359
        return sm;
360
    }
361
    
362
    private void printHeader(String s)
363
    {
364
        System.out.println("****** " + s + " *******");
365
    }
366
    
367
    /**
368
     * produce an md5 checksum for item
369
     */
370
    private String checksum(InputStream is)
371
      throws Exception
372
    {        
373
        byte[] buffer = new byte[1024];
374
        MessageDigest complete = MessageDigest.getInstance("MD5");
375
        int numRead;
376
        
377
        do 
378
        {
379
          numRead = is.read(buffer);
380
          if (numRead > 0) 
381
          {
382
            complete.update(buffer, 0, numRead);
383
          }
384
        } while (numRead != -1);
385
        
386
        
387
        return getHex(complete.digest());
388
    }
389
    
390
    /**
391
     * convert a byte array to a hex string
392
     */
393
    private static String getHex( byte [] raw ) 
394
    {
395
        final String HEXES = "0123456789ABCDEF";
396
        if ( raw == null ) {
397
          return null;
398
        }
399
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
400
        for ( final byte b : raw ) {
401
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
402
             .append(HEXES.charAt((b & 0x0F)));
403
        }
404
        return hex.toString();
405
    }
406
    
407
    /**
408
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
409
     * a proper date object
410
     * @param date
411
     * @return
412
     */
413
    private Date parseMetacatDate(String date)
414
    {
415
        String year = date.substring(0, 4);
416
        String month = date.substring(5, 7);
417
        String day = date.substring(8, 10);
418
        Calendar c = Calendar.getInstance();
419
        c.set(new Integer(year).intValue(), 
420
              new Integer(month).intValue(), 
421
              new Integer(day).intValue());
422
        return c.getTime();
423
    }
424

    
425
    /**
426
     * send a request to the resource
427
     */
428
    private InputStream sendRequest(String contextRootUrl, String resource, 
429
            String sessionid, String method, String urlParamaters, 
430
            String contentType, InputStream dataStream) 
431
        throws Exception 
432
    {
433
        
434
        HttpURLConnection connection = null ;
435
        String restURL = contextRootUrl + resource;
436

    
437
        if (urlParamaters != null) {
438
            if (restURL.indexOf("?") == -1)             
439
                restURL += "?";
440
            restURL += urlParamaters; 
441
            if(restURL.indexOf(" ") != -1)
442
            {
443
                restURL = restURL.replaceAll("\\s", "%20");
444
            }
445
        }
446
        
447
        if(sessionid != null)
448
        {
449
            if(restURL.indexOf("?") == -1)
450
            {
451
                restURL += "?sessionid=" + sessionid;
452
            }
453
            else
454
            {
455
                restURL += "&sessionid=" + sessionid;
456
            }
457
        }
458

    
459
        URL u = null;
460
        InputStream content = null;
461
        System.out.println("url: " + restURL);
462
        System.out.println("method: " + method);
463
        u = new URL(restURL);
464
        connection = (HttpURLConnection) u.openConnection();
465
        if (contentType!=null) {
466
            connection.setRequestProperty("Content-Type",contentType);
467
        }
468

    
469
        connection.setDoOutput(true);
470
        connection.setDoInput(true);
471
        connection.setRequestMethod(method);
472

    
473
        if (!method.equals("GET")) {
474
            if (dataStream != null) {
475
                OutputStream out = connection.getOutputStream();
476
                IOUtils.copy(dataStream, out);
477
            }
478
        }
479

    
480
        return connection.getInputStream();   
481
    }
482
    
483
    /**
484
     * create a mime multipart message from object and sysmeta
485
     */
486
    private MimeMultipart createMimeMultipart(InputStream object)
487
      throws Exception
488
    {
489
        final MimeMultipart mmp = new MimeMultipart();
490
        MimeBodyPart objectPart = new MimeBodyPart();
491
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
492
        objectPart.setFileName("doctext");
493
        DataSource ds = new InputStreamDataSource("doctext", object);
494
        DataHandler dh = new DataHandler(ds);
495
        objectPart.setDataHandler(dh);
496
        mmp.addBodyPart(objectPart);
497
        return mmp;
498
    }
499
    
500
    /**
501
     * parse a metacat query response and return a vector of docids
502
     * @param response
503
     * @return
504
     */
505
    private Vector<Document> parseResponse(String response)
506
    {
507
        Vector<Document> v = new Vector<Document>();
508
        int dstart = response.indexOf("<document>");
509
        int dend = response.indexOf("</document>", dstart);
510
        while(dstart != -1)
511
        {
512
            String doc = response.substring(dstart + "<document>".length(), dend);
513
            //System.out.println("adding " + docid);
514
            Document d = new Document(getFieldFromDoc(doc, "docid"),
515
                    getFieldFromDoc(doc, "doctype"),
516
                    getFieldFromDoc(doc, "createdate"),
517
                    getFieldFromDoc(doc, "updatedate"));
518
            v.add(d);
519
            dstart = response.indexOf("<document>", dend);
520
            dend = response.indexOf("</document>", dstart);
521
        }
522
        
523
        return v;
524
    }
525
    
526
    private String getFieldFromDoc(String doc, String fieldname)
527
    {
528
        String field = "<" + fieldname + ">";
529
        String fieldend = "</" + fieldname + ">";
530
        int start = doc.indexOf(field);
531
        int end = doc.indexOf(fieldend);
532
        String s = doc.substring(start + field.length(), end);
533
        //System.out.println("field: " + fieldname + " : " + s);
534
        return s;
535
    }
536
    
537
    /**
538
     * login the source
539
     * @return
540
     * @throws Exception
541
     */
542
    private String loginSource()
543
      throws Exception
544
    {
545
        return login(sourceUrl);
546
    }
547
    
548
    
549
    /**
550
     * returns a sessionid
551
     * @return
552
     */
553
    private String login(String sourceUrl)
554
      throws Exception
555
    {
556
        InputStream is = getResponse(sourceUrl, "/metacat", 
557
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", "POST");
558
        String response = streamToString(is);
559
        //System.out.println("response: " + response);
560
        if(response.indexOf("sessionId") == -1)
561
        {
562
            throw new Exception("Error logging into " + sourceUrl);
563
        }
564
        
565
        String sessionid = response.substring(
566
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
567
                response.indexOf("</sessionId>"));
568
        System.out.println("sessionid: " + sessionid);
569
        return sessionid;
570
    }
571
    
572
    /**
573
     * logout both the source and destination
574
     * @throws Exception
575
     */
576
    private void logout()
577
        throws Exception
578
    {
579
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
580
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
581
    }
582
    
583
    /**
584
     * get an http response
585
     * @param contextRootUrl
586
     * @param resource
587
     * @param urlParameters
588
     * @param method
589
     * @return
590
     * @throws Exception
591
     */
592
    private InputStream getResponse(String contextRootUrl, String resource, 
593
            String urlParameters, String method)
594
      throws Exception
595
    {
596
        HttpURLConnection connection = null ;
597

    
598
        String restURL = contextRootUrl+resource;
599

    
600
        if (urlParameters != null) {
601
            if (restURL.indexOf("?") == -1)             
602
                restURL += "?";
603
            restURL += urlParameters; 
604
            if(restURL.indexOf(" ") != -1)
605
            {
606
                restURL = restURL.replaceAll("\\s", "%20");
607
            }
608
        }
609

    
610
        URL u = null;
611
        InputStream content = null;            
612
        System.out.println("url: " + restURL);
613
        System.out.println("method: " + method);
614
        u = new URL(restURL);
615
        connection = (HttpURLConnection) u.openConnection();
616
        connection.setDoOutput(true);
617
        connection.setDoInput(true);
618
        connection.setRequestMethod(method);
619
        content = connection.getInputStream();
620
        return content;
621
    }
622
    
623
    private String streamToString(InputStream is)
624
        throws Exception
625
    {
626
        byte b[] = new byte[1024];
627
        int numread = is.read(b, 0, 1024);
628
        String response = new String();
629
        while(numread != -1)
630
        {
631
            response += new String(b, 0, numread);
632
            numread = is.read(b, 0, 1024);
633
        }
634
        return response;
635
    }
636
    
637
    private InputStream stringToStream(String s)
638
      throws Exception
639
    {
640
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
641
        return bais;
642
    }
643
    
644
    private class Document
645
    {
646
        public String docid;
647
        public String doctype;
648
        public String createDate;
649
        public String updateDate;
650
        public String doctext;
651
        
652
        public Document(String docid, String doctype, String createDate, String updateDate)
653
        {
654
            this.docid = docid.trim();
655
            this.doctype = doctype.trim();
656
            this.createDate = createDate.trim();
657
            this.updateDate = updateDate.trim();
658
        }
659
    }
660
}
(8-8/16)