Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements administrative methods 
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.io.ByteArrayInputStream;
29
import java.io.InputStream;
30
import java.io.OutputStream;
31
import java.math.BigInteger;
32
import java.net.HttpURLConnection;
33
import java.net.URL;
34
import java.util.Calendar;
35
import java.util.Date;
36
import java.util.Vector;
37

    
38
import javax.activation.DataHandler;
39
import javax.activation.DataSource;
40
import javax.mail.internet.MimeBodyPart;
41
import javax.mail.internet.MimeMultipart;
42

    
43
import org.apache.commons.io.IOUtils;
44
import org.dataone.client.D1Client;
45
import org.dataone.client.MNode;
46
import org.dataone.client.ObjectFormatCache;
47
import org.dataone.client.auth.CertificateManager;
48
import org.dataone.service.exceptions.NotFound;
49
import org.dataone.service.types.v1.AccessPolicy;
50
import org.dataone.service.types.v1.AccessRule;
51
import org.dataone.service.types.v1.Checksum;
52
import org.dataone.service.types.v1.Identifier;
53
import org.dataone.service.types.v1.NodeReference;
54
import org.dataone.service.types.v1.ObjectFormat;
55
import org.dataone.service.types.v1.Permission;
56
import org.dataone.service.types.v1.Session;
57
import org.dataone.service.types.v1.Subject;
58
import org.dataone.service.types.v1.SystemMetadata;
59
import org.dataone.service.types.v1.util.ChecksumUtil;
60
import org.dataone.service.util.Constants;
61
import org.ecoinformatics.datamanager.DataManager;
62
import org.ecoinformatics.datamanager.database.DatabaseConnectionPoolInterface;
63
import org.ecoinformatics.datamanager.parser.DataPackage;
64

    
65
import edu.ucsb.nceas.metacat.MetaCatServlet;
66
import edu.ucsb.nceas.metacat.dataquery.MetacatDatabaseConnectionPoolFactory;
67
import edu.ucsb.nceas.metacat.properties.PropertyService;
68
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
69

    
70
/**
71
 * @author berkley
72
 * A class to populate a metacat instance based on documents returned from a query
73
 */
74
public class MetacatPopulator
75
{
76
    private String sourceUrl = null;
77
    private String destinationUrl = null;
78
    private String query = null;
79
    private String username = null;
80
    private String password = null;
81
    private Session session = null;
82
    
83
    /**
84
     * create a new MetacatPopulator with given source and destination urls.  
85
     * These should be
86
     * of the form "http://<url>/<metacat_instance>"
87
     * If username and/or password is null, the query will be run as public
88
     * @param sourceUrl
89
     * @param destUrl
90
     * @param query
91
     * @param username
92
     * @param password
93
     */
94
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
95
    {
96
        this.sourceUrl = sourceUrl;
97
        this.query = query;
98
        this.username = username;
99
        this.password = password;
100
        this.destinationUrl = destUrl;
101
        // TODO: use specific certificate?
102
        this.session = null; //new Session();
103
    }
104
    
105
    /**
106
     * populate from the source
107
     */
108
    public void populate()
109
      throws Exception
110
    {
111
        //String sourceSessionid = loginSource();
112
        
113
        //do a query
114
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.1.0&" +
115
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
116
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&";
117
        params += "action=query&";
118
        params += "qformat=xml&";
119
        params += "anyfield=" + query;
120
        
121
        printHeader("Searching source");
122
        System.out.println("searching '" + sourceUrl + "' for '" + query + "'");
123
        InputStream is = getResponse(sourceUrl, "/metacat", params, "POST");
124
        String response = streamToString(is);
125
        //System.out.println("response: " + response);
126
        Vector<Document> docs = parseResponse(response);
127
        
128
        printHeader("Parsing source results");
129
        System.out.println("creating MN with url: " + destinationUrl + "/");
130
        MNode mn = D1Client.getMN(destinationUrl + "/");
131
        
132
        printHeader("Processing " + docs.size() + " results.");
133
        for(int i=0; i<docs.size(); i++)
134
        {
135
            //for each document in the query
136
            Document doc = docs.get(i);
137
            String docid = doc.docid;
138
            //get the doc from source
139
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
140
            params = "action=read&qformat=xml&docid=" + docid;
141
            is = getResponse(sourceUrl, "/metacat", params, "POST");
142
            String doctext = streamToString(is);
143
            System.out.println("doctext: " + doctext);
144
            is = stringToStream(doctext);
145
            //parse the document
146
            DatabaseConnectionPoolInterface connectionPool = MetacatDatabaseConnectionPoolFactory.getDatabaseConnectionPoolInterface();
147
        	DataManager dataManager = DataManager.getInstance(connectionPool, connectionPool.getDBAdapterName());
148
        	DataPackage dataPackage = dataManager.parseMetadata(is);
149
        	
150
            if (dataPackage == null)
151
            {
152
                continue;
153
            }
154
            //go through the DistributionMetadata and download any described data
155
            
156
            is = stringToStream(doctext);
157
            doc.doctext = doctext;
158

    
159
            printHeader("creating document on destination " + destinationUrl);            
160
            SystemMetadata sysmeta = generateSystemMetadata(doc);
161
            if (dataPackage.getEntityList() != null) {
162
	            for(int j=0; j < dataPackage.getEntityList().length; j++)
163
	            {
164
	                String dataDocUrl = dataPackage.getEntityList()[j].getURL();
165
	                String dataDocMimeType = 
166
	                	dataPackage.getEntityList()[j].getDataFormat();
167
	                if (dataDocMimeType == null) {
168
		                dataDocMimeType = 
169
		                	ObjectFormatCache.getInstance().getFormat("application/octet-stream").getFmtid().getValue();
170
	                }
171
	                String dataDocLocalId = "";
172
	                if(dataDocUrl.trim().startsWith("ecogrid://knb/"))
173
	                { //we only handle ecogrid urls right now
174
	                    dataDocLocalId = dataDocUrl.substring(dataDocUrl.indexOf("ecogrid://knb/") + 
175
	                            "ecogrid://knb/".length(), dataDocUrl.length());
176
	                    //get the file
177
	                    params = "action=read&qformat=xml&docid=" + dataDocLocalId;
178
	                    InputStream dataDocIs = getResponse(sourceUrl, "/metacat", params, "POST");
179
	                    String dataDocText = streamToString(dataDocIs);
180
	                    
181
	                    //set the id
182
	                    Identifier did = new Identifier();
183
	                    did.setValue(dataDocLocalId);
184
	                    
185
	                    //add the desribeby to the eml's sysmeta
186
	                    // TODO Use ORE
187
//	                    System.out.println("adding describe for doc " + 
188
//	                            sysmeta.getIdentifier().getValue() + " :" + did.getValue());
189
//	                    sysmeta.addDescribe(did);
190
	                    
191
	                    //create sysmeta for the data doc                    
192
	                    SystemMetadata dataDocSysMeta = generateSystemMetadata(doc);
193
	                    //overwrite the bogus values from the last call 
194
	                    dataDocSysMeta.setIdentifier(did);
195
	                    ObjectFormat format = null;
196
	                    try {
197
	                    	format = ObjectFormatCache.getInstance().getFormat(dataDocMimeType);
198
	                    } catch (NotFound e) {
199
							System.out.println(e.getMessage());
200
						}
201
						dataDocSysMeta.setFmtid(format.getFmtid());
202
	                    dataDocIs = stringToStream(dataDocText);
203
	                    Checksum checksum = ChecksumUtil.checksum(dataDocIs, "MD5");
204
	                    dataDocSysMeta.setChecksum(checksum);
205
	                    String sizeStr = 
206
	                    	Long.toString(dataDocText.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
207
	                    dataDocSysMeta.setSize(new BigInteger(sizeStr));
208
	                    // TODO use ORE map
209
	                    //dataDocSysMeta.addDescribedBy(sysmeta.getIdentifier());
210
	                    boolean error = false;
211
	                    
212
	                    //create the data doc on d1
213
	                    try
214
	                    {
215
	                        mn.create(session, dataDocSysMeta.getIdentifier(), IOUtils.toInputStream(dataDocText), dataDocSysMeta);
216
	                    }
217
	                    catch(Exception e)
218
	                    {
219
	                        error = true;
220
	                        System.out.println("ERROR: Could not create data document with id " + 
221
	                                dataDocSysMeta.getIdentifier().getValue() + " : " + e.getMessage());
222
	                    }
223
	                    finally
224
	                    {
225
	                        if (error)
226
	                        {
227
	                            printHeader("Insertion of document " + dataDocSysMeta.getIdentifier().getValue() + 
228
	                                    "FAILED.");
229
	                        }
230
	                        else
231
	                        {
232
	                            printHeader("Done inserting document " + dataDocSysMeta.getIdentifier().getValue() +
233
	                                " which is described by " + sysmeta.getIdentifier().getValue());
234
	                        }
235
	                    }
236
	                }
237
	                else
238
	                {
239
	                    System.out.println("WARNING: Could not process describes url " +
240
	                            dataDocUrl + " for document " + doc.docid + 
241
	                    ".  Only ecogrid://knb/ urls are currently supported.");
242
	                }
243
	            }
244
            }
245
            
246
            try
247
            {
248
              Identifier id = mn.create(session, sysmeta.getIdentifier(), 
249
                    IOUtils.toInputStream(doc.doctext), sysmeta);
250
              System.out.println("Success inserting document " + id.getValue());
251
              
252
            }
253
            catch(Exception e)
254
            {
255
                e.printStackTrace();
256
                System.out.println("Could not create document with id " + 
257
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
258
                
259
            }
260
            finally
261
            {
262
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
263
            }
264
        }
265
        
266
        //logout();
267
    }
268
    
269

    
270
    
271
    /**
272
     * @param doc
273
     * @return
274
     */
275
    private SystemMetadata generateSystemMetadata(Document doc)
276
      throws Exception
277
    {
278
        SystemMetadata sm = new SystemMetadata();
279
        //set the id
280
        Identifier id = new Identifier();
281
        id.setValue(doc.docid.trim());
282
        sm.setIdentifier(id);
283
        
284
        //set the object format
285
        ObjectFormat format = ObjectFormatCache.getInstance().getFormat(doc.doctype);
286
        if(format == null)
287
        {
288
            if(doc.doctype.trim().equals("BIN"))
289
            {
290
                format = ObjectFormatCache.getInstance().getFormat("application/octet-stream");
291
            }
292
            else
293
            {
294
                format = ObjectFormatCache.getInstance().getFormat("text/plain");
295
            }
296
        }
297
        sm.setFmtid(format.getFmtid());
298
        
299
        //create the checksum
300
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING));
301
        Checksum checksum = ChecksumUtil.checksum(bais, "MD5");
302
        sm.setChecksum(checksum);
303
        
304
        //set the size
305
        String sizeStr = Long.toString(doc.doctext.getBytes(MetaCatServlet.DEFAULT_ENCODING).length);
306
        sm.setSize(new BigInteger(sizeStr));
307
        
308
        //submitter
309
        Subject p = new Subject();
310
        p.setValue("unknown");
311
        sm.setSubmitter(p);
312
        sm.setRightsHolder(p);
313
        try
314
        {
315
            Date dateCreated = parseMetacatDate(doc.createDate);
316
            sm.setDateUploaded(dateCreated);
317
            Date dateUpdated = parseMetacatDate(doc.updateDate);
318
            sm.setDateSysMetadataModified(dateUpdated);
319
        }
320
        catch(Exception e)
321
        {
322
            System.out.println("couldn't parse a date: " + e.getMessage());
323
            Date dateCreated = new Date();
324
            sm.setDateUploaded(dateCreated);
325
            Date dateUpdated = new Date();
326
            sm.setDateSysMetadataModified(dateUpdated);
327
        }
328
        NodeReference nr = new NodeReference();
329
        nr.setValue(PropertyService.getProperty("dataone.memberNodeId"));
330
        sm.setOriginMemberNode(nr);
331
        sm.setAuthoritativeMemberNode(nr);
332
        
333
        // create access policy
334
        AccessPolicy accessPolicy = new AccessPolicy();
335
        AccessRule accessRule = new AccessRule();
336
		accessRule.addPermission(Permission.READ);
337
        Subject subject = new Subject();
338
        subject.setValue(Constants.SUBJECT_PUBLIC);
339
		accessRule.addSubject(subject);
340
		accessPolicy.addAllow(accessRule);
341
		sm.setAccessPolicy(accessPolicy);
342
        
343
        return sm;
344
    }
345
    
346
    private void printHeader(String s)
347
    {
348
        System.out.println("****** " + s + " *******");
349
    }
350
    
351
    
352
    
353
    /**
354
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
355
     * a proper date object
356
     * @param date
357
     * @return
358
     */
359
    private Date parseMetacatDate(String date)
360
    {
361
        String year = date.substring(0, 4);
362
        String month = date.substring(5, 7);
363
        String day = date.substring(8, 10);
364
        Calendar c = Calendar.getInstance();
365
        c.set(new Integer(year).intValue(), 
366
              new Integer(month).intValue(), 
367
              new Integer(day).intValue());
368
        return c.getTime();
369
    }
370

    
371
    /**
372
     * send a request to the resource
373
     */
374
    private InputStream sendRequest(String contextRootUrl, String resource, 
375
            String sessionid, String method, String urlParamaters, 
376
            String contentType, InputStream dataStream) 
377
        throws Exception 
378
    {
379
        
380
        HttpURLConnection connection = null ;
381
        String restURL = contextRootUrl + resource;
382

    
383
        if (urlParamaters != null) {
384
            if (restURL.indexOf("?") == -1)             
385
                restURL += "?";
386
            restURL += urlParamaters; 
387
            if(restURL.indexOf(" ") != -1)
388
            {
389
                restURL = restURL.replaceAll("\\s", "%20");
390
            }
391
        }
392
        
393
        if(sessionid != null)
394
        {
395
            if(restURL.indexOf("?") == -1)
396
            {
397
                restURL += "?sessionid=" + sessionid;
398
            }
399
            else
400
            {
401
                restURL += "&sessionid=" + sessionid;
402
            }
403
        }
404

    
405
        URL u = null;
406
        InputStream content = null;
407
        System.out.println("url: " + restURL);
408
        System.out.println("method: " + method);
409
        u = new URL(restURL);
410
        connection = (HttpURLConnection) u.openConnection();
411
        if (contentType!=null) {
412
            connection.setRequestProperty("Content-Type",contentType);
413
        }
414

    
415
        connection.setDoOutput(true);
416
        connection.setDoInput(true);
417
        connection.setRequestMethod(method);
418

    
419
        if (!method.equals("GET")) {
420
            if (dataStream != null) {
421
                OutputStream out = connection.getOutputStream();
422
                IOUtils.copy(dataStream, out);
423
            }
424
        }
425

    
426
        return connection.getInputStream();   
427
    }
428
    
429
    /**
430
     * create a mime multipart message from object and sysmeta
431
     */
432
    private MimeMultipart createMimeMultipart(InputStream object)
433
      throws Exception
434
    {
435
        final MimeMultipart mmp = new MimeMultipart();
436
        MimeBodyPart objectPart = new MimeBodyPart();
437
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
438
        objectPart.setFileName("doctext");
439
        DataSource ds = new InputStreamDataSource("doctext", object);
440
        DataHandler dh = new DataHandler(ds);
441
        objectPart.setDataHandler(dh);
442
        mmp.addBodyPart(objectPart);
443
        return mmp;
444
    }
445
    
446
    /**
447
     * parse a metacat query response and return a vector of docids
448
     * @param response
449
     * @return
450
     */
451
    private Vector<Document> parseResponse(String response)
452
    {
453
        Vector<Document> v = new Vector<Document>();
454
        int dstart = response.indexOf("<document>");
455
        int dend = response.indexOf("</document>", dstart);
456
        while(dstart != -1)
457
        {
458
            String doc = response.substring(dstart + "<document>".length(), dend);
459
            //System.out.println("adding " + docid);
460
            Document d = new Document(getFieldFromDoc(doc, "docid"),
461
                    getFieldFromDoc(doc, "doctype"),
462
                    getFieldFromDoc(doc, "createdate"),
463
                    getFieldFromDoc(doc, "updatedate"));
464
            v.add(d);
465
            dstart = response.indexOf("<document>", dend);
466
            dend = response.indexOf("</document>", dstart);
467
        }
468
        
469
        return v;
470
    }
471
    
472
    private String getFieldFromDoc(String doc, String fieldname)
473
    {
474
        String field = "<" + fieldname + ">";
475
        String fieldend = "</" + fieldname + ">";
476
        int start = doc.indexOf(field);
477
        int end = doc.indexOf(fieldend);
478
        String s = doc.substring(start + field.length(), end);
479
        //System.out.println("field: " + fieldname + " : " + s);
480
        return s;
481
    }
482
    
483
    /**
484
     * login the source
485
     * @return
486
     * @throws Exception
487
     */
488
    private String loginSource()
489
      throws Exception
490
    {
491
        return login(sourceUrl);
492
    }
493
    
494
    
495
    /**
496
     * returns a sessionid
497
     * @return
498
     */
499
    private String login(String sourceUrl)
500
      throws Exception
501
    {
502
        InputStream is = getResponse(sourceUrl, "/metacat", 
503
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", "POST");
504
        String response = streamToString(is);
505
        //System.out.println("response: " + response);
506
        if(response.indexOf("sessionId") == -1)
507
        {
508
            throw new Exception("Error logging into " + sourceUrl);
509
        }
510
        
511
        String sessionid = response.substring(
512
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
513
                response.indexOf("</sessionId>"));
514
        System.out.println("sessionid: " + sessionid);
515
        return sessionid;
516
    }
517
    
518
    /**
519
     * logout both the source and destination
520
     * @throws Exception
521
     */
522
    private void logout()
523
        throws Exception
524
    {
525
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
526
    }
527
    
528
    /**
529
     * get an http response
530
     * @param contextRootUrl
531
     * @param resource
532
     * @param urlParameters
533
     * @param method
534
     * @return
535
     * @throws Exception
536
     */
537
    private InputStream getResponse(String contextRootUrl, String resource, 
538
            String urlParameters, String method)
539
      throws Exception
540
    {
541
        HttpURLConnection connection = null ;
542

    
543
        String restURL = contextRootUrl+resource;
544

    
545
        if (urlParameters != null) {
546
            if (restURL.indexOf("?") == -1)             
547
                restURL += "?";
548
            restURL += urlParameters; 
549
            if(restURL.indexOf(" ") != -1)
550
            {
551
                restURL = restURL.replaceAll("\\s", "%20");
552
            }
553
        }
554

    
555
        URL u = null;
556
        InputStream content = null;            
557
        System.out.println("url: " + restURL);
558
        System.out.println("method: " + method);
559
        u = new URL(restURL);
560
        connection = (HttpURLConnection) u.openConnection();
561
        connection.setDoOutput(true);
562
        connection.setDoInput(true);
563
        connection.setRequestMethod(method);
564
        content = connection.getInputStream();
565
        return content;
566
    }
567
    
568
    private String streamToString(InputStream is)
569
        throws Exception
570
    {
571
        byte b[] = new byte[1024];
572
        int numread = is.read(b, 0, 1024);
573
        String response = new String();
574
        while(numread != -1)
575
        {
576
            response += new String(b, 0, numread);
577
            numread = is.read(b, 0, 1024);
578
        }
579
        return response;
580
    }
581
    
582
    private InputStream stringToStream(String s)
583
      throws Exception
584
    {
585
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes(MetaCatServlet.DEFAULT_ENCODING));
586
        return bais;
587
    }
588
    
589
    private class Document
590
    {
591
        public String docid;
592
        public String doctype;
593
        public String createDate;
594
        public String updateDate;
595
        public String doctext;
596
        
597
        public Document(String docid, String doctype, String createDate, String updateDate)
598
        {
599
            this.docid = docid.trim();
600
            this.doctype = doctype.trim();
601
            this.createDate = createDate.trim();
602
            this.updateDate = updateDate.trim();
603
        }
604
    }
605
}
(8-8/16)