Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements administrative methods 
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetacatHandler;
45
import edu.ucsb.nceas.metacat.MetacatResultSet;
46
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
47
import edu.ucsb.nceas.metacat.dataone.CrudService;
48
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
49

    
50
import org.dataone.service.exceptions.InvalidSystemMetadata;
51
import org.dataone.service.exceptions.ServiceFailure;
52
import org.dataone.service.types.AuthToken;
53
import org.dataone.service.types.Checksum;
54
import org.dataone.service.types.ChecksumAlgorithm;
55
import org.dataone.service.types.NodeReference;
56
import org.dataone.service.types.ObjectFormat;
57
import org.dataone.service.types.Principal;
58
import org.dataone.service.types.SystemMetadata;
59
import org.dataone.service.types.Identifier;
60
import org.dataone.client.D1Client;
61

    
62
//import sun.tools.jstat.Identifier;
63

    
64
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
65

    
66
/**
67
 * @author berkley
68
 * A class to populate a metacat instance based on documents returned from a query
69
 */
70
public class MetacatPopulator
71
{
72
    private String sourceUrl = null;
73
    private String destinationUrl = null;
74
    private String query = null;
75
    private String username = null;
76
    private String password = null;
77
    
78
    /**
     * Builds a populator that copies documents matching a query from one
     * metacat instance to another.  Both urls are expected to be of the
     * form "http://<url>/<metacat_instance>".
     * <p>
     * NOTE(review): an earlier version of this comment said that a null
     * username and/or password runs the query as public; nothing in this
     * constructor enforces that -- confirm against the login code.
     *
     * @param sourceUrl base url of the metacat instance that is searched and read
     * @param destUrl base url of the metacat instance that receives the documents
     * @param query text searched for in the source
     * @param username user name used when logging in
     * @param password password used when logging in
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        // endpoints first, then the search text, then the credentials
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
    }
97
    
98
    /**
99
     * populate from the source
100
     */
101
    public void populate()
102
      throws Exception
103
    {
104
        String sourceSessionid = loginSource();
105
        String destSessionid = loginDest();
106
        //do a query
107
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
108
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&" +
109
                        "returndoctype=BIN&" +
110
                        "returndoctype=http://dataone.org/service/types/SystemMetadata/0.1&";
111
        params += "action=query&";
112
        params += "qformat=xml&";
113
        params += "anyfield=" + query;
114
        
115
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
116
        InputStream is = getResponse(sourceUrl, "/metacat",
117
                params, "POST");
118
        String response = streamToString(is);
119
        //System.out.println("response: " + response);
120
        Vector<Document> docs = parseResponse(response);
121

    
122
        for(int i=0; i<docs.size(); i++)
123
        {
124
            //for each document in the query
125
            Document doc = docs.get(i);
126
            String docid = doc.docid;
127
            //get the doc from source
128
            System.out.println("retrieving doc " + docid);
129
            params = "action=read&qformat=xml&docid=" + docid;
130
            is = getResponse(sourceUrl, "/metacat", params, "POST");
131
            String doctext = streamToString(is);
132
            //System.out.println("Done retrieving document: " + doctext);
133
            is = stringToStream(doctext);
134
            doc.doctext = doctext;
135
            //params += "&doctext=" + doc;
136
            //is = getResponse(sourceUrl, "/metacat",
137
            //        params, "POST");
138
            //System.out.println("done with upload: " + streamToString(is));
139
            
140
            D1Client d1 = new D1Client(destinationUrl + "/");
141
            SystemMetadata sysmeta = generateSystemMetadata(doc);
142
            AuthToken authtoken = d1.login(username, password);
143
            Identifier fakeId = new Identifier();
144
            fakeId.setValue("XXX");
145
            Identifier id = d1.create(authtoken, /*sysmeta.getIdentifier()*/fakeId, 
146
                  IOUtils.toInputStream(doc.doctext), sysmeta);
147
            System.out.println("Document created with id " + id.getValue());
148
            //insert it into destination
149
            /*System.out.println("Creating multipart message");
150
            final MimeMultipart mmp = createMimeMultipart(is);
151
            final InputStreamFromOutputStream<String> multipartStream = 
152
                new InputStreamFromOutputStream<String>() {
153
                @Override
154
                public String produce(final OutputStream dataSink) throws Exception {
155
                    mmp.writeTo(dataSink);
156
                    IOUtils.closeQuietly(dataSink);
157
                    return "Completed";
158
                }
159
            };
160
            System.out.println("uploading document to " + destinationUrl + 
161
                    "with docid " + docid + " with sessionid " + destSessionid);
162
            InputStream uploadResponse = sendRequest(destinationUrl, "/object", 
163
                    destSessionid, "PUT", "action=inser&docid=" + docid, 
164
                    "multipart/form-data", multipartStream);
165
            String res = streamToString(uploadResponse);*/
166
            /*CrudService cs = new CrudService();
167
            cs.setContextUrl(destinationUrl);
168
            Identifier guid = new Identifier();
169
            guid.setValue(docid);
170
            cs.create(new AuthToken(destSessionid), guid, is, null);*/
171
            
172
            //System.out.println("uploading complete: " + res);
173
            //System.out.println("Done with document " + docid);
174
        }
175
        
176
        logout();
177
    }
178
    
179
    /**
180
     * produce an md5 checksum for item
181
     */
182
    private String checksum(InputStream is)
183
      throws Exception
184
    {        
185
        byte[] buffer = new byte[1024];
186
        MessageDigest complete = MessageDigest.getInstance("MD5");
187
        int numRead;
188
        
189
        do 
190
        {
191
          numRead = is.read(buffer);
192
          if (numRead > 0) 
193
          {
194
            complete.update(buffer, 0, numRead);
195
          }
196
        } while (numRead != -1);
197
        
198
        
199
        return getHex(complete.digest());
200
    }
201
    
202
    /**
203
     * convert a byte array to a hex string
204
     */
205
    private static String getHex( byte [] raw ) 
206
    {
207
        final String HEXES = "0123456789ABCDEF";
208
        if ( raw == null ) {
209
          return null;
210
        }
211
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
212
        for ( final byte b : raw ) {
213
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
214
             .append(HEXES.charAt((b & 0x0F)));
215
        }
216
        return hex.toString();
217
    }
218
    
219
    /**
220
     * @param doc
221
     * @return
222
     */
223
    private SystemMetadata generateSystemMetadata(Document doc)
224
      throws Exception
225
    {
226
        SystemMetadata sm = new SystemMetadata();
227
        //set the id
228
        Identifier id = new Identifier();
229
        id.setValue(doc.docid);
230
        sm.setIdentifier(id);
231
        
232
        //set the object format
233
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
234
        if(format == null)
235
        {
236
            if(doc.doctype.trim().equals("BIN"))
237
            {
238
                format = ObjectFormat.APPLICATIONOCTETSTREAM;
239
            }
240
            else
241
            {
242
                format = ObjectFormat.convert("text/plain");
243
            }
244
        }
245
        sm.setObjectFormat(format);
246
        
247
        //create the checksum
248
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes());
249
        String checksumS = checksum(bais);
250
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
251
        Checksum checksum = new Checksum();
252
        checksum.setValue(checksumS);
253
        checksum.setAlgorithm(ca);
254
        sm.setChecksum(checksum);
255
        
256
        //set the size
257
        sm.setSize(doc.doctext.getBytes().length);
258
        
259
        //submitter
260
        Principal p = new Principal();
261
        p.setValue("");
262
        sm.setSubmitter(p);
263
        sm.setRightsHolder(p);
264
        try
265
        {
266
            Date dateCreated = parseMetacatDate(doc.createDate);
267
            sm.setDateUploaded(dateCreated);
268
            Date dateUpdated = parseMetacatDate(doc.updateDate);
269
            sm.setDateSysMetadataModified(dateUpdated);
270
        }
271
        catch(Exception e)
272
        {
273
            System.out.println("couldn't parse a date: " + e.getMessage());
274
            Date dateCreated = new Date();
275
            sm.setDateUploaded(dateCreated);
276
            Date dateUpdated = new Date();
277
            sm.setDateSysMetadataModified(dateUpdated);
278
        }
279
        NodeReference nr = new NodeReference();
280
        nr.setValue(sourceUrl);
281
        sm.setOriginMemberNode(nr);
282
        sm.setAuthoritativeMemberNode(nr);
283
        return sm;
284
    }
285
    
286
    /**
287
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
288
     * a proper date object
289
     * @param date
290
     * @return
291
     */
292
    private Date parseMetacatDate(String date)
293
    {
294
        String year = date.substring(0, 4);
295
        String month = date.substring(5, 7);
296
        String day = date.substring(8, 10);
297
        Calendar c = Calendar.getInstance();
298
        c.set(new Integer(year).intValue(), 
299
              new Integer(month).intValue(), 
300
              new Integer(day).intValue());
301
        return c.getTime();
302
    }
303

    
304
    /**
305
     * send a request to the resource
306
     */
307
    private InputStream sendRequest(String contextRootUrl, String resource, 
308
            String sessionid, String method, String urlParamaters, 
309
            String contentType, InputStream dataStream) 
310
        throws Exception 
311
    {
312
        
313
        HttpURLConnection connection = null ;
314
        String restURL = contextRootUrl + resource;
315

    
316
        if (urlParamaters != null) {
317
            if (restURL.indexOf("?") == -1)             
318
                restURL += "?";
319
            restURL += urlParamaters; 
320
            if(restURL.indexOf(" ") != -1)
321
            {
322
                restURL = restURL.replaceAll("\\s", "%20");
323
            }
324
        }
325
        
326
        if(sessionid != null)
327
        {
328
            if(restURL.indexOf("?") == -1)
329
            {
330
                restURL += "?sessionid=" + sessionid;
331
            }
332
            else
333
            {
334
                restURL += "&sessionid=" + sessionid;
335
            }
336
        }
337

    
338
        URL u = null;
339
        InputStream content = null;
340
        System.out.println("url: " + restURL);
341
        System.out.println("method: " + method);
342
        u = new URL(restURL);
343
        connection = (HttpURLConnection) u.openConnection();
344
        if (contentType!=null) {
345
            connection.setRequestProperty("Content-Type",contentType);
346
        }
347

    
348
        connection.setDoOutput(true);
349
        connection.setDoInput(true);
350
        connection.setRequestMethod(method);
351

    
352
        if (!method.equals("GET")) {
353
            if (dataStream != null) {
354
                OutputStream out = connection.getOutputStream();
355
                IOUtils.copy(dataStream, out);
356
            }
357
        }
358

    
359
        return connection.getInputStream();   
360
    }
361
    
362
    /**
363
     * create a mime multipart message from object and sysmeta
364
     */
365
    private MimeMultipart createMimeMultipart(InputStream object)
366
      throws Exception
367
    {
368
        final MimeMultipart mmp = new MimeMultipart();
369
        MimeBodyPart objectPart = new MimeBodyPart();
370
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
371
        objectPart.setFileName("doctext");
372
        DataSource ds = new InputStreamDataSource("doctext", object);
373
        DataHandler dh = new DataHandler(ds);
374
        objectPart.setDataHandler(dh);
375
        mmp.addBodyPart(objectPart);
376
        return mmp;
377
    }
378
    
379
    /**
380
     * parse a metacat query response and return a vector of docids
381
     * @param response
382
     * @return
383
     */
384
    private Vector<Document> parseResponse(String response)
385
    {
386
        Vector<Document> v = new Vector<Document>();
387
        int dstart = response.indexOf("<document>");
388
        int dend = response.indexOf("</document>", dstart);
389
        while(dstart != -1)
390
        {
391
            String doc = response.substring(dstart + "<document>".length(), dend);
392
            //System.out.println("adding " + docid);
393
            Document d = new Document(getFieldFromDoc(doc, "docid"),
394
                    getFieldFromDoc(doc, "doctype"),
395
                    getFieldFromDoc(doc, "createdate"),
396
                    getFieldFromDoc(doc, "updatedate"));
397
            v.add(d);
398
            dstart = response.indexOf("<document>", dend);
399
            dend = response.indexOf("</document>", dstart);
400
        }
401
        
402
        return v;
403
    }
404
    
405
    private String getFieldFromDoc(String doc, String fieldname)
406
    {
407
        String field = "<" + fieldname + ">";
408
        String fieldend = "</" + fieldname + ">";
409
        int start = doc.indexOf(field);
410
        int end = doc.indexOf(fieldend);
411
        String s = doc.substring(start + field.length(), end);
412
        System.out.println("field: " + fieldname + " : " + s);
413
        return s;
414
    }
415
    
416
    /**
417
     * login the source
418
     * @return
419
     * @throws Exception
420
     */
421
    private String loginSource()
422
      throws Exception
423
    {
424
        return login(sourceUrl);
425
    }
426
    
427
    /**
428
     * login the destination
429
     * @return
430
     * @throws Exception
431
     */
432
    private String loginDest()
433
        throws Exception
434
    {
435
        return login(destinationUrl);
436
    }
437
    
438
    /**
439
     * returns a sessionid
440
     * @return
441
     */
442
    private String login(String sourceUrl)
443
      throws Exception
444
    {
445
        InputStream is = getResponse(sourceUrl, "/metacat", 
446
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
447
        "POST");
448
        String response = streamToString(is);
449
        //System.out.println("response: " + response);
450
        String sessionid = response.substring(
451
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
452
                response.indexOf("</sessionId>"));
453
        System.out.println("sessionid: " + sessionid);
454
        return sessionid;
455
    }
456
    
457
    /**
458
     * logout both the source and destination
459
     * @throws Exception
460
     */
461
    private void logout()
462
        throws Exception
463
    {
464
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
465
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
466
    }
467
    
468
    /**
469
     * get an http response
470
     * @param contextRootUrl
471
     * @param resource
472
     * @param urlParameters
473
     * @param method
474
     * @return
475
     * @throws Exception
476
     */
477
    private InputStream getResponse(String contextRootUrl, String resource, 
478
            String urlParameters, String method)
479
      throws Exception
480
    {
481
        HttpURLConnection connection = null ;
482

    
483
        String restURL = contextRootUrl+resource;
484

    
485
        if (urlParameters != null) {
486
            if (restURL.indexOf("?") == -1)             
487
                restURL += "?";
488
            restURL += urlParameters; 
489
            if(restURL.indexOf(" ") != -1)
490
            {
491
                restURL = restURL.replaceAll("\\s", "%20");
492
            }
493
        }
494

    
495
        URL u = null;
496
        InputStream content = null;            
497
        System.out.println("url: " + restURL);
498
        System.out.println("method: " + method);
499
        u = new URL(restURL);
500
        connection = (HttpURLConnection) u.openConnection();
501
        connection.setDoOutput(true);
502
        connection.setDoInput(true);
503
        connection.setRequestMethod(method);
504
        content = connection.getInputStream();
505
        return content;
506
    }
507
    
508
    private String streamToString(InputStream is)
509
        throws Exception
510
    {
511
        byte b[] = new byte[1024];
512
        int numread = is.read(b, 0, 1024);
513
        String response = new String();
514
        while(numread != -1)
515
        {
516
            response += new String(b, 0, numread);
517
            numread = is.read(b, 0, 1024);
518
        }
519
        return response;
520
    }
521
    
522
    private InputStream stringToStream(String s)
523
      throws Exception
524
    {
525
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes());
526
        return bais;
527
    }
528
    
529
    private class Document
530
    {
531
        public String docid;
532
        public String doctype;
533
        public String createDate;
534
        public String updateDate;
535
        public String doctext;
536
        
537
        public Document(String docid, String doctype, String createDate, String updateDate)
538
        {
539
            this.docid = docid;
540
            this.doctype = doctype;
541
            this.createDate = createDate;
542
            this.updateDate = updateDate;
543
        }
544
    }
545
}
(8-8/15)