Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that populates a metacat instance from the documents returned by a query
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 * 
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27

    
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32

    
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41

    
42
import org.apache.commons.io.IOUtils;
43

    
44
import edu.ucsb.nceas.metacat.MetacatHandler;
45
import edu.ucsb.nceas.metacat.MetacatResultSet;
46
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
47
import edu.ucsb.nceas.metacat.dataone.CrudService;
48
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
49

    
50
import org.dataone.service.exceptions.InvalidSystemMetadata;
51
import org.dataone.service.exceptions.ServiceFailure;
52
import org.dataone.service.types.AuthToken;
53
import org.dataone.service.types.Checksum;
54
import org.dataone.service.types.ChecksumAlgorithm;
55
import org.dataone.service.types.NodeReference;
56
import org.dataone.service.types.ObjectFormat;
57
import org.dataone.service.types.Principal;
58
import org.dataone.service.types.SystemMetadata;
59
import org.dataone.service.types.Identifier;
60
import org.dataone.client.D1Client;
61

    
62
//import sun.tools.jstat.Identifier;
63

    
64
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
65

    
66
/**
67
 * @author berkley
68
 * A class to populate a metacat instance based on documents returned from a query
69
 */
70
public class MetacatPopulator
71
{
72
    private String sourceUrl = null;
73
    private String destinationUrl = null;
74
    private String query = null;
75
    private String username = null;
76
    private String password = null;
77
    
78
    /**
79
     * create a new MetacatPopulator with given source and destination urls.  
80
     * These should be
81
     * of the form "http://<url>/<metacat_instance>"
82
     * If username and/or password is null, the query will be run as public
83
     * @param sourceUrl
84
     * @param destUrl
85
     * @param query
86
     * @param username
87
     * @param password
88
     */
89
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
90
    {
91
        this.sourceUrl = sourceUrl;
92
        this.query = query;
93
        this.username = username;
94
        this.password = password;
95
        this.destinationUrl = destUrl;
96
    }
97
    
98
    /**
99
     * populate from the source
100
     */
101
    public void populate()
102
      throws Exception
103
    {
104
        printHeader("Source login");
105
        String sourceSessionid = loginSource();
106
        
107
        //do a query
108
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
109
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&" +
110
                        "returndoctype=BIN&" +
111
                        "returndoctype=http://dataone.org/service/types/SystemMetadata/0.1&";
112
        params += "action=query&";
113
        params += "qformat=xml&";
114
        params += "anyfield=" + query;
115
        
116
        printHeader("Searching source");
117
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
118
        InputStream is = getResponse(sourceUrl, "/metacat",
119
                params, "POST");
120
        String response = streamToString(is);
121
        //System.out.println("response: " + response);
122
        Vector<Document> docs = parseResponse(response);
123
        
124
        
125
        printHeader("Parsing source results");
126
        D1Client d1 = new D1Client(destinationUrl + "/");
127
        printHeader("Processing " + docs.size() + " results.");
128
        printHeader("logging in to the destination " + destinationUrl);
129
        AuthToken authtoken = d1.login(username, password);
130
        for(int i=0; i<docs.size(); i++)
131
        {
132
            //for each document in the query
133
            Document doc = docs.get(i);
134
            String docid = doc.docid;
135
            //get the doc from source
136
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
137
            params = "action=read&qformat=xml&docid=" + docid;
138
            is = getResponse(sourceUrl, "/metacat", params, "POST");
139
            String doctext = streamToString(is);
140
            //System.out.println("Done retrieving document: " + doctext);
141
            is = stringToStream(doctext);
142
            doc.doctext = doctext;
143

    
144
            printHeader("creating document on destination " + destinationUrl);            
145
            SystemMetadata sysmeta = generateSystemMetadata(doc);
146
            try
147
            {
148
              Identifier id = d1.create(authtoken, sysmeta.getIdentifier(), 
149
                    IOUtils.toInputStream(doc.doctext), sysmeta);
150
              System.out.println("Success inserting document " + id.getValue());
151
            }
152
            catch(Exception e)
153
            {
154
                System.out.println("Could not create document with id " + 
155
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
156
            }
157
            finally
158
            {
159
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
160
            }
161
        }
162
        
163
        logout();
164
    }
165
    
166
    private void printHeader(String s)
167
    {
168
        System.out.println("****** " + s + " *******");
169
    }
170
    
171
    /**
172
     * produce an md5 checksum for item
173
     */
174
    private String checksum(InputStream is)
175
      throws Exception
176
    {        
177
        byte[] buffer = new byte[1024];
178
        MessageDigest complete = MessageDigest.getInstance("MD5");
179
        int numRead;
180
        
181
        do 
182
        {
183
          numRead = is.read(buffer);
184
          if (numRead > 0) 
185
          {
186
            complete.update(buffer, 0, numRead);
187
          }
188
        } while (numRead != -1);
189
        
190
        
191
        return getHex(complete.digest());
192
    }
193
    
194
    /**
195
     * convert a byte array to a hex string
196
     */
197
    private static String getHex( byte [] raw ) 
198
    {
199
        final String HEXES = "0123456789ABCDEF";
200
        if ( raw == null ) {
201
          return null;
202
        }
203
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
204
        for ( final byte b : raw ) {
205
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
206
             .append(HEXES.charAt((b & 0x0F)));
207
        }
208
        return hex.toString();
209
    }
210
    
211
    /**
212
     * @param doc
213
     * @return
214
     */
215
    private SystemMetadata generateSystemMetadata(Document doc)
216
      throws Exception
217
    {
218
        SystemMetadata sm = new SystemMetadata();
219
        //set the id
220
        Identifier id = new Identifier();
221
        id.setValue(doc.docid.trim());
222
        sm.setIdentifier(id);
223
        
224
        //set the object format
225
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
226
        if(format == null)
227
        {
228
            if(doc.doctype.trim().equals("BIN"))
229
            {
230
                format = ObjectFormat.OCTET_STREAM;
231
            }
232
            else
233
            {
234
                format = ObjectFormat.convert("text/plain");
235
            }
236
        }
237
        sm.setObjectFormat(format);
238
        
239
        //create the checksum
240
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes());
241
        String checksumS = checksum(bais);
242
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
243
        Checksum checksum = new Checksum();
244
        checksum.setValue(checksumS);
245
        checksum.setAlgorithm(ca);
246
        sm.setChecksum(checksum);
247
        
248
        //set the size
249
        sm.setSize(doc.doctext.getBytes().length);
250
        
251
        //submitter
252
        Principal p = new Principal();
253
        p.setValue("unknown");
254
        sm.setSubmitter(p);
255
        sm.setRightsHolder(p);
256
        try
257
        {
258
            Date dateCreated = parseMetacatDate(doc.createDate);
259
            sm.setDateUploaded(dateCreated);
260
            Date dateUpdated = parseMetacatDate(doc.updateDate);
261
            sm.setDateSysMetadataModified(dateUpdated);
262
        }
263
        catch(Exception e)
264
        {
265
            System.out.println("couldn't parse a date: " + e.getMessage());
266
            Date dateCreated = new Date();
267
            sm.setDateUploaded(dateCreated);
268
            Date dateUpdated = new Date();
269
            sm.setDateSysMetadataModified(dateUpdated);
270
        }
271
        NodeReference nr = new NodeReference();
272
        nr.setValue(sourceUrl);
273
        sm.setOriginMemberNode(nr);
274
        sm.setAuthoritativeMemberNode(nr);
275
        return sm;
276
    }
277
    
278
    /**
279
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
280
     * a proper date object
281
     * @param date
282
     * @return
283
     */
284
    private Date parseMetacatDate(String date)
285
    {
286
        String year = date.substring(0, 4);
287
        String month = date.substring(5, 7);
288
        String day = date.substring(8, 10);
289
        Calendar c = Calendar.getInstance();
290
        c.set(new Integer(year).intValue(), 
291
              new Integer(month).intValue(), 
292
              new Integer(day).intValue());
293
        return c.getTime();
294
    }
295

    
296
    /**
297
     * send a request to the resource
298
     */
299
    private InputStream sendRequest(String contextRootUrl, String resource, 
300
            String sessionid, String method, String urlParamaters, 
301
            String contentType, InputStream dataStream) 
302
        throws Exception 
303
    {
304
        
305
        HttpURLConnection connection = null ;
306
        String restURL = contextRootUrl + resource;
307

    
308
        if (urlParamaters != null) {
309
            if (restURL.indexOf("?") == -1)             
310
                restURL += "?";
311
            restURL += urlParamaters; 
312
            if(restURL.indexOf(" ") != -1)
313
            {
314
                restURL = restURL.replaceAll("\\s", "%20");
315
            }
316
        }
317
        
318
        if(sessionid != null)
319
        {
320
            if(restURL.indexOf("?") == -1)
321
            {
322
                restURL += "?sessionid=" + sessionid;
323
            }
324
            else
325
            {
326
                restURL += "&sessionid=" + sessionid;
327
            }
328
        }
329

    
330
        URL u = null;
331
        InputStream content = null;
332
        System.out.println("url: " + restURL);
333
        System.out.println("method: " + method);
334
        u = new URL(restURL);
335
        connection = (HttpURLConnection) u.openConnection();
336
        if (contentType!=null) {
337
            connection.setRequestProperty("Content-Type",contentType);
338
        }
339

    
340
        connection.setDoOutput(true);
341
        connection.setDoInput(true);
342
        connection.setRequestMethod(method);
343

    
344
        if (!method.equals("GET")) {
345
            if (dataStream != null) {
346
                OutputStream out = connection.getOutputStream();
347
                IOUtils.copy(dataStream, out);
348
            }
349
        }
350

    
351
        return connection.getInputStream();   
352
    }
353
    
354
    /**
355
     * create a mime multipart message from object and sysmeta
356
     */
357
    private MimeMultipart createMimeMultipart(InputStream object)
358
      throws Exception
359
    {
360
        final MimeMultipart mmp = new MimeMultipart();
361
        MimeBodyPart objectPart = new MimeBodyPart();
362
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
363
        objectPart.setFileName("doctext");
364
        DataSource ds = new InputStreamDataSource("doctext", object);
365
        DataHandler dh = new DataHandler(ds);
366
        objectPart.setDataHandler(dh);
367
        mmp.addBodyPart(objectPart);
368
        return mmp;
369
    }
370
    
371
    /**
372
     * parse a metacat query response and return a vector of docids
373
     * @param response
374
     * @return
375
     */
376
    private Vector<Document> parseResponse(String response)
377
    {
378
        Vector<Document> v = new Vector<Document>();
379
        int dstart = response.indexOf("<document>");
380
        int dend = response.indexOf("</document>", dstart);
381
        while(dstart != -1)
382
        {
383
            String doc = response.substring(dstart + "<document>".length(), dend);
384
            //System.out.println("adding " + docid);
385
            Document d = new Document(getFieldFromDoc(doc, "docid"),
386
                    getFieldFromDoc(doc, "doctype"),
387
                    getFieldFromDoc(doc, "createdate"),
388
                    getFieldFromDoc(doc, "updatedate"));
389
            v.add(d);
390
            dstart = response.indexOf("<document>", dend);
391
            dend = response.indexOf("</document>", dstart);
392
        }
393
        
394
        return v;
395
    }
396
    
397
    private String getFieldFromDoc(String doc, String fieldname)
398
    {
399
        String field = "<" + fieldname + ">";
400
        String fieldend = "</" + fieldname + ">";
401
        int start = doc.indexOf(field);
402
        int end = doc.indexOf(fieldend);
403
        String s = doc.substring(start + field.length(), end);
404
        //System.out.println("field: " + fieldname + " : " + s);
405
        return s;
406
    }
407
    
408
    /**
409
     * login the source
410
     * @return
411
     * @throws Exception
412
     */
413
    private String loginSource()
414
      throws Exception
415
    {
416
        return login(sourceUrl);
417
    }
418
    
419
    /**
420
     * login the destination
421
     * @return
422
     * @throws Exception
423
     */
424
    private String loginDest()
425
        throws Exception
426
    {
427
        return login(destinationUrl);
428
    }
429
    
430
    /**
431
     * returns a sessionid
432
     * @return
433
     */
434
    private String login(String sourceUrl)
435
      throws Exception
436
    {
437
        InputStream is = getResponse(sourceUrl, "/metacat", 
438
                "action=login&username=" + username + "&password=" + password + "&qformat=xml", 
439
        "POST");
440
        String response = streamToString(is);
441
        //System.out.println("response: " + response);
442
        if(response.indexOf("sessionId") == -1)
443
        {
444
            throw new Exception("Error logging into " + sourceUrl);
445
        }
446
        
447
        String sessionid = response.substring(
448
                response.indexOf("<sessionId>") + "<sessionId>".length(), 
449
                response.indexOf("</sessionId>"));
450
        System.out.println("sessionid: " + sessionid);
451
        return sessionid;
452
    }
453
    
454
    /**
455
     * logout both the source and destination
456
     * @throws Exception
457
     */
458
    private void logout()
459
        throws Exception
460
    {
461
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
462
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
463
    }
464
    
465
    /**
466
     * get an http response
467
     * @param contextRootUrl
468
     * @param resource
469
     * @param urlParameters
470
     * @param method
471
     * @return
472
     * @throws Exception
473
     */
474
    private InputStream getResponse(String contextRootUrl, String resource, 
475
            String urlParameters, String method)
476
      throws Exception
477
    {
478
        HttpURLConnection connection = null ;
479

    
480
        String restURL = contextRootUrl+resource;
481

    
482
        if (urlParameters != null) {
483
            if (restURL.indexOf("?") == -1)             
484
                restURL += "?";
485
            restURL += urlParameters; 
486
            if(restURL.indexOf(" ") != -1)
487
            {
488
                restURL = restURL.replaceAll("\\s", "%20");
489
            }
490
        }
491

    
492
        URL u = null;
493
        InputStream content = null;            
494
        System.out.println("url: " + restURL);
495
        System.out.println("method: " + method);
496
        u = new URL(restURL);
497
        connection = (HttpURLConnection) u.openConnection();
498
        connection.setDoOutput(true);
499
        connection.setDoInput(true);
500
        connection.setRequestMethod(method);
501
        content = connection.getInputStream();
502
        return content;
503
    }
504
    
505
    private String streamToString(InputStream is)
506
        throws Exception
507
    {
508
        byte b[] = new byte[1024];
509
        int numread = is.read(b, 0, 1024);
510
        String response = new String();
511
        while(numread != -1)
512
        {
513
            response += new String(b, 0, numread);
514
            numread = is.read(b, 0, 1024);
515
        }
516
        return response;
517
    }
518
    
519
    private InputStream stringToStream(String s)
520
      throws Exception
521
    {
522
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes());
523
        return bais;
524
    }
525
    
526
    private class Document
527
    {
528
        public String docid;
529
        public String doctype;
530
        public String createDate;
531
        public String updateDate;
532
        public String doctext;
533
        
534
        public Document(String docid, String doctype, String createDate, String updateDate)
535
        {
536
            this.docid = docid.trim();
537
            this.doctype = doctype.trim();
538
            this.createDate = createDate.trim();
539
            this.updateDate = updateDate.trim();
540
        }
541
    }
542
}
(8-8/15)