Project

General

Profile

1 5394 berkley
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A Class that implements administrative methods
4
 *  Copyright: 2010 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Michael Daigle
7
 *
8
 *   '$Author: berkley $'
9
 *     '$Date: 2010-06-08 12:34:30 -0700 (Tue, 08 Jun 2010) $'
10
 * '$Revision: 5374 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26
package edu.ucsb.nceas.metacat.util;
27
28
import java.security.MessageDigest;
29
import java.util.*;
30
import java.io.*;
31
import java.net.*;
32
33
import javax.activation.DataHandler;
34
import javax.activation.DataSource;
35
import javax.mail.MessagingException;
36
import javax.mail.internet.MimeBodyPart;
37
import javax.mail.internet.MimeMultipart;
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41
42
import org.apache.commons.io.IOUtils;
43
44
import edu.ucsb.nceas.metacat.MetacatHandler;
45
import edu.ucsb.nceas.metacat.MetacatResultSet;
46
import edu.ucsb.nceas.metacat.MetacatResultSet.Document;
47
import edu.ucsb.nceas.metacat.dataone.CrudService;
48
import edu.ucsb.nceas.metacat.restservice.InputStreamDataSource;
49
50
import org.dataone.service.exceptions.InvalidSystemMetadata;
51
import org.dataone.service.exceptions.ServiceFailure;
52
import org.dataone.service.types.AuthToken;
53
import org.dataone.service.types.Checksum;
54
import org.dataone.service.types.ChecksumAlgorithm;
55
import org.dataone.service.types.NodeReference;
56
import org.dataone.service.types.ObjectFormat;
57
import org.dataone.service.types.Principal;
58
import org.dataone.service.types.SystemMetadata;
59
import org.dataone.service.types.Identifier;
60
import org.dataone.client.D1Client;
61
62
//import sun.tools.jstat.Identifier;
63
64
import com.gc.iotools.stream.is.InputStreamFromOutputStream;
65
66
/**
 * A class to populate a metacat instance based on documents returned from a query
 * @author berkley
 */
70
public class MetacatPopulator
71
{
72
    private String sourceUrl = null;
73
    private String destinationUrl = null;
74
    private String query = null;
75
    private String username = null;
76
    private String password = null;
77
78
    /**
     * Builds a populator that copies the documents matched by a query from a
     * source metacat instance to a destination instance.
     * Both urls should be of the form {@code http://<url>/<metacat_instance>}.
     * NOTE(review): an earlier version of this comment said that a null
     * username and/or password runs the query as public; nothing in this
     * constructor special-cases null -- confirm against the server behaviour.
     * @param sourceUrl base url of the metacat instance that is read from
     * @param destUrl base url of the metacat instance that is written to
     * @param query text that is searched for on the source
     * @param username user name used when logging in
     * @param password password used when logging in
     */
    public MetacatPopulator(String sourceUrl, String destUrl, String query, String username, String password)
    {
        // endpoints first, then the search text, then the credentials
        this.sourceUrl = sourceUrl;
        this.destinationUrl = destUrl;
        this.query = query;
        this.username = username;
        this.password = password;
    }
97
98
    /**
99
     * populate from the source
100
     */
101
    public void populate()
102
      throws Exception
103
    {
104 5397 berkley
        printHeader("Source login");
105 5394 berkley
        String sourceSessionid = loginSource();
106 5397 berkley
107 5394 berkley
        //do a query
108
        String params = "returndoctype=eml://ecoinformatics.org/eml-2.0.1&" +
109
                        "returndoctype=eml://ecoinformatics.org/eml-2.0.0&" +
110
                        "returndoctype=BIN&" +
111
                        "returndoctype=http://dataone.org/service/types/SystemMetadata/0.1&";
112
        params += "action=query&";
113
        params += "qformat=xml&";
114
        params += "anyfield=" + query;
115
116 5397 berkley
        printHeader("Searching source");
117 5394 berkley
        System.out.println("searching '" + sourceUrl + "' for '" + query + "' with sessionid '" + sourceSessionid + "'");
118
        InputStream is = getResponse(sourceUrl, "/metacat",
119
                params, "POST");
120
        String response = streamToString(is);
121
        //System.out.println("response: " + response);
122
        Vector<Document> docs = parseResponse(response);
123 5397 berkley
        printHeader("Parsing source results");
124
        D1Client d1 = new D1Client(destinationUrl + "/");
125
        printHeader("logging in to the destination " + destinationUrl);
126
        AuthToken authtoken = d1.login(username, password);
127 5394 berkley
        for(int i=0; i<docs.size(); i++)
128
        {
129
            //for each document in the query
130
            Document doc = docs.get(i);
131
            String docid = doc.docid;
132
            //get the doc from source
133 5397 berkley
            printHeader("Getting document " + doc.docid + " from source " + sourceUrl);
134 5394 berkley
            params = "action=read&qformat=xml&docid=" + docid;
135
            is = getResponse(sourceUrl, "/metacat", params, "POST");
136
            String doctext = streamToString(is);
137
            //System.out.println("Done retrieving document: " + doctext);
138
            is = stringToStream(doctext);
139
            doc.doctext = doctext;
140 5397 berkley
141
            printHeader("creating document on destination " + destinationUrl);
142 5394 berkley
            SystemMetadata sysmeta = generateSystemMetadata(doc);
143 5397 berkley
            try
144
            {
145
              Identifier id = d1.create(authtoken, sysmeta.getIdentifier(),
146
                    IOUtils.toInputStream(doc.doctext), sysmeta);
147
              System.out.println("Success inserting document " + id.getValue());
148
            }
149
            catch(Exception e)
150
            {
151
                System.out.println("Could not create document with id " +
152
                        sysmeta.getIdentifier().getValue() + " : " + e.getMessage());
153
            }
154
            finally
155
            {
156
                printHeader("Done inserting document " + sysmeta.getIdentifier().getValue());
157
            }
158 5394 berkley
        }
159
160
        logout();
161
    }
162
163 5397 berkley
    private void printHeader(String s)
164
    {
165
        System.out.println("****** " + s + " *******");
166
    }
167
168 5394 berkley
    /**
169
     * produce an md5 checksum for item
170
     */
171
    private String checksum(InputStream is)
172
      throws Exception
173
    {
174
        byte[] buffer = new byte[1024];
175
        MessageDigest complete = MessageDigest.getInstance("MD5");
176
        int numRead;
177
178
        do
179
        {
180
          numRead = is.read(buffer);
181
          if (numRead > 0)
182
          {
183
            complete.update(buffer, 0, numRead);
184
          }
185
        } while (numRead != -1);
186
187
188
        return getHex(complete.digest());
189
    }
190
191
    /**
192
     * convert a byte array to a hex string
193
     */
194
    private static String getHex( byte [] raw )
195
    {
196
        final String HEXES = "0123456789ABCDEF";
197
        if ( raw == null ) {
198
          return null;
199
        }
200
        final StringBuilder hex = new StringBuilder( 2 * raw.length );
201
        for ( final byte b : raw ) {
202
          hex.append(HEXES.charAt((b & 0xF0) >> 4))
203
             .append(HEXES.charAt((b & 0x0F)));
204
        }
205
        return hex.toString();
206
    }
207
208
    /**
209
     * @param doc
210
     * @return
211
     */
212
    private SystemMetadata generateSystemMetadata(Document doc)
213
      throws Exception
214
    {
215
        SystemMetadata sm = new SystemMetadata();
216
        //set the id
217
        Identifier id = new Identifier();
218
        id.setValue(doc.docid);
219
        sm.setIdentifier(id);
220
221
        //set the object format
222
        ObjectFormat format = ObjectFormat.convert(doc.doctype);
223
        if(format == null)
224
        {
225
            if(doc.doctype.trim().equals("BIN"))
226
            {
227
                format = ObjectFormat.APPLICATIONOCTETSTREAM;
228
            }
229
            else
230
            {
231
                format = ObjectFormat.convert("text/plain");
232
            }
233
        }
234
        sm.setObjectFormat(format);
235
236
        //create the checksum
237
        ByteArrayInputStream bais = new ByteArrayInputStream(doc.doctext.getBytes());
238
        String checksumS = checksum(bais);
239
        ChecksumAlgorithm ca = ChecksumAlgorithm.convert("MD5");
240
        Checksum checksum = new Checksum();
241
        checksum.setValue(checksumS);
242
        checksum.setAlgorithm(ca);
243
        sm.setChecksum(checksum);
244
245
        //set the size
246
        sm.setSize(doc.doctext.getBytes().length);
247
248
        //submitter
249
        Principal p = new Principal();
250
        p.setValue("");
251
        sm.setSubmitter(p);
252
        sm.setRightsHolder(p);
253
        try
254
        {
255
            Date dateCreated = parseMetacatDate(doc.createDate);
256
            sm.setDateUploaded(dateCreated);
257
            Date dateUpdated = parseMetacatDate(doc.updateDate);
258
            sm.setDateSysMetadataModified(dateUpdated);
259
        }
260
        catch(Exception e)
261
        {
262
            System.out.println("couldn't parse a date: " + e.getMessage());
263
            Date dateCreated = new Date();
264
            sm.setDateUploaded(dateCreated);
265
            Date dateUpdated = new Date();
266
            sm.setDateSysMetadataModified(dateUpdated);
267
        }
268
        NodeReference nr = new NodeReference();
269
        nr.setValue(sourceUrl);
270
        sm.setOriginMemberNode(nr);
271
        sm.setAuthoritativeMemberNode(nr);
272
        return sm;
273
    }
274
275
    /**
276
     * parse the metacat date which looks like 2010-06-08 (YYYY-MM-DD) into
277
     * a proper date object
278
     * @param date
279
     * @return
280
     */
281
    private Date parseMetacatDate(String date)
282
    {
283
        String year = date.substring(0, 4);
284
        String month = date.substring(5, 7);
285
        String day = date.substring(8, 10);
286
        Calendar c = Calendar.getInstance();
287
        c.set(new Integer(year).intValue(),
288
              new Integer(month).intValue(),
289
              new Integer(day).intValue());
290
        return c.getTime();
291
    }
292
293
    /**
294
     * send a request to the resource
295
     */
296
    private InputStream sendRequest(String contextRootUrl, String resource,
297
            String sessionid, String method, String urlParamaters,
298
            String contentType, InputStream dataStream)
299
        throws Exception
300
    {
301
302
        HttpURLConnection connection = null ;
303
        String restURL = contextRootUrl + resource;
304
305
        if (urlParamaters != null) {
306
            if (restURL.indexOf("?") == -1)
307
                restURL += "?";
308
            restURL += urlParamaters;
309
            if(restURL.indexOf(" ") != -1)
310
            {
311
                restURL = restURL.replaceAll("\\s", "%20");
312
            }
313
        }
314
315
        if(sessionid != null)
316
        {
317
            if(restURL.indexOf("?") == -1)
318
            {
319
                restURL += "?sessionid=" + sessionid;
320
            }
321
            else
322
            {
323
                restURL += "&sessionid=" + sessionid;
324
            }
325
        }
326
327
        URL u = null;
328
        InputStream content = null;
329
        System.out.println("url: " + restURL);
330
        System.out.println("method: " + method);
331
        u = new URL(restURL);
332
        connection = (HttpURLConnection) u.openConnection();
333
        if (contentType!=null) {
334
            connection.setRequestProperty("Content-Type",contentType);
335
        }
336
337
        connection.setDoOutput(true);
338
        connection.setDoInput(true);
339
        connection.setRequestMethod(method);
340
341
        if (!method.equals("GET")) {
342
            if (dataStream != null) {
343
                OutputStream out = connection.getOutputStream();
344
                IOUtils.copy(dataStream, out);
345
            }
346
        }
347
348
        return connection.getInputStream();
349
    }
350
351
    /**
352
     * create a mime multipart message from object and sysmeta
353
     */
354
    private MimeMultipart createMimeMultipart(InputStream object)
355
      throws Exception
356
    {
357
        final MimeMultipart mmp = new MimeMultipart();
358
        MimeBodyPart objectPart = new MimeBodyPart();
359
        objectPart.addHeaderLine("Content-Transfer-Encoding: base64");
360
        objectPart.setFileName("doctext");
361
        DataSource ds = new InputStreamDataSource("doctext", object);
362
        DataHandler dh = new DataHandler(ds);
363
        objectPart.setDataHandler(dh);
364
        mmp.addBodyPart(objectPart);
365
        return mmp;
366
    }
367
368
    /**
369
     * parse a metacat query response and return a vector of docids
370
     * @param response
371
     * @return
372
     */
373
    private Vector<Document> parseResponse(String response)
374
    {
375
        Vector<Document> v = new Vector<Document>();
376
        int dstart = response.indexOf("<document>");
377
        int dend = response.indexOf("</document>", dstart);
378
        while(dstart != -1)
379
        {
380
            String doc = response.substring(dstart + "<document>".length(), dend);
381
            //System.out.println("adding " + docid);
382
            Document d = new Document(getFieldFromDoc(doc, "docid"),
383
                    getFieldFromDoc(doc, "doctype"),
384
                    getFieldFromDoc(doc, "createdate"),
385
                    getFieldFromDoc(doc, "updatedate"));
386
            v.add(d);
387
            dstart = response.indexOf("<document>", dend);
388
            dend = response.indexOf("</document>", dstart);
389
        }
390
391
        return v;
392
    }
393
394
    private String getFieldFromDoc(String doc, String fieldname)
395
    {
396
        String field = "<" + fieldname + ">";
397
        String fieldend = "</" + fieldname + ">";
398
        int start = doc.indexOf(field);
399
        int end = doc.indexOf(fieldend);
400
        String s = doc.substring(start + field.length(), end);
401
        System.out.println("field: " + fieldname + " : " + s);
402
        return s;
403
    }
404
405
    /**
406
     * login the source
407
     * @return
408
     * @throws Exception
409
     */
410
    private String loginSource()
411
      throws Exception
412
    {
413
        return login(sourceUrl);
414
    }
415
416
    /**
417
     * login the destination
418
     * @return
419
     * @throws Exception
420
     */
421
    private String loginDest()
422
        throws Exception
423
    {
424
        return login(destinationUrl);
425
    }
426
427
    /**
428
     * returns a sessionid
429
     * @return
430
     */
431
    private String login(String sourceUrl)
432
      throws Exception
433
    {
434
        InputStream is = getResponse(sourceUrl, "/metacat",
435
                "action=login&username=" + username + "&password=" + password + "&qformat=xml",
436
        "POST");
437
        String response = streamToString(is);
438
        //System.out.println("response: " + response);
439 5397 berkley
        if(response.indexOf("sessionId") == -1)
440
        {
441
            throw new Exception("Error logging into " + sourceUrl);
442
        }
443
444 5394 berkley
        String sessionid = response.substring(
445
                response.indexOf("<sessionId>") + "<sessionId>".length(),
446
                response.indexOf("</sessionId>"));
447
        System.out.println("sessionid: " + sessionid);
448
        return sessionid;
449
    }
450
451
    /**
452
     * logout both the source and destination
453
     * @throws Exception
454
     */
455
    private void logout()
456
        throws Exception
457
    {
458
        getResponse(sourceUrl, "/metacat", "action=logout&username=" + username, "POST");
459
        getResponse(destinationUrl, "/metacat", "action=logout&username=" + username, "POST");
460
    }
461
462
    /**
463
     * get an http response
464
     * @param contextRootUrl
465
     * @param resource
466
     * @param urlParameters
467
     * @param method
468
     * @return
469
     * @throws Exception
470
     */
471
    private InputStream getResponse(String contextRootUrl, String resource,
472
            String urlParameters, String method)
473
      throws Exception
474
    {
475
        HttpURLConnection connection = null ;
476
477
        String restURL = contextRootUrl+resource;
478
479
        if (urlParameters != null) {
480
            if (restURL.indexOf("?") == -1)
481
                restURL += "?";
482
            restURL += urlParameters;
483
            if(restURL.indexOf(" ") != -1)
484
            {
485
                restURL = restURL.replaceAll("\\s", "%20");
486
            }
487
        }
488
489
        URL u = null;
490
        InputStream content = null;
491
        System.out.println("url: " + restURL);
492
        System.out.println("method: " + method);
493
        u = new URL(restURL);
494
        connection = (HttpURLConnection) u.openConnection();
495
        connection.setDoOutput(true);
496
        connection.setDoInput(true);
497
        connection.setRequestMethod(method);
498
        content = connection.getInputStream();
499
        return content;
500
    }
501
502
    private String streamToString(InputStream is)
503
        throws Exception
504
    {
505
        byte b[] = new byte[1024];
506
        int numread = is.read(b, 0, 1024);
507
        String response = new String();
508
        while(numread != -1)
509
        {
510
            response += new String(b, 0, numread);
511
            numread = is.read(b, 0, 1024);
512
        }
513
        return response;
514
    }
515
516
    private InputStream stringToStream(String s)
517
      throws Exception
518
    {
519
        ByteArrayInputStream bais = new ByteArrayInputStream(s.getBytes());
520
        return bais;
521
    }
522
523
    private class Document
524
    {
525
        public String docid;
526
        public String doctype;
527
        public String createDate;
528
        public String updateDate;
529
        public String doctext;
530
531
        public Document(String docid, String doctype, String createDate, String updateDate)
532
        {
533
            this.docid = docid;
534
            this.doctype = doctype;
535
            this.createDate = createDate;
536
            this.updateDate = updateDate;
537
        }
538
    }
539
}