Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to asynchronously do delta-T replication checking
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *
8
 *   '$Author: daigle $'
9
 *     '$Date: 2009-08-04 14:32:58 -0700 (Tue, 04 Aug 2009) $'
10
 * '$Revision: 5015 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat;
28

    
29
import java.io.StringReader;
30
import java.sql.PreparedStatement;
31
import java.sql.ResultSet;
32
import java.sql.SQLException;
33
import java.util.Hashtable;
34
import java.util.Vector;
35

    
36
import org.apache.log4j.Logger;
37
import org.apache.xpath.objects.XObject;
38
import org.apache.xpath.XPathAPI;
39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
41
import org.w3c.dom.Attr;
42
import org.w3c.dom.NamedNodeMap;
43
import org.w3c.dom.NodeList;
44
import org.w3c.dom.Document;
45
import org.w3c.dom.Node;
46
import org.w3c.dom.NodeList;
47
import org.w3c.dom.DocumentType;
48
import org.xml.sax.InputSource;
49

    
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

    
57
import org.ecoinformatics.eml.EMLParser;
58

    
59
import edu.ucsb.nceas.metacat.database.DBConnection;
60
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
61
import edu.ucsb.nceas.metacat.service.PropertyService;
62
import edu.ucsb.nceas.metacat.util.MetacatUtil;
63
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
64
/**
 * This class determines the content type of a given data file.
 * First, it queries xml_relation for all documents related to the data file.
 * Then it queries xml_documents to find the physical document, and reads the
 * content type from that physical document.
 */
70
public class ContentTypeProvider
71
{
72
  // Doc id of the data file whose content type is being resolved.
  private String dataFileId = null;
  // Resolved content type; null until one of the lookup paths assigns it.
  private String contentType = null;
  // Package flavour (BETA or EML2); assigned as a side effect of
  // getRelativeDocIdList().
  private String packageType = null;
  // Maps a format name to its content type; filled by
  // constructContentHashTable().
  private Hashtable contentTypeHash = new Hashtable();

  // Package type markers stored in packageType.
  private static final String BETA = "beta";
  private static final String EML2 = "eml2";

  // Fallback content type, read once from the
  // "replication.defaultcontenttype" property. Remains null when the
  // property cannot be found.
  private static String DEFAULTCONTENTTYPE;
  static
  {
    try
    {
      DEFAULTCONTENTTYPE = PropertyService.getProperty("replication.defaultcontenttype");
    }
    catch (PropertyNotFoundException pnfe)
    {
      System.err.println("Could not get property DEFAULTCONTENTTYPE:"
          + pnfe.getMessage());
    }
  }

  // XPath of the format element in a beta physical document.
  private static final String FORMATPATH = "//format";

  // Format names (hash keys) and the content types they map to.
  // lookUpContentType() lower-cases the requested format before the lookup,
  // so every key here must be lower case.
  private static final String TEXT       = "text";
  private static final String TEXTYPE    = "text/plain";
  private static final String XML        = "xml";
  private static final String XMLTYPE    = "text/xml";
  // Was "HTML", which could never match a lower-cased lookup key.
  private static final String HTML       = "html";
  private static final String HTMLTYPE   = "text/html";
  private static final String GIF        = "gif";
  private static final String JPEG       = "jpeg";
  private static final String JPEGTYPE   = "image/jpeg";
  private static final String GIFTYPE    = "image/gif";
  private static final String BMP        = "bmp";
  private static final String BMPTYPE    = "image/bmp";
  private static final String TAR        = "tar";
  private static final String TARTYPE    = "application/x-tar";
  private static final String ZIP        = "zip";
  private static final String ZIPTYPE    = "application/x-zip-compressed";
  private static final String BINARY     = "binary";
  private static final String BINARYTYPE = "application/octet-stream";

  // Property names whose values list the doctypes of entity / physical
  // documents (passed to PropertyService.getProperty by
  // getTargetDocIdForBeta()).
  private static final String ENTITYDOCTYPE = "xml.entitydoctype";
  private static final String PHYSICALDOCTYPE = "xml.physicaldoctype";
  private static final String EML2DOCTYPE = "eml2namespace";

  // Element names inspected in an EML2 document.
  private static final String DATAFORMAT = "dataFormat";
  private static final String TEXTFORMAT = "textFormat";
  private static final String EXTENALFORMAT = "externallyDefinedFormat";
  private static final String FORMATNAME = "formatName";
  private static final String BINARYRASTERFORMAT = "binaryRasterFormat";

  // XPath of the online url elements searched for the data file's doc id.
  private static final String DATAFILEPATH = "//physical/distribution/online/url";
  private static final Logger logMetacat = Logger.getLogger(ContentTypeProvider.class);
120

    
121
  /**
122
   * Constructor of ContentTypeProvider
123
   */
124
  public ContentTypeProvider(String docIdWithRevision)
125
  {
126
    dataFileId = MetacatUtil.getDocIdFromString(docIdWithRevision);
127
    //get relative doclist for data file and package type
128
    Vector docLists = null;
129
    docLists = getRelativeDocIdList(dataFileId);
130

    
131
    if ( packageType == null)
132
    {
133
      // other situation, contenetype is default value
134
      contentType = DEFAULTCONTENTTYPE;
135
    }
136
    else if (packageType.equals(BETA))
137
    {
138
      // for beta package and get entity docid for the data file
139
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
140
      // get physical docid for data file
141
      docLists = getRelativeDocIdList(entityDocid);
142
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
143
      // if no physical docid assign to this data file, content type is default
144
      if (physicalDocId == null)
145
      {
146

    
147
        contentType = DEFAULTCONTENTTYPE;
148
      }
149
      else
150
      {
151

    
152
        parsePhysicalDocumentForBeta(physicalDocId);
153
      }
154
    }
155
    else if (packageType.equals(EML2))
156
    {
157
      // for eml2 package
158
      // get eml document for data file
159
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
160
      String eml2Docid = (String)docLists.elementAt(0);
161
      findContentTypeInEML2(eml2Docid);
162

    
163
    }
164

    
165
  }
166

    
167
  /** Method to get content type */
168
  public String getContentType()
169
  {
170
    return contentType;
171
  }//getContentType
172

    
173
  /* Method to find content type base on data format*/
174
  private void findContentTypeInEML2(String eml2DocId)
175
  {
176
    if (eml2DocId == null)
177
    {
178
      contentType = DEFAULTCONTENTTYPE;
179
      return;
180
    }
181
    DocumentImpl xmlDoc = null;
182
    String xmlString = null;
183
    StringReader read = null;
184
    InputSource in = null;
185
    DocumentBuilderFactory dfactory = null;
186
    Document doc = null;
187
    // create xml document
188
    try
189
    {
190
      String accNumber = eml2DocId + PropertyService.getProperty("document.accNumSeparator") +
191
                    DBUtil.getLatestRevisionInDocumentTable(eml2DocId);
192
      //System.out.println("the acc number is !!!!!!!!!!!!!!!!!"+accNumber);
193
      xmlDoc = new DocumentImpl(accNumber);
194
      xmlString = xmlDoc.toString();
195
      //System.out.println("the xml doc is "+xmlDoc);
196
      // create dom tree
197
      read = new StringReader(xmlString);
198
      in = new InputSource(read);
199
      dfactory = DocumentBuilderFactory.newInstance();
200
      dfactory.setNamespaceAware(false);
201
      doc = dfactory.newDocumentBuilder().parse(in);
202
    }
203
    catch (Exception e)
204
    {
205
      // if faild, set default value
206
      contentType = DEFAULTCONTENTTYPE;
207
      logMetacat.error("Error in ContentTypeProvider." +
208
                         "findContentTypeInEML2()" + e.getMessage());
209
      return;
210
    }
211
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
212
                                                   dataFileId);
213
    if (dataFormatNode == null)
214
    {
215
      contentType = DEFAULTCONTENTTYPE;
216
      logMetacat.info("Couldn't find data format node");
217
      return;
218

    
219
    }
220
    NodeList childList  = dataFormatNode.getChildNodes();
221
    // go through childList
222
    for (int i = 0; i<childList.getLength(); i++)
223
    {
224
      Node child = childList.item(i);
225

    
226
      // if has text format child set to text/plain
227
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
228
      {
229
        logMetacat.info("in text format");
230
        contentType = TEXTYPE;
231
      }
232

    
233
      //external format
234
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
235
      {
236
        logMetacat.info("in external format ");
237
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
238
        logMetacat.info("The format is: "+format);
239
        // if we can find the format in the contentTypeHash table
240
        contentType = (String)lookUpContentType(format);
241
        if (contentType == null)
242
        {
243
          contentType = BINARYTYPE;
244
        }
245
      }
246

    
247
      // binaryRasterFormat
248
      if (child.getNodeName() != null && child.getNodeName().
249
          equals(BINARYRASTERFORMAT))
250
      {
251
        contentType = BINARYTYPE;
252
      }//if
253
    }//for
254
    //if contentype still be null, set default value
255
    if (contentType == null)
256
    {
257
      contentType = DEFAULTCONTENTTYPE;
258
    }
259
  }
260

    
261
  /* Method get text value of given child tagname*/
262
  private String getTextValueForGivenChildTag(Node parentNode,
263
                                              String childTagName)
264
  {
265
    String textValue = null;
266
    NodeList childList = parentNode.getChildNodes();
267
    for (int i= 0; i<childList.getLength();i++)
268
    {
269
      Node child = childList.item(i);
270
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
271
      {
272
        logMetacat.info("Find child node: " + childTagName);
273
        Node textNode = child.getFirstChild();
274
        if (textNode.getNodeType() == Node.TEXT_NODE)
275
        {
276
          textValue = textNode.getNodeValue();
277
        }//if
278
      }//if
279
    }//for
280
    logMetacat.info("The text value for element- " + childTagName +
281
                             " is " + textValue);
282
    return textValue;
283
  }//getTExtValueForGivenChildTag
284

    
285
  /* Find the data format node in eml2 document */
286
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
287
                                       String targetDocId)
288
  {
289
    Node targetNode = null;
290
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
291
    if (node != null)
292
    {
293
      // get the phycial the prent is online, grandparent is distribution
294
      // the grand'parent is physical
295
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
296
      NodeList list = phyicalNode.getChildNodes();
297
      for (int i = 0; i < list.getLength(); i++)
298
      {
299
        Node kid = list.item(i);
300
        // find dataFormat node
301
        if (kid.getNodeType() == node.ELEMENT_NODE &&
302
            kid.getNodeName().equals(DATAFORMAT))
303
        {
304
          targetNode = kid;
305
          break;
306
        } //if
307
      } //for
308
      if (targetNode != null)
309
      {
310
        logMetacat.info("dataFormat node'name: " +
311
                                 targetNode.getNodeName());
312
      }
313
    }//if
314
    return targetNode;
315
  }
316
  /* Find the datafile node */
317
  private Node findDataFileNodeInEML2(Document xml, String xPath,
318
                                String targetDocId)
319
  {
320
    Node dataFileNode = null;
321
    NodeList list = null;
322
    try
323
    {
324
      list = XPathAPI.selectNodeList(xml, xPath);
325
    }
326
    catch (Exception e)
327
    {
328
      // catch an error and return null
329
      logMetacat.error("Error in findDataFileNode: "+e.getMessage());
330
      return dataFileNode;
331
    }
332
    // go through the list and find target docid in online/url
333
    if (list != null)
334
    {
335
      for (int i = 0; i < list.getLength(); i++)
336
      {
337
        Node node = list.item(i);
338
        Node textNode = node.getFirstChild();
339
        if (textNode.getNodeType() == node.TEXT_NODE)
340
        {
341
          String URLData = textNode.getNodeValue();
342
          logMetacat.info("online/url text data: " + URLData);
343
          //Only handle ecogrid data file
344
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
345
          {
346
            // Get docid from url
347
            String docId = MetacatUtil.
348
                               getAccessionNumberFromEcogridIdentifier(URLData);
349
            // Get rid of revision
350
            docId = MetacatUtil.getDocIdFromAccessionNumber(docId);
351
            logMetacat.info("docid from url element in xml is: " +
352
                                     docId);
353
            //if this docid equals target one, we find it
354
            if (docId != null && docId.equals(targetDocId))
355
            {
356
              logMetacat.info("Find target docid in online/url: " +
357
                                       docId);
358
              dataFileNode = node;
359
              break;
360
            }
361
          } //if
362

    
363
        } //if
364
      } //for
365
    }//if
366

    
367
    return dataFileNode;
368
  }//findDataFileNode
369

    
370
  /* Get relative docid list and packagetype */
371
  private Vector getRelativeDocIdList(String id)
372
  {
373
    Vector docList = new Vector();
374
    String sql = "SELECT packagetype, subject from xml_relation " +
375
                 "where object = ?";
376
    ResultSet rs = null;
377
    PreparedStatement pStmt=null;
378
    DBConnection conn = null;
379
    int serialNumber = -1;
380
    try
381
    {
382
      //check out DBConnection
383
      conn=DBConnectionPool.getDBConnection
384
                                   ("ContentTypeProvider.getRelativeDocIdlist");
385
      serialNumber=conn.getCheckOutSerialNumber();
386
      pStmt = conn.prepareStatement(sql);
387
      // binding value
388
      pStmt.setString(1, id);
389
      //execute query
390
      pStmt.execute();
391
      rs = pStmt.getResultSet();
392
      // get result list
393
      String packType = null;
394
      while (rs.next())
395
      {
396
        packType = rs.getString(1);
397
        String subject = rs.getString(2);
398

    
399
        // get rid of duplicate record and add the docid into vector
400
        if (!docList.contains(subject))
401
        {
402

    
403
          docList.add(subject);
404
        }
405
      }//while
406

    
407
      // set up data package type
408
      if ((MetacatUtil.getOptionList(PropertyService.getProperty("xml.packagedoctype"))).
409
                                     contains(packType))
410
      {
411
        //this is beta4 or beta6 version
412
        logMetacat.warn("This is beta package");
413
        packageType = BETA;
414
      }
415
      else if ((MetacatUtil.getOptionList
416
               (PropertyService.getProperty("xml.eml2_0_0namespace"))).contains(packType))
417
      {
418
        // this eml 2 document
419
        logMetacat.warn("This is EML2.0.0 package");
420
        packageType = EML2;
421
      }
422
      else if ((MetacatUtil.getOptionList
423
               (PropertyService.getProperty("xml.eml2_0_1namespace"))).contains(packType))
424
      {
425
        // this eml 2 document
426
        logMetacat.warn("This is EML2.0.1 package");
427
        packageType = EML2;
428
      }
429

    
430

    
431

    
432
    }//try
433
    catch(SQLException e)
434
    {
435

    
436
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
437
                             e.getMessage());
438
    }//catch
439
    catch(PropertyNotFoundException pnfe)
440
    {
441
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
442
                             pnfe.getMessage());
443
    }//catch
444
    finally
445
    {
446
      try
447
      {
448
        pStmt.close();
449
      }
450
      catch (SQLException ee)
451
      {
452
        logMetacat.error("ContenTypProvider.getRelativeDoclist2 " +
453
                             ee.getMessage());
454
      }
455
      finally
456
      {
457
        DBConnectionPool.returnDBConnection(conn, serialNumber);
458
      }
459
    }//finally
460

    
461
    return docList;
462
  }// getRelativeDocIdList
463

    
464
  /* Method to get physical document for data file in xml_documents table for
465
   * beta eml package
466
   */
467
  private String getTargetDocIdForBeta(Vector list, String targetType)
468
  {
469
    String docId = null;
470
    // make sure list is not empty
471
    if (list.isEmpty())
472
    {
473

    
474
      return docId;
475
    }
476
    // get sql command
477
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
478
    // the first element
479
    sql = sql + "'"+(String)list.elementAt(0) + "'";
480
    for (int i=1; i<list.size(); i++)
481
    {
482
      String docid = (String) list.elementAt(i);
483
      sql = sql + ", '" + docid + "'";
484
    }//for
485
    // add parensis
486
    sql = sql + ")";
487
    logMetacat.info("SQL for select doctype: "+ sql);
488
    ResultSet rs = null;
489
    PreparedStatement pStmt=null;
490
    DBConnection conn = null;
491
    int serialNumber = -1;
492
    try
493
    {
494
      //check out DBConnection
495
      conn=DBConnectionPool.getDBConnection
496
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
497
      serialNumber=conn.getCheckOutSerialNumber();
498
      pStmt = conn.prepareStatement(sql);
499
      //execute query
500
      pStmt.execute();
501
      rs = pStmt.getResultSet();
502
      // get result list
503
      while (rs.next())
504
      {
505
        String packType = rs.getString(1);
506
        String targetId  = rs.getString(2);
507
        // find physical document
508
        if ((MetacatUtil.getOptionList(PropertyService.getProperty(targetType))).
509
                                     contains(packType))
510
       {
511
         // assign physical document and jump out the while loop
512
         docId = targetId;
513
         break;
514
       }
515
      }//while
516

    
517
    }//try
518
    catch(SQLException e)
519
    {
520

    
521
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
522
                             e.getMessage());
523
    }//catch
524
    catch(PropertyNotFoundException pnfe)
525
    {
526

    
527
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
528
                             pnfe.getMessage());
529
    }//catch
530
    finally
531
    {
532
      try
533
      {
534
        pStmt.close();
535
      }
536
      catch(SQLException ee)
537
      {
538
        logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta2 " +
539
                             ee.getMessage());
540
      }//catch
541
      finally
542
      {
543
        DBConnectionPool.returnDBConnection(conn, serialNumber);
544
      }
545
    }//finally
546
    logMetacat.warn("target docid is: "+ docId + " "+
547
                             "for target doctype: "+targetType);
548
    return docId;
549
  }
550

    
551

    
552

    
553

    
554
  /* Parser the beta physical document and find the value in format element*/
555
  private void parsePhysicalDocumentForBeta(String physicalDocid)
556
  {
557
    String xmlDoc = null;
558
    try
559
    {
560
      String accNumber = physicalDocid + PropertyService.getProperty("document.accNumSeparator") +
561
        DBUtil.getLatestRevisionInDocumentTable(physicalDocid);
562
      //System.out.println("the accenumber is !!!!!!!!!!!!!!!!!!" + accNumber);
563
      DocumentImpl doc = new DocumentImpl(accNumber);
564
      xmlDoc = doc.toString();
565
      //System.out.println("The physical xml is "+xmlDoc);
566
    }
567
    catch (Exception e)
568
    {
569
      contentType = DEFAULTCONTENTTYPE;
570
      logMetacat.error("Error in ContentTypeProvider." +
571
                         "parsePhysicalDocumentForBeta()" + e.getMessage());
572
      return;
573
    }
574
      // get format element's text value
575
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
576

    
577
    if (format == null)
578
    {
579
      // if couldn't find the format, set contentype default value;
580
      contentType = DEFAULTCONTENTTYPE;
581
    }
582
    else
583
    {
584
      // if can find a format and look up from hash to get value
585
      contentType = lookUpContentType(format);
586
      // couldn't find the content type for this format in hash table
587
      if (contentType == null)
588
      {
589
        //set default vlaue
590
        contentType = DEFAULTCONTENTTYPE;
591
      }//if
592
    }//else
593
  }//parsePhysicalDocumentForBeta
594

    
595
  private String getTextValueFromPath(StringReader xml, String xPath)
596
  {
597
    String textValue = null;
598
    // get nodelist from doc by path
599
    try
600
    {
601
      NodeList list = EMLParser.getPathContent(xml, xPath);
602
      Node elementNode = list.item(0);
603
      Node textNode = elementNode.getFirstChild();
604
      if (textNode.getNodeType() == Node.TEXT_NODE)
605
      {
606
        textValue = textNode.getNodeValue();// get value
607
      }
608

    
609
    }
610
    catch (Exception e)
611
    {
612
      logMetacat.error("error in ContentTypeProvider."+
613
                               "getTextValueFromPath: "+e.getMessage());
614
    }
615
    logMetacat.info("The text value for " + xPath + " is: "+
616
                              textValue);
617
    return textValue;
618
  }//getTextValueFromPath
619

    
620
  /* A method to look up contentype */
621
  private String lookUpContentType(String format)
622
  {
623
    String newFormat = null;
624
    constructContentHashTable();
625
    newFormat = format.toLowerCase().trim();
626
    String type = null;
627
    type = (String)contentTypeHash.get(newFormat);
628
    logMetacat.info("contentType looked from hashtalbe is: " +
629
                              type);
630
    return type;
631
  }// lookupcontentypes
632

    
633
  /* Construct content type hashtable */
634
  private void constructContentHashTable()
635
  {
636
    contentTypeHash.put(TEXT, TEXTYPE);
637
    contentTypeHash.put(XML, XMLTYPE);
638
    contentTypeHash.put(HTML,HTMLTYPE);
639
    contentTypeHash.put(GIF, GIFTYPE);
640
    contentTypeHash.put(JPEG, JPEGTYPE);
641
    contentTypeHash.put(BMP, BMPTYPE);
642
    contentTypeHash.put(TAR, TARTYPE);
643
    contentTypeHash.put(ZIP, ZIPTYPE);
644
    contentTypeHash.put(BINARY, BINARYTYPE);
645

    
646
  }//constructrContentHashTable();
647

    
648

    
649

    
650
  public static void main(String[] argus)
651
  {
652
     try
653
     {
654
       DBConnectionPool pool = DBConnectionPool.getInstance();
655
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
656
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
657
       String str = provider.getContentType();
658
       logMetacat.warn("content type is : " + str);
659
     }
660
     catch(Exception e)
661
     {
662
       logMetacat.error("erorr in Schemalocation.main: " +
663
                                e.getMessage());
664
     }
665
  }
666
}//ContentTypeProvider
(18-18/63)