Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to determine the content type of a given data file
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *
8
 *   '$Author: daigle $'
9
 *     '$Date: 2008-08-05 17:50:14 -0700 (Tue, 05 Aug 2008) $'
10
 * '$Revision: 4213 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat;
28

    
29
import java.io.StringReader;
30
import java.sql.PreparedStatement;
31
import java.sql.ResultSet;
32
import java.sql.SQLException;
33
import java.util.Hashtable;
34
import java.util.Vector;
35

    
36
import org.apache.log4j.Logger;
37
import org.apache.xpath.objects.XObject;
38
import org.apache.xpath.XPathAPI;
39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
41
import org.w3c.dom.Attr;
42
import org.w3c.dom.NamedNodeMap;
43
import org.w3c.dom.NodeList;
44
import org.w3c.dom.Document;
45
import org.w3c.dom.Node;
46
import org.w3c.dom.NodeList;
47
import org.w3c.dom.DocumentType;
48
import org.xml.sax.InputSource;
49

    
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

    
57
import org.ecoinformatics.eml.EMLParser;
58

    
59
import edu.ucsb.nceas.metacat.service.PropertyService;
60
import edu.ucsb.nceas.metacat.util.MetaCatUtil;
61
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
62
/**
63
 * This class will figure out which content type it is for a given data file.
64
 * First, from xml_relation to get all relative files to this data file.
65
 * Then from xml_documents to get physical files. From physical file pull out
66
 * the content type
67
 */
68
public class ContentTypeProvider
69
{
70
  private String dataFileId = null;
71
  private String contentType = null;
72
  private String packageType = null;
73
  private Hashtable contentTypeHash = new Hashtable();
74

    
75
  //Constant
76
  private String BETA = "beta";
77
  private String EML2 = "eml2";
78
  private static String DEFAULTCONTENTTYPE;
79
  static {
80
		try {
81
			DEFAULTCONTENTTYPE = PropertyService.getProperty("replication.defaultcontenttype");
82
		} catch (PropertyNotFoundException pnfe) {
83
			System.err.println("Could not get property DEFAULTCONTENTTYPE:" 
84
					+ pnfe.getMessage());
85
		}
86
	}
87
  private String FORMATPATH = "//format";
88
  private String TEXT       = "text";
89
  private String TEXTYPE    ="text/plain";
90
  private String XML        = "xml";
91
  private String XMLTYPE    = "text/xml";
92
  private String HTML       = "HTML";
93
  private String HTMLTYPE   = "text/html";
94
  private String GIF        = "gif";
95
  private String JPEG       = "jpeg";
96
  private String JPEGTYPE   = "image/jpeg";
97
  private String GIFTYPE    = "image/gif";
98
  private String BMP        = "bmp";
99
  private String BMPTYPE    = "image/bmp";
100
  private String TAR        = "tar";
101
  private String TARTYPE    ="application/x-tar";
102
  private String ZIP        = "zip";
103
  private String ZIPTYPE    = "application/x-zip-compressed";
104
  private String BINARY     = "binary";
105
  private String BINARYTYPE = "application/octet-stream";
106

    
107
  private String ENTITYDOCTYPE = "xml.entitydoctype";
108
  private String PHYSICALDOCTYPE = "xml.physicaldoctype";
109
  private String EML2DOCTYPE = "eml2namespace";
110
  private String DATAFORMAT = "dataFormat";
111
  private String TEXTFORMAT = "textFormat";
112
  private String EXTENALFORMAT = "externallyDefinedFormat";
113
  private String FORMATNAME = "formatName";
114
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
115

    
116
  private String DATAFILEPATH ="//physical/distribution/online/url";
117
  private static Logger logMetacat = Logger.getLogger(ContentTypeProvider.class);
118

    
119
  /**
120
   * Constructor of ContentTypeProvider
121
   */
122
  public ContentTypeProvider(String docIdWithRevision)
123
  {
124
    dataFileId = MetaCatUtil.getDocIdFromString(docIdWithRevision);
125
    //get relative doclist for data file and package type
126
    Vector docLists = null;
127
    docLists = getRelativeDocIdList(dataFileId);
128

    
129
    if ( packageType == null)
130
    {
131
      // other situation, contenetype is default value
132
      contentType = DEFAULTCONTENTTYPE;
133
    }
134
    else if (packageType.equals(BETA))
135
    {
136
      // for beta package and get entity docid for the data file
137
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
138
      // get physical docid for data file
139
      docLists = getRelativeDocIdList(entityDocid);
140
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
141
      // if no physical docid assign to this data file, content type is default
142
      if (physicalDocId == null)
143
      {
144

    
145
        contentType = DEFAULTCONTENTTYPE;
146
      }
147
      else
148
      {
149

    
150
        parsePhysicalDocumentForBeta(physicalDocId);
151
      }
152
    }
153
    else if (packageType.equals(EML2))
154
    {
155
      // for eml2 package
156
      // get eml document for data file
157
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
158
      String eml2Docid = (String)docLists.elementAt(0);
159
      findContentTypeInEML2(eml2Docid);
160

    
161
    }
162

    
163
  }
164

    
165
  /** Method to get content type */
166
  public String getContentType()
167
  {
168
    return contentType;
169
  }//getContentType
170

    
171
  /* Method to find content type base on data format*/
172
  private void findContentTypeInEML2(String eml2DocId)
173
  {
174
    if (eml2DocId == null)
175
    {
176
      contentType = DEFAULTCONTENTTYPE;
177
      return;
178
    }
179
    DocumentImpl xmlDoc = null;
180
    String xmlString = null;
181
    StringReader read = null;
182
    InputSource in = null;
183
    DocumentBuilderFactory dfactory = null;
184
    Document doc = null;
185
    // create xml document
186
    try
187
    {
188
      String accNumber = eml2DocId + PropertyService.getProperty("document.accNumSeparator") +
189
                    DBUtil.getLatestRevisionInDocumentTable(eml2DocId);
190
      //System.out.println("the acc number is !!!!!!!!!!!!!!!!!"+accNumber);
191
      xmlDoc = new DocumentImpl(accNumber);
192
      xmlString = xmlDoc.toString();
193
      //System.out.println("the xml doc is "+xmlDoc);
194
      // create dom tree
195
      read = new StringReader(xmlString);
196
      in = new InputSource(read);
197
      dfactory = DocumentBuilderFactory.newInstance();
198
      dfactory.setNamespaceAware(false);
199
      doc = dfactory.newDocumentBuilder().parse(in);
200
    }
201
    catch (Exception e)
202
    {
203
      // if faild, set default value
204
      contentType = DEFAULTCONTENTTYPE;
205
      logMetacat.error("Error in ContentTypeProvider." +
206
                         "findContentTypeInEML2()" + e.getMessage());
207
      return;
208
    }
209
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
210
                                                   dataFileId);
211
    if (dataFormatNode == null)
212
    {
213
      contentType = DEFAULTCONTENTTYPE;
214
      logMetacat.info("Couldn't find data format node");
215
      return;
216

    
217
    }
218
    NodeList childList  = dataFormatNode.getChildNodes();
219
    // go through childList
220
    for (int i = 0; i<childList.getLength(); i++)
221
    {
222
      Node child = childList.item(i);
223

    
224
      // if has text format child set to text/plain
225
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
226
      {
227
        logMetacat.info("in text format");
228
        contentType = TEXTYPE;
229
      }
230

    
231
      //external format
232
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
233
      {
234
        logMetacat.info("in external format ");
235
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
236
        logMetacat.info("The format is: "+format);
237
        // if we can find the format in the contentTypeHash table
238
        contentType = (String)lookUpContentType(format);
239
        if (contentType == null)
240
        {
241
          contentType = BINARYTYPE;
242
        }
243
      }
244

    
245
      // binaryRasterFormat
246
      if (child.getNodeName() != null && child.getNodeName().
247
          equals(BINARYRASTERFORMAT))
248
      {
249
        contentType = BINARYTYPE;
250
      }//if
251
    }//for
252
    //if contentype still be null, set default value
253
    if (contentType == null)
254
    {
255
      contentType = DEFAULTCONTENTTYPE;
256
    }
257
  }
258

    
259
  /* Method get text value of given child tagname*/
260
  private String getTextValueForGivenChildTag(Node parentNode,
261
                                              String childTagName)
262
  {
263
    String textValue = null;
264
    NodeList childList = parentNode.getChildNodes();
265
    for (int i= 0; i<childList.getLength();i++)
266
    {
267
      Node child = childList.item(i);
268
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
269
      {
270
        logMetacat.info("Find child node: " + childTagName);
271
        Node textNode = child.getFirstChild();
272
        if (textNode.getNodeType() == Node.TEXT_NODE)
273
        {
274
          textValue = textNode.getNodeValue();
275
        }//if
276
      }//if
277
    }//for
278
    logMetacat.info("The text value for element- " + childTagName +
279
                             " is " + textValue);
280
    return textValue;
281
  }//getTExtValueForGivenChildTag
282

    
283
  /* Find the data format node in eml2 document */
284
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
285
                                       String targetDocId)
286
  {
287
    Node targetNode = null;
288
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
289
    if (node != null)
290
    {
291
      // get the phycial the prent is online, grandparent is distribution
292
      // the grand'parent is physical
293
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
294
      NodeList list = phyicalNode.getChildNodes();
295
      for (int i = 0; i < list.getLength(); i++)
296
      {
297
        Node kid = list.item(i);
298
        // find dataFormat node
299
        if (kid.getNodeType() == node.ELEMENT_NODE &&
300
            kid.getNodeName().equals(DATAFORMAT))
301
        {
302
          targetNode = kid;
303
          break;
304
        } //if
305
      } //for
306
      if (targetNode != null)
307
      {
308
        logMetacat.info("dataFormat node'name: " +
309
                                 targetNode.getNodeName());
310
      }
311
    }//if
312
    return targetNode;
313
  }
314
  /* Find the datafile node */
315
  private Node findDataFileNodeInEML2(Document xml, String xPath,
316
                                String targetDocId)
317
  {
318
    Node dataFileNode = null;
319
    NodeList list = null;
320
    try
321
    {
322
      list = XPathAPI.selectNodeList(xml, xPath);
323
    }
324
    catch (Exception e)
325
    {
326
      // catch an error and return null
327
      logMetacat.error("Error in findDataFileNode: "+e.getMessage());
328
      return dataFileNode;
329
    }
330
    // go through the list and find target docid in online/url
331
    if (list != null)
332
    {
333
      for (int i = 0; i < list.getLength(); i++)
334
      {
335
        Node node = list.item(i);
336
        Node textNode = node.getFirstChild();
337
        if (textNode.getNodeType() == node.TEXT_NODE)
338
        {
339
          String URLData = textNode.getNodeValue();
340
          logMetacat.info("online/url text data: " + URLData);
341
          //Only handle ecogrid data file
342
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
343
          {
344
            // Get docid from url
345
            String docId = MetaCatUtil.
346
                               getAccessionNumberFromEcogridIdentifier(URLData);
347
            // Get rid of revision
348
            docId = MetaCatUtil.getDocIdFromAccessionNumber(docId);
349
            logMetacat.info("docid from url element in xml is: " +
350
                                     docId);
351
            //if this docid equals target one, we find it
352
            if (docId != null && docId.equals(targetDocId))
353
            {
354
              logMetacat.info("Find target docid in online/url: " +
355
                                       docId);
356
              dataFileNode = node;
357
              break;
358
            }
359
          } //if
360

    
361
        } //if
362
      } //for
363
    }//if
364

    
365
    return dataFileNode;
366
  }//findDataFileNode
367

    
368
  /* Get relative docid list and packagetype */
369
  private Vector getRelativeDocIdList(String id)
370
  {
371
    Vector docList = new Vector();
372
    String sql = "SELECT packagetype, subject from xml_relation " +
373
                 "where object = ?";
374
    ResultSet rs = null;
375
    PreparedStatement pStmt=null;
376
    DBConnection conn = null;
377
    int serialNumber = -1;
378
    try
379
    {
380
      //check out DBConnection
381
      conn=DBConnectionPool.getDBConnection
382
                                   ("ContentTypeProvider.getRelativeDocIdlist");
383
      serialNumber=conn.getCheckOutSerialNumber();
384
      pStmt = conn.prepareStatement(sql);
385
      // binding value
386
      pStmt.setString(1, id);
387
      //execute query
388
      pStmt.execute();
389
      rs = pStmt.getResultSet();
390
      // get result list
391
      String packType = null;
392
      while (rs.next())
393
      {
394
        packType = rs.getString(1);
395
        String subject = rs.getString(2);
396

    
397
        // get rid of duplicate record and add the docid into vector
398
        if (!docList.contains(subject))
399
        {
400

    
401
          docList.add(subject);
402
        }
403
      }//while
404

    
405
      // set up data package type
406
      if ((MetaCatUtil.getOptionList(PropertyService.getProperty("xml.packagedoctype"))).
407
                                     contains(packType))
408
      {
409
        //this is beta4 or beta6 version
410
        logMetacat.warn("This is beta package");
411
        packageType = BETA;
412
      }
413
      else if ((MetaCatUtil.getOptionList
414
               (PropertyService.getProperty("xml.eml2_0_0namespace"))).contains(packType))
415
      {
416
        // this eml 2 document
417
        logMetacat.warn("This is EML2.0.0 package");
418
        packageType = EML2;
419
      }
420
      else if ((MetaCatUtil.getOptionList
421
               (PropertyService.getProperty("xml.eml2_0_1namespace"))).contains(packType))
422
      {
423
        // this eml 2 document
424
        logMetacat.warn("This is EML2.0.1 package");
425
        packageType = EML2;
426
      }
427

    
428

    
429

    
430
    }//try
431
    catch(SQLException e)
432
    {
433

    
434
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
435
                             e.getMessage());
436
    }//catch
437
    catch(PropertyNotFoundException pnfe)
438
    {
439
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
440
                             pnfe.getMessage());
441
    }//catch
442
    finally
443
    {
444
      try
445
      {
446
        pStmt.close();
447
      }
448
      catch (SQLException ee)
449
      {
450
        logMetacat.error("ContenTypProvider.getRelativeDoclist2 " +
451
                             ee.getMessage());
452
      }
453
      finally
454
      {
455
        DBConnectionPool.returnDBConnection(conn, serialNumber);
456
      }
457
    }//finally
458

    
459
    return docList;
460
  }// getRelativeDocIdList
461

    
462
  /* Method to get physical document for data file in xml_documents table for
463
   * beta eml package
464
   */
465
  private String getTargetDocIdForBeta(Vector list, String targetType)
466
  {
467
    String docId = null;
468
    // make sure list is not empty
469
    if (list.isEmpty())
470
    {
471

    
472
      return docId;
473
    }
474
    // get sql command
475
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
476
    // the first element
477
    sql = sql + "'"+(String)list.elementAt(0) + "'";
478
    for (int i=1; i<list.size(); i++)
479
    {
480
      String docid = (String) list.elementAt(i);
481
      sql = sql + ", '" + docid + "'";
482
    }//for
483
    // add parensis
484
    sql = sql + ")";
485
    logMetacat.info("SQL for select doctype: "+ sql);
486
    ResultSet rs = null;
487
    PreparedStatement pStmt=null;
488
    DBConnection conn = null;
489
    int serialNumber = -1;
490
    try
491
    {
492
      //check out DBConnection
493
      conn=DBConnectionPool.getDBConnection
494
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
495
      serialNumber=conn.getCheckOutSerialNumber();
496
      pStmt = conn.prepareStatement(sql);
497
      //execute query
498
      pStmt.execute();
499
      rs = pStmt.getResultSet();
500
      // get result list
501
      while (rs.next())
502
      {
503
        String packType = rs.getString(1);
504
        String targetId  = rs.getString(2);
505
        // find physical document
506
        if ((MetaCatUtil.getOptionList(PropertyService.getProperty(targetType))).
507
                                     contains(packType))
508
       {
509
         // assign physical document and jump out the while loop
510
         docId = targetId;
511
         break;
512
       }
513
      }//while
514

    
515
    }//try
516
    catch(SQLException e)
517
    {
518

    
519
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
520
                             e.getMessage());
521
    }//catch
522
    catch(PropertyNotFoundException pnfe)
523
    {
524

    
525
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
526
                             pnfe.getMessage());
527
    }//catch
528
    finally
529
    {
530
      try
531
      {
532
        pStmt.close();
533
      }
534
      catch(SQLException ee)
535
      {
536
        logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta2 " +
537
                             ee.getMessage());
538
      }//catch
539
      finally
540
      {
541
        DBConnectionPool.returnDBConnection(conn, serialNumber);
542
      }
543
    }//finally
544
    logMetacat.warn("target docid is: "+ docId + " "+
545
                             "for target doctype: "+targetType);
546
    return docId;
547
  }
548

    
549

    
550

    
551

    
552
  /* Parser the beta physical document and find the value in format element*/
553
  private void parsePhysicalDocumentForBeta(String physicalDocid)
554
  {
555
    String xmlDoc = null;
556
    try
557
    {
558
      String accNumber = physicalDocid + PropertyService.getProperty("document.accNumSeparator") +
559
        DBUtil.getLatestRevisionInDocumentTable(physicalDocid);
560
      //System.out.println("the accenumber is !!!!!!!!!!!!!!!!!!" + accNumber);
561
      DocumentImpl doc = new DocumentImpl(accNumber);
562
      xmlDoc = doc.toString();
563
      //System.out.println("The physical xml is "+xmlDoc);
564
    }
565
    catch (Exception e)
566
    {
567
      contentType = DEFAULTCONTENTTYPE;
568
      logMetacat.error("Error in ContentTypeProvider." +
569
                         "parsePhysicalDocumentForBeta()" + e.getMessage());
570
      return;
571
    }
572
      // get format element's text value
573
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
574

    
575
    if (format == null)
576
    {
577
      // if couldn't find the format, set contentype default value;
578
      contentType = DEFAULTCONTENTTYPE;
579
    }
580
    else
581
    {
582
      // if can find a format and look up from hash to get value
583
      contentType = lookUpContentType(format);
584
      // couldn't find the content type for this format in hash table
585
      if (contentType == null)
586
      {
587
        //set default vlaue
588
        contentType = DEFAULTCONTENTTYPE;
589
      }//if
590
    }//else
591
  }//parsePhysicalDocumentForBeta
592

    
593
  private String getTextValueFromPath(StringReader xml, String xPath)
594
  {
595
    String textValue = null;
596
    // get nodelist from doc by path
597
    try
598
    {
599
      NodeList list = EMLParser.getPathContent(xml, xPath);
600
      Node elementNode = list.item(0);
601
      Node textNode = elementNode.getFirstChild();
602
      if (textNode.getNodeType() == Node.TEXT_NODE)
603
      {
604
        textValue = textNode.getNodeValue();// get value
605
      }
606

    
607
    }
608
    catch (Exception e)
609
    {
610
      logMetacat.error("error in ContentTypeProvider."+
611
                               "getTextValueFromPath: "+e.getMessage());
612
    }
613
    logMetacat.info("The text value for " + xPath + " is: "+
614
                              textValue);
615
    return textValue;
616
  }//getTextValueFromPath
617

    
618
  /* A method to look up contentype */
619
  private String lookUpContentType(String format)
620
  {
621
    String newFormat = null;
622
    constructContentHashTable();
623
    newFormat = format.toLowerCase().trim();
624
    String type = null;
625
    type = (String)contentTypeHash.get(newFormat);
626
    logMetacat.info("contentType looked from hashtalbe is: " +
627
                              type);
628
    return type;
629
  }// lookupcontentypes
630

    
631
  /* Construct content type hashtable */
632
  private void constructContentHashTable()
633
  {
634
    contentTypeHash.put(TEXT, TEXTYPE);
635
    contentTypeHash.put(XML, XMLTYPE);
636
    contentTypeHash.put(HTML,HTMLTYPE);
637
    contentTypeHash.put(GIF, GIFTYPE);
638
    contentTypeHash.put(JPEG, JPEGTYPE);
639
    contentTypeHash.put(BMP, BMPTYPE);
640
    contentTypeHash.put(TAR, TARTYPE);
641
    contentTypeHash.put(ZIP, ZIPTYPE);
642
    contentTypeHash.put(BINARY, BINARYTYPE);
643

    
644
  }//constructrContentHashTable();
645

    
646

    
647

    
648
  public static void main(String[] argus)
649
  {
650
     try
651
     {
652
       DBConnectionPool pool = DBConnectionPool.getInstance();
653
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
654
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
655
       String str = provider.getContentType();
656
       logMetacat.warn("content type is : " + str);
657
     }
658
     catch(Exception e)
659
     {
660
       logMetacat.error("erorr in Schemalocation.main: " +
661
                                e.getMessage());
662
     }
663
  }
664
}//ContentTypeProvider
(15-15/67)