Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to determine the content type of a given data file
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *
8
 *   '$Author: tao $'
9
 *     '$Date: 2016-01-15 17:17:08 -0800 (Fri, 15 Jan 2016) $'
10
 * '$Revision: 9492 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat;
28

    
29
import java.io.StringReader;
30
import java.sql.PreparedStatement;
31
import java.sql.ResultSet;
32
import java.sql.SQLException;
33
import java.util.Hashtable;
34
import java.util.Vector;
35

    
36
import org.apache.log4j.Logger;
37
import org.apache.xpath.objects.XObject;
38
import org.apache.xpath.XPathAPI;
39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
41
import org.w3c.dom.Attr;
42
import org.w3c.dom.NamedNodeMap;
43
import org.w3c.dom.NodeList;
44
import org.w3c.dom.Document;
45
import org.w3c.dom.Node;
46
import org.w3c.dom.NodeList;
47
import org.w3c.dom.DocumentType;
48
import org.xml.sax.InputSource;
49

    
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

    
57
import org.ecoinformatics.eml.EMLParser;
58

    
59
import edu.ucsb.nceas.metacat.database.DBConnection;
60
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
61
import edu.ucsb.nceas.metacat.properties.PropertyService;
62
import edu.ucsb.nceas.metacat.util.DocumentUtil;
63
import edu.ucsb.nceas.metacat.util.MetacatUtil;
64
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
65
/**
66
 * This class figures out the content type of a given data file.
 * First, it reads xml_relation to find all documents related to the data file.
 * Then it reads xml_documents to locate the physical document, and pulls the
 * content type out of that physical document.
70
 */
71
public class ContentTypeProvider
72
{
73
  // Docid of the data file, derived from the constructor argument through
  // DocumentUtil.getDocIdFromString()
  private String dataFileId = null;
  // Resolved content type, handed out by getContentType()
  private String contentType = null;
  // BETA or EML2; assigned as a side effect of getRelativeDocIdList()
  private String packageType = null;
  // Format name (lower case) -> content type; filled by constructContentHashTable()
  private Hashtable contentTypeHash = new Hashtable();

  // Package type markers stored in packageType
  private static final String BETA = "beta";
  private static final String EML2 = "eml2";

  // Fallback content type taken from the "replication.defaultcontenttype"
  // property; it stays null when that property cannot be read.
  private static String DEFAULTCONTENTTYPE;
  static {
    try {
      DEFAULTCONTENTTYPE = PropertyService.getProperty("replication.defaultcontenttype");
    } catch (PropertyNotFoundException pnfe) {
      // logMetacat is declared below this block, so it is not initialised
      // yet when this code runs; that is why System.err is used here.
      System.err.println("Could not get property DEFAULTCONTENTTYPE:"
          + pnfe.getMessage());
    }
  }

  // XPath of the format element in a beta physical document
  private static final String FORMATPATH = "//format";

  // Format names (keys) and their content types (values).
  // lookUpContentType() lower-cases the format before the lookup, so every
  // key has to be lower case as well.
  private static final String TEXT       = "text";
  private static final String TEXTYPE    = "text/plain";
  private static final String XML        = "xml";
  private static final String XMLTYPE    = "text/xml";
  // was "HTML", which could never match a lower-cased lookup key
  private static final String HTML       = "html";
  private static final String HTMLTYPE   = "text/html";
  private static final String GIF        = "gif";
  private static final String JPEG       = "jpeg";
  private static final String JPEGTYPE   = "image/jpeg";
  private static final String GIFTYPE    = "image/gif";
  private static final String BMP        = "bmp";
  private static final String BMPTYPE    = "image/bmp";
  private static final String TAR        = "tar";
  private static final String TARTYPE    = "application/x-tar";
  private static final String ZIP        = "zip";
  private static final String ZIPTYPE    = "application/x-zip-compressed";
  private static final String BINARY     = "binary";
  private static final String BINARYTYPE = "application/octet-stream";

  // Property keys whose values list the doctypes of entity / physical documents
  private static final String ENTITYDOCTYPE = "xml.entitydoctype";
  private static final String PHYSICALDOCTYPE = "xml.physicaldoctype";
  private static final String EML2DOCTYPE = "eml2namespace";

  // Element names looked for inside an EML 2 physical element
  private static final String DATAFORMAT = "dataFormat";
  private static final String TEXTFORMAT = "textFormat";
  private static final String EXTENALFORMAT = "externallyDefinedFormat";
  private static final String FORMATNAME = "formatName";
  private static final String BINARYRASTERFORMAT = "binaryRasterFormat";

  // XPath of the online url elements in an EML 2 document
  private static final String DATAFILEPATH = "//physical/distribution/online/url";
  private static Logger logMetacat = Logger.getLogger(ContentTypeProvider.class);
121

    
122
  /**
123
   * Constructor of ContentTypeProvider
124
   */
125
  public ContentTypeProvider(String docIdWithRevision)
126
  {
127
    dataFileId = DocumentUtil.getDocIdFromString(docIdWithRevision);
128
    //get relative doclist for data file and package type
129
    Vector docLists = null;
130
    docLists = getRelativeDocIdList(dataFileId);
131

    
132
    if ( packageType == null)
133
    {
134
      // other situation, contenetype is default value
135
      contentType = DEFAULTCONTENTTYPE;
136
    }
137
    else if (packageType.equals(BETA))
138
    {
139
      // for beta package and get entity docid for the data file
140
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
141
      // get physical docid for data file
142
      docLists = getRelativeDocIdList(entityDocid);
143
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
144
      // if no physical docid assign to this data file, content type is default
145
      if (physicalDocId == null)
146
      {
147

    
148
        contentType = DEFAULTCONTENTTYPE;
149
      }
150
      else
151
      {
152

    
153
        parsePhysicalDocumentForBeta(physicalDocId);
154
      }
155
    }
156
    else if (packageType.equals(EML2))
157
    {
158
      // for eml2 package
159
      // get eml document for data file
160
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
161
      String eml2Docid = (String)docLists.elementAt(0);
162
      findContentTypeInEML2(eml2Docid);
163

    
164
    }
165

    
166
  }
167

    
168
  /** Method to get content type */
169
  public String getContentType()
170
  {
171
    return contentType;
172
  }//getContentType
173

    
174
  /* Method to find content type base on data format*/
175
  private void findContentTypeInEML2(String eml2DocId)
176
  {
177
    if (eml2DocId == null)
178
    {
179
      contentType = DEFAULTCONTENTTYPE;
180
      return;
181
    }
182
    DocumentImpl xmlDoc = null;
183
    String xmlString = null;
184
    StringReader read = null;
185
    InputSource in = null;
186
    DocumentBuilderFactory dfactory = null;
187
    Document doc = null;
188
    // create xml document
189
    try
190
    {
191
      String accNumber = eml2DocId + PropertyService.getProperty("document.accNumSeparator") +
192
                    DBUtil.getLatestRevisionInDocumentTable(eml2DocId);
193
      //System.out.println("the acc number is !!!!!!!!!!!!!!!!!"+accNumber);
194
      xmlDoc = new DocumentImpl(accNumber);
195
      xmlString = xmlDoc.toString();
196
      //System.out.println("the xml doc is "+xmlDoc);
197
      // create dom tree
198
      read = new StringReader(xmlString);
199
      in = new InputSource(read);
200
      dfactory = DocumentBuilderFactory.newInstance();
201
      dfactory.setNamespaceAware(false);
202
      doc = dfactory.newDocumentBuilder().parse(in);
203
    }
204
    catch (Exception e)
205
    {
206
      // if faild, set default value
207
      contentType = DEFAULTCONTENTTYPE;
208
      logMetacat.error("Error in ContentTypeProvider." +
209
                         "findContentTypeInEML2()" + e.getMessage());
210
      return;
211
    }
212
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
213
                                                   dataFileId);
214
    if (dataFormatNode == null)
215
    {
216
      contentType = DEFAULTCONTENTTYPE;
217
      logMetacat.info("Couldn't find data format node");
218
      return;
219

    
220
    }
221
    NodeList childList  = dataFormatNode.getChildNodes();
222
    // go through childList
223
    for (int i = 0; i<childList.getLength(); i++)
224
    {
225
      Node child = childList.item(i);
226

    
227
      // if has text format child set to text/plain
228
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
229
      {
230
        logMetacat.info("in text format");
231
        contentType = TEXTYPE;
232
      }
233

    
234
      //external format
235
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
236
      {
237
        logMetacat.info("in external format ");
238
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
239
        logMetacat.info("The format is: "+format);
240
        // if we can find the format in the contentTypeHash table
241
        contentType = (String)lookUpContentType(format);
242
        if (contentType == null)
243
        {
244
          contentType = BINARYTYPE;
245
        }
246
      }
247

    
248
      // binaryRasterFormat
249
      if (child.getNodeName() != null && child.getNodeName().
250
          equals(BINARYRASTERFORMAT))
251
      {
252
        contentType = BINARYTYPE;
253
      }//if
254
    }//for
255
    //if contentype still be null, set default value
256
    if (contentType == null)
257
    {
258
      contentType = DEFAULTCONTENTTYPE;
259
    }
260
  }
261

    
262
  /* Method get text value of given child tagname*/
263
  private String getTextValueForGivenChildTag(Node parentNode,
264
                                              String childTagName)
265
  {
266
    String textValue = null;
267
    NodeList childList = parentNode.getChildNodes();
268
    for (int i= 0; i<childList.getLength();i++)
269
    {
270
      Node child = childList.item(i);
271
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
272
      {
273
        logMetacat.info("Find child node: " + childTagName);
274
        Node textNode = child.getFirstChild();
275
        if (textNode.getNodeType() == Node.TEXT_NODE)
276
        {
277
          textValue = textNode.getNodeValue();
278
        }//if
279
      }//if
280
    }//for
281
    logMetacat.info("The text value for element- " + childTagName +
282
                             " is " + textValue);
283
    return textValue;
284
  }//getTExtValueForGivenChildTag
285

    
286
  /* Find the data format node in eml2 document */
287
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
288
                                       String targetDocId)
289
  {
290
    Node targetNode = null;
291
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
292
    if (node != null)
293
    {
294
      // get the phycial the prent is online, grandparent is distribution
295
      // the grand'parent is physical
296
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
297
      NodeList list = phyicalNode.getChildNodes();
298
      for (int i = 0; i < list.getLength(); i++)
299
      {
300
        Node kid = list.item(i);
301
        // find dataFormat node
302
        if (kid.getNodeType() == node.ELEMENT_NODE &&
303
            kid.getNodeName().equals(DATAFORMAT))
304
        {
305
          targetNode = kid;
306
          break;
307
        } //if
308
      } //for
309
      if (targetNode != null)
310
      {
311
        logMetacat.info("dataFormat node'name: " +
312
                                 targetNode.getNodeName());
313
      }
314
    }//if
315
    return targetNode;
316
  }
317
  /* Find the datafile node */
318
  private Node findDataFileNodeInEML2(Document xml, String xPath,
319
                                String targetDocId)
320
  {
321
    Node dataFileNode = null;
322
    NodeList list = null;
323
    try
324
    {
325
      list = XPathAPI.selectNodeList(xml, xPath);
326
    }
327
    catch (Exception e)
328
    {
329
      // catch an error and return null
330
      logMetacat.error("Error in findDataFileNode: "+e.getMessage());
331
      return dataFileNode;
332
    }
333
    // go through the list and find target docid in online/url
334
    if (list != null)
335
    {
336
      for (int i = 0; i < list.getLength(); i++)
337
      {
338
        Node node = list.item(i);
339
        Node textNode = node.getFirstChild();
340
        if (textNode.getNodeType() == node.TEXT_NODE)
341
        {
342
          String URLData = textNode.getNodeValue();
343
          logMetacat.info("online/url text data: " + URLData);
344
          //Only handle ecogrid data file
345
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
346
          {
347
            // Get docid from url
348
            String docId = 
349
            	DocumentUtil.getAccessionNumberFromEcogridIdentifier(URLData);
350
            // Get rid of revision
351
            docId = DocumentUtil.getDocIdFromAccessionNumber(docId);
352
            logMetacat.info("docid from url element in xml is: " +
353
                                     docId);
354
            //if this docid equals target one, we find it
355
            if (docId != null && docId.equals(targetDocId))
356
            {
357
              logMetacat.info("Find target docid in online/url: " +
358
                                       docId);
359
              dataFileNode = node;
360
              break;
361
            }
362
          } //if
363

    
364
        } //if
365
      } //for
366
    }//if
367

    
368
    return dataFileNode;
369
  }//findDataFileNode
370

    
371
  /* Get relative docid list and packagetype */
372
  private Vector getRelativeDocIdList(String id)
373
  {
374
    Vector docList = new Vector();
375
    String sql = "SELECT packagetype, subject from xml_relation " +
376
                 "where object = ?";
377
    ResultSet rs = null;
378
    PreparedStatement pStmt=null;
379
    DBConnection conn = null;
380
    int serialNumber = -1;
381
    try
382
    {
383
      //check out DBConnection
384
      conn=DBConnectionPool.getDBConnection
385
                                   ("ContentTypeProvider.getRelativeDocIdlist");
386
      serialNumber=conn.getCheckOutSerialNumber();
387
      pStmt = conn.prepareStatement(sql);
388
      // binding value
389
      pStmt.setString(1, id);
390
      //execute query
391
      pStmt.execute();
392
      rs = pStmt.getResultSet();
393
      // get result list
394
      String packType = null;
395
      while (rs.next())
396
      {
397
        packType = rs.getString(1);
398
        String subject = rs.getString(2);
399

    
400
        // get rid of duplicate record and add the docid into vector
401
        if (!docList.contains(subject))
402
        {
403

    
404
          docList.add(subject);
405
        }
406
      }//while
407

    
408
      // set up data package type
409
      if ((MetacatUtil.getOptionList(PropertyService.getProperty("xml.packagedoctype"))).
410
                                     contains(packType))
411
      {
412
        //this is beta4 or beta6 version
413
        logMetacat.info("This is beta package");
414
        packageType = BETA;
415
      }
416
      else if ((MetacatUtil.getOptionList
417
               (PropertyService.getProperty("xml.eml2_0_0namespace"))).contains(packType))
418
      {
419
        // this eml 2 document
420
        logMetacat.info("This is EML2.0.0 package");
421
        packageType = EML2;
422
      }
423
      else if ((MetacatUtil.getOptionList
424
               (PropertyService.getProperty("xml.eml2_0_1namespace"))).contains(packType))
425
      {
426
        // this eml 2 document
427
        logMetacat.info("This is EML2.0.1 package");
428
        packageType = EML2;
429
      }
430

    
431

    
432

    
433
    }//try
434
    catch(SQLException e)
435
    {
436

    
437
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
438
                             e.getMessage());
439
    }//catch
440
    catch(PropertyNotFoundException pnfe)
441
    {
442
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
443
                             pnfe.getMessage());
444
    }//catch
445
    finally
446
    {
447
      try
448
      {
449
        if(rs != null) {
450
            rs.close();
451
        }
452
        if(pStmt != null) {
453
            pStmt.close();
454
        }
455
        
456
      }
457
      catch (SQLException ee)
458
      {
459
        logMetacat.error("ContenTypProvider.getRelativeDoclist2 " +
460
                             ee.getMessage());
461
      }
462
      finally
463
      {
464
        DBConnectionPool.returnDBConnection(conn, serialNumber);
465
      }
466
    }//finally
467

    
468
    return docList;
469
  }// getRelativeDocIdList
470

    
471
  /* Method to get physical document for data file in xml_documents table for
472
   * beta eml package
473
   */
474
  private String getTargetDocIdForBeta(Vector list, String targetType)
475
  {
476
    String docId = null;
477
    // make sure list is not empty
478
    if (list.isEmpty())
479
    {
480

    
481
      return docId;
482
    }
483
    // get sql command
484
    String sql = "SELECT doctype, docid from xml_documents where docid in ( ";
485
    // the first element
486
    sql = sql + "?";
487
    // remaining values
488
    for (int i = 1; i < list.size(); i++) {
489
      sql = sql + ", ?";
490
    }
491
    // add parentheses
492
    sql = sql + ")";
493
    logMetacat.info("SQL for select doctype: "+ sql);
494
    ResultSet rs = null;
495
    PreparedStatement pStmt=null;
496
    DBConnection conn = null;
497
    int serialNumber = -1;
498
    try
499
    {
500
      //check out DBConnection
501
      conn=DBConnectionPool.getDBConnection
502
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
503
      serialNumber=conn.getCheckOutSerialNumber();
504
      pStmt = conn.prepareStatement(sql);
505
      // set the parameter values
506
      for (int i = 0; i < list.size(); i++) {
507
        String docid = (String) list.elementAt(i);
508
        pStmt.setString(i+1, docid);
509
      }
510
      //execute query
511
      pStmt.execute();
512
      rs = pStmt.getResultSet();
513
      // get result list
514
      while (rs.next())
515
      {
516
        String packType = rs.getString(1);
517
        String targetId  = rs.getString(2);
518
        // find physical document
519
        if ((MetacatUtil.getOptionList(PropertyService.getProperty(targetType))).
520
                                     contains(packType))
521
       {
522
         // assign physical document and jump out the while loop
523
         docId = targetId;
524
         break;
525
       }
526
      }//while
527

    
528
    }//try
529
    catch(SQLException e)
530
    {
531

    
532
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
533
                             e.getMessage());
534
    }//catch
535
    catch(PropertyNotFoundException pnfe)
536
    {
537

    
538
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
539
                             pnfe.getMessage());
540
    }//catch
541
    finally
542
    {
543
      try
544
      {
545
          if(rs != null) {
546
              rs.close();
547
          }
548
          if(pStmt != null) {
549
              pStmt.close();
550
          }
551
        
552
      }
553
      catch(SQLException ee)
554
      {
555
        logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta2 " +
556
                             ee.getMessage());
557
      }//catch
558
      finally
559
      {
560
        DBConnectionPool.returnDBConnection(conn, serialNumber);
561
      }
562
    }//finally
563
    logMetacat.info("target docid is: "+ docId + " "+
564
                             "for target doctype: "+targetType);
565
    return docId;
566
  }
567

    
568

    
569

    
570

    
571
  /* Parser the beta physical document and find the value in format element*/
572
  private void parsePhysicalDocumentForBeta(String physicalDocid)
573
  {
574
    String xmlDoc = null;
575
    try
576
    {
577
      String accNumber = physicalDocid + PropertyService.getProperty("document.accNumSeparator") +
578
        DBUtil.getLatestRevisionInDocumentTable(physicalDocid);
579
      //System.out.println("the accenumber is !!!!!!!!!!!!!!!!!!" + accNumber);
580
      DocumentImpl doc = new DocumentImpl(accNumber);
581
      xmlDoc = doc.toString();
582
      //System.out.println("The physical xml is "+xmlDoc);
583
    }
584
    catch (Exception e)
585
    {
586
      contentType = DEFAULTCONTENTTYPE;
587
      logMetacat.error("Error in ContentTypeProvider." +
588
                         "parsePhysicalDocumentForBeta()" + e.getMessage());
589
      return;
590
    }
591
      // get format element's text value
592
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
593

    
594
    if (format == null)
595
    {
596
      // if couldn't find the format, set contentype default value;
597
      contentType = DEFAULTCONTENTTYPE;
598
    }
599
    else
600
    {
601
      // if can find a format and look up from hash to get value
602
      contentType = lookUpContentType(format);
603
      // couldn't find the content type for this format in hash table
604
      if (contentType == null)
605
      {
606
        //set default vlaue
607
        contentType = DEFAULTCONTENTTYPE;
608
      }//if
609
    }//else
610
  }//parsePhysicalDocumentForBeta
611

    
612
  private String getTextValueFromPath(StringReader xml, String xPath)
613
  {
614
    String textValue = null;
615
    // get nodelist from doc by path
616
    try
617
    {
618
      NodeList list = EMLParser.getPathContent(xml, xPath);
619
      Node elementNode = list.item(0);
620
      Node textNode = elementNode.getFirstChild();
621
      if (textNode.getNodeType() == Node.TEXT_NODE)
622
      {
623
        textValue = textNode.getNodeValue();// get value
624
      }
625

    
626
    }
627
    catch (Exception e)
628
    {
629
      logMetacat.error("error in ContentTypeProvider."+
630
                               "getTextValueFromPath: "+e.getMessage());
631
    }
632
    logMetacat.info("The text value for " + xPath + " is: "+
633
                              textValue);
634
    return textValue;
635
  }//getTextValueFromPath
636

    
637
  /* A method to look up contentype */
638
  private String lookUpContentType(String format)
639
  {
640
    String newFormat = null;
641
    constructContentHashTable();
642
    newFormat = format.toLowerCase().trim();
643
    String type = null;
644
    type = (String)contentTypeHash.get(newFormat);
645
    logMetacat.info("contentType looked from hashtalbe is: " +
646
                              type);
647
    return type;
648
  }// lookupcontentypes
649

    
650
  /* Construct content type hashtable */
651
  private void constructContentHashTable()
652
  {
653
    contentTypeHash.put(TEXT, TEXTYPE);
654
    contentTypeHash.put(XML, XMLTYPE);
655
    contentTypeHash.put(HTML,HTMLTYPE);
656
    contentTypeHash.put(GIF, GIFTYPE);
657
    contentTypeHash.put(JPEG, JPEGTYPE);
658
    contentTypeHash.put(BMP, BMPTYPE);
659
    contentTypeHash.put(TAR, TARTYPE);
660
    contentTypeHash.put(ZIP, ZIPTYPE);
661
    contentTypeHash.put(BINARY, BINARYTYPE);
662

    
663
  }//constructrContentHashTable();
664

    
665

    
666

    
667
  public static void main(String[] argus)
668
  {
669
     try
670
     {
671
       DBConnectionPool pool = DBConnectionPool.getInstance();
672
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
673
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
674
       String str = provider.getContentType();
675
       logMetacat.info("content type is : " + str);
676
     }
677
     catch(Exception e)
678
     {
679
       logMetacat.error("erorr in Schemalocation.main: " +
680
                                e.getMessage());
681
     }
682
  }
683
}//ContentTypeProvider
(14-14/64)