Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to determine the content type of a given data file
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *
8
 *   '$Author: jones $'
9
 *     '$Date: 2006-11-10 10:25:38 -0800 (Fri, 10 Nov 2006) $'
10
 * '$Revision: 3077 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat;
28

    
29
import java.io.StringReader;
30
import java.sql.PreparedStatement;
31
import java.sql.ResultSet;
32
import java.sql.SQLException;
33
import java.util.Hashtable;
34
import java.util.Vector;
35

    
36
import org.apache.log4j.Logger;
37
import org.apache.xpath.objects.XObject;
38
import org.apache.xpath.XPathAPI;
39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
41
import org.w3c.dom.Attr;
42
import org.w3c.dom.NamedNodeMap;
43
import org.w3c.dom.NodeList;
44
import org.w3c.dom.Document;
45
import org.w3c.dom.Node;
46
import org.w3c.dom.NodeList;
47
import org.w3c.dom.DocumentType;
48
import org.xml.sax.InputSource;
49

    
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

    
57
import org.ecoinformatics.eml.EMLParser;
58
/**
 * This class determines the content type of a given data file.
 * It first queries xml_relation for the documents related to the data file.
 * For a beta package it then uses xml_documents to locate the physical
 * document and reads the content type from its format element; for an EML2
 * package it reads the data format from the EML document itself.
 */
64
public class ContentTypeProvider
65
{
66
  private String dataFileId = null;
67
  private String contentType = null;
68
  private String packageType = null;
69
  private Hashtable contentTypeHash = new Hashtable();
70

    
71
  //Constant
72
  private String BETA = "beta";
73
  private String EML2 = "eml2";
74
  private String DEFAULTCONTENTTYPE = MetaCatUtil.
75
                                           getOption("defaultcontenttype");
76
  private String FORMATPATH = "//format";
77
  private String TEXT       = "text";
78
  private String TEXTYPE    ="text/plain";
79
  private String XML        = "xml";
80
  private String XMLTYPE    = "text/xml";
81
  private String HTML       = "HTML";
82
  private String HTMLTYPE   = "text/html";
83
  private String GIF        = "gif";
84
  private String JPEG       = "jpeg";
85
  private String JPEGTYPE   = "image/jpeg";
86
  private String GIFTYPE    = "image/gif";
87
  private String BMP        = "bmp";
88
  private String BMPTYPE    = "image/bmp";
89
  private String TAR        = "tar";
90
  private String TARTYPE    ="application/x-tar";
91
  private String ZIP        = "zip";
92
  private String ZIPTYPE    = "application/x-zip-compressed";
93
  private String BINARY     = "binary";
94
  private String BINARYTYPE = "application/octet-stream";
95

    
96
  private String ENTITYDOCTYPE = "entitydoctype";
97
  private String PHYSICALDOCTYPE = "physicaldoctype";
98
  private String EML2DOCTYPE = "eml2namespace";
99
  private String DATAFORMAT = "dataFormat";
100
  private String TEXTFORMAT = "textFormat";
101
  private String EXTENALFORMAT = "externallyDefinedFormat";
102
  private String FORMATNAME = "formatName";
103
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
104

    
105
  private String DATAFILEPATH ="//physical/distribution/online/url";
106
  private static Logger logMetacat = Logger.getLogger(ContentTypeProvider.class);
107

    
108
  /**
109
   * Constructor of ContentTypeProvider
110
   */
111
  public ContentTypeProvider(String docIdWithRevision)
112
  {
113
    dataFileId = MetaCatUtil.getDocIdFromString(docIdWithRevision);
114
    //get relative doclist for data file and package type
115
    Vector docLists = null;
116
    docLists = getRelativeDocIdList(dataFileId);
117

    
118
    if ( packageType == null)
119
    {
120
      // other situation, contenetype is default value
121
      contentType = DEFAULTCONTENTTYPE;
122
    }
123
    else if (packageType.equals(BETA))
124
    {
125
      // for beta package and get entity docid for the data file
126
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
127
      // get physical docid for data file
128
      docLists = getRelativeDocIdList(entityDocid);
129
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
130
      // if no physical docid assign to this data file, content type is default
131
      if (physicalDocId == null)
132
      {
133

    
134
        contentType = DEFAULTCONTENTTYPE;
135
      }
136
      else
137
      {
138

    
139
        parsePhysicalDocumentForBeta(physicalDocId);
140
      }
141
    }
142
    else if (packageType.equals(EML2))
143
    {
144
      // for eml2 package
145
      // get eml document for data file
146
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
147
      String eml2Docid = (String)docLists.elementAt(0);
148
      findContentTypeInEML2(eml2Docid);
149

    
150
    }
151

    
152
  }
153

    
154
  /** Method to get content type */
155
  public String getContentType()
156
  {
157
    return contentType;
158
  }//getContentType
159

    
160
  /* Method to find content type base on data format*/
161
  private void findContentTypeInEML2(String eml2DocId)
162
  {
163
    if (eml2DocId == null)
164
    {
165
      contentType = DEFAULTCONTENTTYPE;
166
      return;
167
    }
168
    DocumentImpl xmlDoc = null;
169
    String xmlString = null;
170
    StringReader read = null;
171
    InputSource in = null;
172
    DocumentBuilderFactory dfactory = null;
173
    Document doc = null;
174
    // create xml document
175
    try
176
    {
177
      String accNumber = eml2DocId + MetaCatUtil.getOption("accNumSeparator") +
178
                    DBUtil.getLatestRevisionInDocumentTable(eml2DocId);
179
      //System.out.println("the acc number is !!!!!!!!!!!!!!!!!"+accNumber);
180
      xmlDoc = new DocumentImpl(accNumber);
181
      xmlString = xmlDoc.toString();
182
      //System.out.println("the xml doc is "+xmlDoc);
183
      // create dom tree
184
      read = new StringReader(xmlString);
185
      in = new InputSource(read);
186
      dfactory = DocumentBuilderFactory.newInstance();
187
      dfactory.setNamespaceAware(false);
188
      doc = dfactory.newDocumentBuilder().parse(in);
189
    }
190
    catch (Exception e)
191
    {
192
      // if faild, set default value
193
      contentType = DEFAULTCONTENTTYPE;
194
      logMetacat.error("Error in ContentTypeProvider." +
195
                         "findContentTypeInEML2()" + e.getMessage());
196
      return;
197
    }
198
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
199
                                                   dataFileId);
200
    if (dataFormatNode == null)
201
    {
202
      contentType = DEFAULTCONTENTTYPE;
203
      logMetacat.info("Couldn't find data format node");
204
      return;
205

    
206
    }
207
    NodeList childList  = dataFormatNode.getChildNodes();
208
    // go through childList
209
    for (int i = 0; i<childList.getLength(); i++)
210
    {
211
      Node child = childList.item(i);
212

    
213
      // if has text format child set to text/plain
214
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
215
      {
216
        logMetacat.info("in text format");
217
        contentType = TEXTYPE;
218
      }
219

    
220
      //external format
221
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
222
      {
223
        logMetacat.info("in external format ");
224
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
225
        logMetacat.info("The format is: "+format);
226
        // if we can find the format in the contentTypeHash table
227
        contentType = (String)lookUpContentType(format);
228
        if (contentType == null)
229
        {
230
          contentType = BINARYTYPE;
231
        }
232
      }
233

    
234
      // binaryRasterFormat
235
      if (child.getNodeName() != null && child.getNodeName().
236
          equals(BINARYRASTERFORMAT))
237
      {
238
        contentType = BINARYTYPE;
239
      }//if
240
    }//for
241
    //if contentype still be null, set default value
242
    if (contentType == null)
243
    {
244
      contentType = DEFAULTCONTENTTYPE;
245
    }
246
  }
247

    
248
  /* Method get text value of given child tagname*/
249
  private String getTextValueForGivenChildTag(Node parentNode,
250
                                              String childTagName)
251
  {
252
    String textValue = null;
253
    NodeList childList = parentNode.getChildNodes();
254
    for (int i= 0; i<childList.getLength();i++)
255
    {
256
      Node child = childList.item(i);
257
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
258
      {
259
        logMetacat.info("Find child node: " + childTagName);
260
        Node textNode = child.getFirstChild();
261
        if (textNode.getNodeType() == Node.TEXT_NODE)
262
        {
263
          textValue = textNode.getNodeValue();
264
        }//if
265
      }//if
266
    }//for
267
    logMetacat.info("The text value for element- " + childTagName +
268
                             " is " + textValue);
269
    return textValue;
270
  }//getTExtValueForGivenChildTag
271

    
272
  /* Find the data format node in eml2 document */
273
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
274
                                       String targetDocId)
275
  {
276
    Node targetNode = null;
277
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
278
    if (node != null)
279
    {
280
      // get the phycial the prent is online, grandparent is distribution
281
      // the grand'parent is physical
282
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
283
      NodeList list = phyicalNode.getChildNodes();
284
      for (int i = 0; i < list.getLength(); i++)
285
      {
286
        Node kid = list.item(i);
287
        // find dataFormat node
288
        if (kid.getNodeType() == node.ELEMENT_NODE &&
289
            kid.getNodeName().equals(DATAFORMAT))
290
        {
291
          targetNode = kid;
292
          break;
293
        } //if
294
      } //for
295
      if (targetNode != null)
296
      {
297
        logMetacat.info("dataFormat node'name: " +
298
                                 targetNode.getNodeName());
299
      }
300
    }//if
301
    return targetNode;
302
  }
303
  /* Find the datafile node */
304
  private Node findDataFileNodeInEML2(Document xml, String xPath,
305
                                String targetDocId)
306
  {
307
    Node dataFileNode = null;
308
    NodeList list = null;
309
    try
310
    {
311
      list = XPathAPI.selectNodeList(xml, xPath);
312
    }
313
    catch (Exception e)
314
    {
315
      // catch an error and return null
316
      logMetacat.error("Error in findDataFileNode: "+e.getMessage());
317
      return dataFileNode;
318
    }
319
    // go through the list and find target docid in online/url
320
    if (list != null)
321
    {
322
      for (int i = 0; i < list.getLength(); i++)
323
      {
324
        Node node = list.item(i);
325
        Node textNode = node.getFirstChild();
326
        if (textNode.getNodeType() == node.TEXT_NODE)
327
        {
328
          String URLData = textNode.getNodeValue();
329
          logMetacat.info("online/url text data: " + URLData);
330
          //Only handle ecogrid data file
331
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
332
          {
333
            // Get docid from url
334
            String docId = MetaCatUtil.
335
                               getAccessionNumberFromEcogridIdentifier(URLData);
336
            // Get rid of revision
337
            docId = MetaCatUtil.getDocIdFromAccessionNumber(docId);
338
            logMetacat.info("docid from url element in xml is: " +
339
                                     docId);
340
            //if this docid equals target one, we find it
341
            if (docId != null && docId.equals(targetDocId))
342
            {
343
              logMetacat.info("Find target docid in online/url: " +
344
                                       docId);
345
              dataFileNode = node;
346
              break;
347
            }
348
          } //if
349

    
350
        } //if
351
      } //for
352
    }//if
353

    
354
    return dataFileNode;
355
  }//findDataFileNode
356

    
357
  /* Get relative docid list and packagetype */
358
  private Vector getRelativeDocIdList(String id)
359
  {
360
    Vector docList = new Vector();
361
    String sql = "SELECT packagetype, subject from xml_relation " +
362
                 "where object = ?";
363
    ResultSet rs = null;
364
    PreparedStatement pStmt=null;
365
    DBConnection conn = null;
366
    int serialNumber = -1;
367
    try
368
    {
369
      //check out DBConnection
370
      conn=DBConnectionPool.getDBConnection
371
                                   ("ContentTypeProvider.getRelativeDocIdlist");
372
      serialNumber=conn.getCheckOutSerialNumber();
373
      pStmt = conn.prepareStatement(sql);
374
      // binding value
375
      pStmt.setString(1, id);
376
      //execute query
377
      pStmt.execute();
378
      rs = pStmt.getResultSet();
379
      // get result list
380
      String packType = null;
381
      while (rs.next())
382
      {
383
        packType = rs.getString(1);
384
        String subject = rs.getString(2);
385

    
386
        // get rid of duplicate record and add the docid into vector
387
        if (!docList.contains(subject))
388
        {
389

    
390
          docList.add(subject);
391
        }
392
      }//while
393

    
394
      // set up data package type
395
      if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption("packagedoctype"))).
396
                                     contains(packType))
397
      {
398
        //this is beta4 or beta6 version
399
        logMetacat.warn("This is beta package");
400
        packageType = BETA;
401
      }
402
      else if ((MetaCatUtil.getOptionList
403
               (MetaCatUtil.getOption("eml2_0_0namespace"))).contains(packType))
404
      {
405
        // this eml 2 document
406
        logMetacat.warn("This is EML2.0.0 package");
407
        packageType = EML2;
408
      }
409
      else if ((MetaCatUtil.getOptionList
410
               (MetaCatUtil.getOption("eml2_0_1namespace"))).contains(packType))
411
      {
412
        // this eml 2 document
413
        logMetacat.warn("This is EML2.0.1 package");
414
        packageType = EML2;
415
      }
416

    
417

    
418

    
419
    }//try
420
    catch(SQLException e)
421
    {
422

    
423
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
424
                             e.getMessage());
425
    }//catch
426
    finally
427
    {
428
      try
429
      {
430
        pStmt.close();
431
      }
432
      catch (SQLException ee)
433
      {
434
        logMetacat.error("ContenTypProvider.getRelativeDoclist2 " +
435
                             ee.getMessage());
436
      }
437
      finally
438
      {
439
        DBConnectionPool.returnDBConnection(conn, serialNumber);
440
      }
441
    }//finally
442

    
443
    return docList;
444
  }// getRelativeDocIdList
445

    
446
  /* Method to get physical document for data file in xml_documents table for
447
   * beta eml package
448
   */
449
  private String getTargetDocIdForBeta(Vector list, String targetType)
450
  {
451
    String docId = null;
452
    // make sure list is not empty
453
    if (list.isEmpty())
454
    {
455

    
456
      return docId;
457
    }
458
    // get sql command
459
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
460
    // the first element
461
    sql = sql + "'"+(String)list.elementAt(0) + "'";
462
    for (int i=1; i<list.size(); i++)
463
    {
464
      String docid = (String) list.elementAt(i);
465
      sql = sql + ", '" + docid + "'";
466
    }//for
467
    // add parensis
468
    sql = sql + ")";
469
    logMetacat.info("SQL for select doctype: "+ sql);
470
    ResultSet rs = null;
471
    PreparedStatement pStmt=null;
472
    DBConnection conn = null;
473
    int serialNumber = -1;
474
    try
475
    {
476
      //check out DBConnection
477
      conn=DBConnectionPool.getDBConnection
478
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
479
      serialNumber=conn.getCheckOutSerialNumber();
480
      pStmt = conn.prepareStatement(sql);
481
      //execute query
482
      pStmt.execute();
483
      rs = pStmt.getResultSet();
484
      // get result list
485
      while (rs.next())
486
      {
487
        String packType = rs.getString(1);
488
        String targetId  = rs.getString(2);
489
        // find physical document
490
        if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption(targetType))).
491
                                     contains(packType))
492
       {
493
         // assign physical document and jump out the while loop
494
         docId = targetId;
495
         break;
496
       }
497
      }//while
498

    
499
    }//try
500
    catch(SQLException e)
501
    {
502

    
503
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
504
                             e.getMessage());
505
    }//catch
506
    finally
507
    {
508
      try
509
      {
510
        pStmt.close();
511
      }
512
      catch(SQLException ee)
513
      {
514
        logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta2 " +
515
                             ee.getMessage());
516
      }//catch
517
      finally
518
      {
519
        DBConnectionPool.returnDBConnection(conn, serialNumber);
520
      }
521
    }//finally
522
    logMetacat.warn("target docid is: "+ docId + " "+
523
                             "for target doctype: "+targetType);
524
    return docId;
525
  }
526

    
527

    
528

    
529

    
530
  /* Parser the beta physical document and find the value in format element*/
531
  private void parsePhysicalDocumentForBeta(String physicalDocid)
532
  {
533
    String xmlDoc = null;
534
    try
535
    {
536
      String accNumber = physicalDocid + MetaCatUtil.getOption("accNumSeparator") +
537
        DBUtil.getLatestRevisionInDocumentTable(physicalDocid);
538
      //System.out.println("the accenumber is !!!!!!!!!!!!!!!!!!" + accNumber);
539
      DocumentImpl doc = new DocumentImpl(accNumber);
540
      xmlDoc = doc.toString();
541
      //System.out.println("The physical xml is "+xmlDoc);
542
    }
543
    catch (Exception e)
544
    {
545
      contentType = DEFAULTCONTENTTYPE;
546
      logMetacat.error("Error in ContentTypeProvider." +
547
                         "parsePhysicalDocumentForBeta()" + e.getMessage());
548
      return;
549
    }
550
      // get format element's text value
551
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
552

    
553
    if (format == null)
554
    {
555
      // if couldn't find the format, set contentype default value;
556
      contentType = DEFAULTCONTENTTYPE;
557
    }
558
    else
559
    {
560
      // if can find a format and look up from hash to get value
561
      contentType = lookUpContentType(format);
562
      // couldn't find the content type for this format in hash table
563
      if (contentType == null)
564
      {
565
        //set default vlaue
566
        contentType = DEFAULTCONTENTTYPE;
567
      }//if
568
    }//else
569
  }//parsePhysicalDocumentForBeta
570

    
571
  private String getTextValueFromPath(StringReader xml, String xPath)
572
  {
573
    String textValue = null;
574
    // get nodelist from doc by path
575
    try
576
    {
577
      NodeList list = EMLParser.getPathContent(xml, xPath);
578
      Node elementNode = list.item(0);
579
      Node textNode = elementNode.getFirstChild();
580
      if (textNode.getNodeType() == Node.TEXT_NODE)
581
      {
582
        textValue = textNode.getNodeValue();// get value
583
      }
584

    
585
    }
586
    catch (Exception e)
587
    {
588
      logMetacat.error("error in ContentTypeProvider."+
589
                               "getTextValueFromPath: "+e.getMessage());
590
    }
591
    logMetacat.info("The text value for " + xPath + " is: "+
592
                              textValue);
593
    return textValue;
594
  }//getTextValueFromPath
595

    
596
  /* A method to look up contentype */
597
  private String lookUpContentType(String format)
598
  {
599
    String newFormat = null;
600
    constructContentHashTable();
601
    newFormat = format.toLowerCase().trim();
602
    String type = null;
603
    type = (String)contentTypeHash.get(newFormat);
604
    logMetacat.info("contentType looked from hashtalbe is: " +
605
                              type);
606
    return type;
607
  }// lookupcontentypes
608

    
609
  /* Construct content type hashtable */
610
  private void constructContentHashTable()
611
  {
612
    contentTypeHash.put(TEXT, TEXTYPE);
613
    contentTypeHash.put(XML, XMLTYPE);
614
    contentTypeHash.put(HTML,HTMLTYPE);
615
    contentTypeHash.put(GIF, GIFTYPE);
616
    contentTypeHash.put(JPEG, JPEGTYPE);
617
    contentTypeHash.put(BMP, BMPTYPE);
618
    contentTypeHash.put(TAR, TARTYPE);
619
    contentTypeHash.put(ZIP, ZIPTYPE);
620
    contentTypeHash.put(BINARY, BINARYTYPE);
621

    
622
  }//constructrContentHashTable();
623

    
624

    
625

    
626
  public static void main(String[] argus)
627
  {
628
     try
629
     {
630
       DBConnectionPool pool = DBConnectionPool.getInstance();
631
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
632
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
633
       String str = provider.getContentType();
634
       logMetacat.warn("content type is : " + str);
635
     }
636
     catch(Exception e)
637
     {
638
       logMetacat.error("erorr in Schemalocation.main: " +
639
                                e.getMessage());
640
     }
641
  }
642
}//ContentTypeProvider
(16-16/66)