Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to figure out the content type of a given data file
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *    Release: @release@
8
 *
9
 *   '$Author: tao $'
10
 *     '$Date: 2005-10-04 10:58:48 -0700 (Tue, 04 Oct 2005) $'
11
 * '$Revision: 2641 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27

    
28
package edu.ucsb.nceas.metacat;
29

    
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Hashtable;
35
import java.util.Vector;
36

    
37
import org.apache.xpath.objects.XObject;
38
import org.apache.xpath.XPathAPI;
39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
41
import org.w3c.dom.Attr;
42
import org.w3c.dom.NamedNodeMap;
43
import org.w3c.dom.NodeList;
44
import org.w3c.dom.Document;
45
import org.w3c.dom.Node;
46
import org.w3c.dom.NodeList;
47
import org.w3c.dom.DocumentType;
48
import org.xml.sax.InputSource;
49

    
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

    
57
import org.ecoinformatics.eml.EMLParser;
58
/**
 * This class figures out the content type of a given data file.
 * First, it reads xml_relation to get all documents related to the data file.
 * For beta packages it then uses xml_documents to find the physical document
 * and pulls the content type out of its format element; for EML 2 packages it
 * reads the data format information from the EML document itself.
 */
64
public class ContentTypeProvider
65
{
66
  private String dataFileId = null;
67
  private String contentType = null;
68
  private String packageType = null;
69
  private Hashtable contentTypeHash = new Hashtable();
70

    
71
  //Constant
72
  private String BETA = "beta";
73
  private String EML2 = "eml2";
74
  private String DEFAULTCONTENTTYPE = MetaCatUtil.
75
                                           getOption("defaultcontenttype");
76
  private String FORMATPATH = "//format";
77
  private String TEXT       = "text";
78
  private String TEXTYPE    ="text/plain";
79
  private String XML        = "xml";
80
  private String XMLTYPE    = "text/xml";
81
  private String HTML       = "HTML";
82
  private String HTMLTYPE   = "text/html";
83
  private String GIF        = "gif";
84
  private String JPEG       = "jpeg";
85
  private String JPEGTYPE   = "image/jpeg";
86
  private String GIFTYPE    = "image/gif";
87
  private String BMP        = "bmp";
88
  private String BMPTYPE    = "image/bmp";
89
  private String TAR        = "tar";
90
  private String TARTYPE    ="application/x-tar";
91
  private String ZIP        = "zip";
92
  private String ZIPTYPE    = "application/x-zip-compressed";
93
  private String BINARY     = "binary";
94
  private String BINARYTYPE = "application/octet-stream";
95

    
96
  private String ENTITYDOCTYPE = "entitydoctype";
97
  private String PHYSICALDOCTYPE = "physicaldoctype";
98
  private String EML2DOCTYPE = "eml2namespace";
99
  private String DATAFORMAT = "dataFormat";
100
  private String TEXTFORMAT = "textFormat";
101
  private String EXTENALFORMAT = "externallyDefinedFormat";
102
  private String FORMATNAME = "formatName";
103
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
104

    
105
  private String DATAFILEPATH ="//physical/distribution/online/url";
106

    
107
  /**
108
   * Constructor of ContentTypeProvider
109
   */
110
  public ContentTypeProvider(String docIdWithRevision)
111
  {
112
    dataFileId = MetaCatUtil.getDocIdFromString(docIdWithRevision);
113
    //get relative doclist for data file and package type
114
    Vector docLists = null;
115
    docLists = getRelativeDocIdList(dataFileId);
116

    
117
    if ( packageType == null)
118
    {
119
      // other situation, contenetype is default value
120
      contentType = DEFAULTCONTENTTYPE;
121
    }
122
    else if (packageType.equals(BETA))
123
    {
124
      // for beta package and get entity docid for the data file
125
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
126
      // get physical docid for data file
127
      docLists = getRelativeDocIdList(entityDocid);
128
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
129
      // if no physical docid assign to this data file, content type is default
130
      if (physicalDocId == null)
131
      {
132

    
133
        contentType = DEFAULTCONTENTTYPE;
134
      }
135
      else
136
      {
137

    
138
        parsePhysicalDocumentForBeta(physicalDocId);
139
      }
140
    }
141
    else if (packageType.equals(EML2))
142
    {
143
      // for eml2 package
144
      // get eml document for data file
145
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
146
      String eml2Docid = (String)docLists.elementAt(0);
147
      findContentTypeInEML2(eml2Docid);
148

    
149
    }
150

    
151
  }
152

    
153
  /** Method to get content type */
154
  public String getContentType()
155
  {
156
    return contentType;
157
  }//getContentType
158

    
159
  /* Method to find content type base on data format*/
160
  private void findContentTypeInEML2(String eml2DocId)
161
  {
162
    if (eml2DocId == null)
163
    {
164
      contentType = DEFAULTCONTENTTYPE;
165
      return;
166
    }
167
    DocumentImpl xmlDoc = null;
168
    String xmlString = null;
169
    StringReader read = null;
170
    InputSource in = null;
171
    DocumentBuilderFactory dfactory = null;
172
    Document doc = null;
173
    // create xml document
174
    try
175
    {
176
      String accNumber = eml2DocId + MetaCatUtil.getOption("accNumSeparator") +
177
                    DBUtil.getLatestRevisionInDocumentTable(eml2DocId);
178
      //System.out.println("the acc number is !!!!!!!!!!!!!!!!!"+accNumber);
179
      xmlDoc = new DocumentImpl(accNumber);
180
      xmlString = xmlDoc.toString();
181
      //System.out.println("the xml doc is "+xmlDoc);
182
      // create dom tree
183
      read = new StringReader(xmlString);
184
      in = new InputSource(read);
185
      dfactory = DocumentBuilderFactory.newInstance();
186
      dfactory.setNamespaceAware(false);
187
      doc = dfactory.newDocumentBuilder().parse(in);
188
    }
189
    catch (Exception e)
190
    {
191
      // if faild, set default value
192
      contentType = DEFAULTCONTENTTYPE;
193
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
194
                         "findContentTypeInEML2()" + e.getMessage(), 30);
195
      return;
196
    }
197
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
198
                                                   dataFileId);
199
    if (dataFormatNode == null)
200
    {
201
      contentType = DEFAULTCONTENTTYPE;
202
      MetaCatUtil.debugMessage("Couldn't find data format node", 30);
203
      return;
204

    
205
    }
206
    NodeList childList  = dataFormatNode.getChildNodes();
207
    // go through childList
208
    for (int i = 0; i<childList.getLength(); i++)
209
    {
210
      Node child = childList.item(i);
211

    
212
      // if has text format child set to text/plain
213
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
214
      {
215
        MetaCatUtil.debugMessage("in text format", 35);
216
        contentType = TEXTYPE;
217
      }
218

    
219
      //external format
220
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
221
      {
222
        MetaCatUtil.debugMessage("in external format ", 35);
223
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
224
        MetaCatUtil.debugMessage("The format is: "+format, 35);
225
        // if we can find the format in the contentTypeHash table
226
        contentType = (String)lookUpContentType(format);
227
        if (contentType == null)
228
        {
229
          contentType = BINARYTYPE;
230
        }
231
      }
232

    
233
      // binaryRasterFormat
234
      if (child.getNodeName() != null && child.getNodeName().
235
          equals(BINARYRASTERFORMAT))
236
      {
237
        contentType = BINARYTYPE;
238
      }//if
239
    }//for
240
    //if contentype still be null, set default value
241
    if (contentType == null)
242
    {
243
      contentType = DEFAULTCONTENTTYPE;
244
    }
245
  }
246

    
247
  /* Method get text value of given child tagname*/
248
  private String getTextValueForGivenChildTag(Node parentNode,
249
                                              String childTagName)
250
  {
251
    String textValue = null;
252
    NodeList childList = parentNode.getChildNodes();
253
    for (int i= 0; i<childList.getLength();i++)
254
    {
255
      Node child = childList.item(i);
256
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
257
      {
258
        MetaCatUtil.debugMessage("Find child node: " + childTagName, 35);
259
        Node textNode = child.getFirstChild();
260
        if (textNode.getNodeType() == Node.TEXT_NODE)
261
        {
262
          textValue = textNode.getNodeValue();
263
        }//if
264
      }//if
265
    }//for
266
    MetaCatUtil.debugMessage("The text value for element- " + childTagName +
267
                             " is " + textValue, 30);
268
    return textValue;
269
  }//getTExtValueForGivenChildTag
270

    
271
  /* Find the data format node in eml2 document */
272
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
273
                                       String targetDocId)
274
  {
275
    Node targetNode = null;
276
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
277
    if (node != null)
278
    {
279
      // get the phycial the prent is online, grandparent is distribution
280
      // the grand'parent is physical
281
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
282
      NodeList list = phyicalNode.getChildNodes();
283
      for (int i = 0; i < list.getLength(); i++)
284
      {
285
        Node kid = list.item(i);
286
        // find dataFormat node
287
        if (kid.getNodeType() == node.ELEMENT_NODE &&
288
            kid.getNodeName().equals(DATAFORMAT))
289
        {
290
          targetNode = kid;
291
          break;
292
        } //if
293
      } //for
294
      if (targetNode != null)
295
      {
296
        MetaCatUtil.debugMessage("dataFormat node'name: " +
297
                                 targetNode.getNodeName(), 35);
298
      }
299
    }//if
300
    return targetNode;
301
  }
302
  /* Find the datafile node */
303
  private Node findDataFileNodeInEML2(Document xml, String xPath,
304
                                String targetDocId)
305
  {
306
    Node dataFileNode = null;
307
    NodeList list = null;
308
    try
309
    {
310
      list = XPathAPI.selectNodeList(xml, xPath);
311
    }
312
    catch (Exception e)
313
    {
314
      // catch an error and return null
315
      MetaCatUtil.debugMessage("Error in findDataFileNode: "+e.getMessage(), 30);
316
      return dataFileNode;
317
    }
318
    // go through the list and find target docid in online/url
319
    if (list != null)
320
    {
321
      for (int i = 0; i < list.getLength(); i++)
322
      {
323
        Node node = list.item(i);
324
        Node textNode = node.getFirstChild();
325
        if (textNode.getNodeType() == node.TEXT_NODE)
326
        {
327
          String URLData = textNode.getNodeValue();
328
          MetaCatUtil.debugMessage("online/url text data: " + URLData, 30);
329
          //Only handle ecogrid data file
330
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
331
          {
332
            // Get docid from url
333
            String docId = MetaCatUtil.
334
                               getAccessionNumberFromEcogridIdentifier(URLData);
335
            // Get rid of revision
336
            docId = MetaCatUtil.getDocIdFromAccessionNumber(docId);
337
            MetaCatUtil.debugMessage("docid from url element in xml is: " +
338
                                     docId, 30);
339
            //if this docid equals target one, we find it
340
            if (docId != null && docId.equals(targetDocId))
341
            {
342
              MetaCatUtil.debugMessage("Find target docid in online/url: " +
343
                                       docId, 30);
344
              dataFileNode = node;
345
              break;
346
            }
347
          } //if
348

    
349
        } //if
350
      } //for
351
    }//if
352

    
353
    return dataFileNode;
354
  }//findDataFileNode
355

    
356
  /* Get relative docid list and packagetype */
357
  private Vector getRelativeDocIdList(String id)
358
  {
359
    Vector docList = new Vector();
360
    String sql = "SELECT packagetype, subject from xml_relation " +
361
                 "where object = ?";
362
    ResultSet rs = null;
363
    PreparedStatement pStmt=null;
364
    DBConnection conn = null;
365
    int serialNumber = -1;
366
    try
367
    {
368
      //check out DBConnection
369
      conn=DBConnectionPool.getDBConnection
370
                                   ("ContentTypeProvider.getRelativeDocIdlist");
371
      serialNumber=conn.getCheckOutSerialNumber();
372
      pStmt = conn.prepareStatement(sql);
373
      // binding value
374
      pStmt.setString(1, id);
375
      //execute query
376
      pStmt.execute();
377
      rs = pStmt.getResultSet();
378
      // get result list
379
      String packType = null;
380
      while (rs.next())
381
      {
382
        packType = rs.getString(1);
383
        String subject = rs.getString(2);
384

    
385
        // get rid of duplicate record and add the docid into vector
386
        if (!docList.contains(subject))
387
        {
388

    
389
          docList.add(subject);
390
        }
391
      }//while
392

    
393
      // set up data package type
394
      if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption("packagedoctype"))).
395
                                     contains(packType))
396
      {
397
        //this is beta4 or beta6 version
398
        MetaCatUtil.debugMessage("This is beta package", 30);
399
        packageType = BETA;
400
      }
401
      else if ((MetaCatUtil.getOptionList
402
               (MetaCatUtil.getOption("eml2_0_0namespace"))).contains(packType))
403
      {
404
        // this eml 2 document
405
        MetaCatUtil.debugMessage("This is EML2.0.0 package", 30);
406
        packageType = EML2;
407
      }
408
      else if ((MetaCatUtil.getOptionList
409
               (MetaCatUtil.getOption("eml2_0_1namespace"))).contains(packType))
410
      {
411
        // this eml 2 document
412
        MetaCatUtil.debugMessage("This is EML2.0.1 package", 30);
413
        packageType = EML2;
414
      }
415

    
416

    
417

    
418
    }//try
419
    catch(SQLException e)
420
    {
421

    
422
      MetaCatUtil.debugMessage("ContenTypProvider.getRelativeDoclist1 " +
423
                             e.getMessage(), 30);
424
    }//catch
425
    finally
426
    {
427
      try
428
      {
429
        pStmt.close();
430
      }
431
      catch (SQLException ee)
432
      {
433
        MetaCatUtil.debugMessage("ContenTypProvider.getRelativeDoclist2 " +
434
                             ee.getMessage(), 30);
435
      }
436
      finally
437
      {
438
        DBConnectionPool.returnDBConnection(conn, serialNumber);
439
      }
440
    }//finally
441

    
442
    return docList;
443
  }// getRelativeDocIdList
444

    
445
  /* Method to get physical document for data file in xml_documents table for
446
   * beta eml package
447
   */
448
  private String getTargetDocIdForBeta(Vector list, String targetType)
449
  {
450
    String docId = null;
451
    // make sure list is not empty
452
    if (list.isEmpty())
453
    {
454

    
455
      return docId;
456
    }
457
    // get sql command
458
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
459
    // the first element
460
    sql = sql + "'"+(String)list.elementAt(0) + "'";
461
    for (int i=1; i<list.size(); i++)
462
    {
463
      String docid = (String) list.elementAt(i);
464
      sql = sql + ", '" + docid + "'";
465
    }//for
466
    // add parensis
467
    sql = sql + ")";
468
    MetaCatUtil.debugMessage("SQL for select doctype: "+ sql, 35);
469
    ResultSet rs = null;
470
    PreparedStatement pStmt=null;
471
    DBConnection conn = null;
472
    int serialNumber = -1;
473
    try
474
    {
475
      //check out DBConnection
476
      conn=DBConnectionPool.getDBConnection
477
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
478
      serialNumber=conn.getCheckOutSerialNumber();
479
      pStmt = conn.prepareStatement(sql);
480
      //execute query
481
      pStmt.execute();
482
      rs = pStmt.getResultSet();
483
      // get result list
484
      while (rs.next())
485
      {
486
        String packType = rs.getString(1);
487
        String targetId  = rs.getString(2);
488
        // find physical document
489
        if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption(targetType))).
490
                                     contains(packType))
491
       {
492
         // assign physical document and jump out the while loop
493
         docId = targetId;
494
         break;
495
       }
496
      }//while
497

    
498
    }//try
499
    catch(SQLException e)
500
    {
501

    
502
      MetaCatUtil.debugMessage("ContenTypProvider.setPhysicalDocIdForBeta1 " +
503
                             e.getMessage(), 30);
504
    }//catch
505
    finally
506
    {
507
      try
508
      {
509
        pStmt.close();
510
      }
511
      catch(SQLException ee)
512
      {
513
        MetaCatUtil.debugMessage("ContenTypProvider.setPhysicalDocIdForBeta2 " +
514
                             ee.getMessage(), 30);
515
      }//catch
516
      finally
517
      {
518
        DBConnectionPool.returnDBConnection(conn, serialNumber);
519
      }
520
    }//finally
521
    MetaCatUtil.debugMessage("target docid is: "+ docId + " "+
522
                             "for target doctype: "+targetType, 25);
523
    return docId;
524
  }
525

    
526

    
527

    
528

    
529
  /* Parser the beta physical document and find the value in format element*/
530
  private void parsePhysicalDocumentForBeta(String physicalDocid)
531
  {
532
    String xmlDoc = null;
533
    try
534
    {
535
      String accNumber = physicalDocid + MetaCatUtil.getOption("accNumSeparator") +
536
        DBUtil.getLatestRevisionInDocumentTable(physicalDocid);
537
      //System.out.println("the accenumber is !!!!!!!!!!!!!!!!!!" + accNumber);
538
      DocumentImpl doc = new DocumentImpl(accNumber);
539
      xmlDoc = doc.toString();
540
      //System.out.println("The physical xml is "+xmlDoc);
541
    }
542
    catch (Exception e)
543
    {
544
      contentType = DEFAULTCONTENTTYPE;
545
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
546
                         "parsePhysicalDocumentForBeta()" + e.getMessage(), 30);
547
      return;
548
    }
549
      // get format element's text value
550
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
551

    
552
    if (format == null)
553
    {
554
      // if couldn't find the format, set contentype default value;
555
      contentType = DEFAULTCONTENTTYPE;
556
    }
557
    else
558
    {
559
      // if can find a format and look up from hash to get value
560
      contentType = lookUpContentType(format);
561
      // couldn't find the content type for this format in hash table
562
      if (contentType == null)
563
      {
564
        //set default vlaue
565
        contentType = DEFAULTCONTENTTYPE;
566
      }//if
567
    }//else
568
  }//parsePhysicalDocumentForBeta
569

    
570
  private String getTextValueFromPath(StringReader xml, String xPath)
571
  {
572
    String textValue = null;
573
    // get nodelist from doc by path
574
    try
575
    {
576
      NodeList list = EMLParser.getPathContent(xml, xPath);
577
      Node elementNode = list.item(0);
578
      Node textNode = elementNode.getFirstChild();
579
      if (textNode.getNodeType() == Node.TEXT_NODE)
580
      {
581
        textValue = textNode.getNodeValue();// get value
582
      }
583

    
584
    }
585
    catch (Exception e)
586
    {
587
      MetaCatUtil.debugMessage("error in ContentTypeProvider."+
588
                               "getTextValueFromPath: "+e.getMessage(), 30);
589
    }
590
    MetaCatUtil.debugMessage("The text value for " + xPath + " is: "+
591
                              textValue, 30);
592
    return textValue;
593
  }//getTextValueFromPath
594

    
595
  /* A method to look up contentype */
596
  private String lookUpContentType(String format)
597
  {
598
    String newFormat = null;
599
    constructContentHashTable();
600
    newFormat = format.toLowerCase().trim();
601
    String type = null;
602
    type = (String)contentTypeHash.get(newFormat);
603
    MetaCatUtil.debugMessage("contentType looked from hashtalbe is: " +
604
                              type, 30);
605
    return type;
606
  }// lookupcontentypes
607

    
608
  /* Construct content type hashtable */
609
  private void constructContentHashTable()
610
  {
611
    contentTypeHash.put(TEXT, TEXTYPE);
612
    contentTypeHash.put(XML, XMLTYPE);
613
    contentTypeHash.put(HTML,HTMLTYPE);
614
    contentTypeHash.put(GIF, GIFTYPE);
615
    contentTypeHash.put(JPEG, JPEGTYPE);
616
    contentTypeHash.put(BMP, BMPTYPE);
617
    contentTypeHash.put(TAR, TARTYPE);
618
    contentTypeHash.put(ZIP, ZIPTYPE);
619
    contentTypeHash.put(BINARY, BINARYTYPE);
620

    
621
  }//constructrContentHashTable();
622

    
623

    
624

    
625
  public static void main(String[] argus)
626
  {
627
     try
628
     {
629
       DBConnectionPool pool = DBConnectionPool.getInstance();
630
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
631
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
632
       String str = provider.getContentType();
633
       MetaCatUtil.debugMessage("content type is : " + str, 20);
634
     }
635
     catch(Exception e)
636
     {
637
       MetaCatUtil.debugMessage("erorr in Schemalocation.main: " +
638
                                e.getMessage(), 30);
639
     }
640
  }
641
}//ContentTypeProvider
(17-17/63)