Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to asynchronously do delta-T replication checking
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *    Release: @release@
8
 *
9
 *   '$Author: sgarg $'
10
 *     '$Date: 2005-03-04 11:51:55 -0800 (Fri, 04 Mar 2005) $'
11
 * '$Revision: 2399 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27

    
28
package edu.ucsb.nceas.metacat;
29

    
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Hashtable;
35
import java.util.Vector;
36

    
37
import org.apache.xpath.objects.XObject;
38
import org.apache.xpath.XPathAPI;
39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
41
import org.w3c.dom.Attr;
42
import org.w3c.dom.NamedNodeMap;
43
import org.w3c.dom.NodeList;
44
import org.w3c.dom.Document;
45
import org.w3c.dom.Node;
46
import org.w3c.dom.NodeList;
47
import org.w3c.dom.DocumentType;
48
import org.xml.sax.InputSource;
49

    
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

    
57
import org.ecoinformatics.eml.EMLParser;
58
/**
 * This class figures out the content type of a given data file.
 * First it looks in xml_relation for all documents related to the data file,
 * then in xml_documents for the physical document, and finally it pulls the
 * content type out of that physical document.
 */
64
public class ContentTypeProvider
65
{
66
  private String dataFileId = null;
67
  private String contentType = null;
68
  private String packageType = null;
69
  private Hashtable contentTypeHash = new Hashtable();
70

    
71
  //Constant
72
  private String BETA = "beta";
73
  private String EML2 = "eml2";
74
  private String DEFAULTCONTENTTYPE = MetaCatUtil.
75
                                           getOption("defaultcontenttype");
76
  private String FORMATPATH = "//format";
77
  private String TEXT       = "text";
78
  private String TEXTYPE    ="text/plain";
79
  private String XML        = "xml";
80
  private String XMLTYPE    = "text/xml";
81
  private String HTML       = "HTML";
82
  private String HTMLTYPE   = "text/html";
83
  private String GIF        = "gif";
84
  private String JPEG       = "jpeg";
85
  private String JPEGTYPE   = "image/jpeg";
86
  private String GIFTYPE    = "image/gif";
87
  private String BMP        = "bmp";
88
  private String BMPTYPE    = "image/bmp";
89
  private String TAR        = "tar";
90
  private String TARTYPE    ="application/x-tar";
91
  private String ZIP        = "zip";
92
  private String ZIPTYPE    = "application/x-zip-compressed";
93
  private String BINARY     = "binary";
94
  private String BINARYTYPE = "application/octet-stream";
95

    
96
  private String ENTITYDOCTYPE = "entitydoctype";
97
  private String PHYSICALDOCTYPE = "physicaldoctype";
98
  private String EML2DOCTYPE = "eml2namespace";
99
  private String DATAFORMAT = "dataFormat";
100
  private String TEXTFORMAT = "textFormat";
101
  private String EXTENALFORMAT = "externallyDefinedFormat";
102
  private String FORMATNAME = "formatName";
103
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
104

    
105
  private String DATAFILEPATH ="//physical/distribution/online/url";
106

    
107
  /**
108
   * Constructor of ContentTypeProvider
109
   */
110
  public ContentTypeProvider(String docIdWithRevision)
111
  {
112
    dataFileId = MetaCatUtil.getDocIdFromString(docIdWithRevision);
113
    //get relative doclist for data file and package type
114
    Vector docLists = null;
115
    docLists = getRelativeDocIdList(dataFileId);
116

    
117
    if ( packageType == null)
118
    {
119
      // other situation, contenetype is default value
120
      contentType = DEFAULTCONTENTTYPE;
121
    }
122
    else if (packageType.equals(BETA))
123
    {
124
      // for beta package and get entity docid for the data file
125
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
126
      // get physical docid for data file
127
      docLists = getRelativeDocIdList(entityDocid);
128
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
129
      // if no physical docid assign to this data file, content type is default
130
      if (physicalDocId == null)
131
      {
132

    
133
        contentType = DEFAULTCONTENTTYPE;
134
      }
135
      else
136
      {
137

    
138
        parsePhysicalDocumentForBeta(physicalDocId);
139
      }
140
    }
141
    else if (packageType.equals(EML2))
142
    {
143
      // for eml2 package
144
      // get eml document for data file
145
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
146
      String eml2Docid = (String)docLists.elementAt(0);
147
      findContentTypeInEML2(eml2Docid);
148

    
149
    }
150

    
151
  }
152

    
153
  /** Method to get content type */
154
  public String getContentType()
155
  {
156
    return contentType;
157
  }//getContentType
158

    
159
  /* Method to find content type base on data format*/
160
  private void findContentTypeInEML2(String eml2DocId)
161
  {
162
    if (eml2DocId == null)
163
    {
164
      contentType = DEFAULTCONTENTTYPE;
165
      return;
166
    }
167
    DocumentImpl xmlDoc = null;
168
    String xmlString = null;
169
    StringReader read = null;
170
    InputSource in = null;
171
    DocumentBuilderFactory dfactory = null;
172
    Document doc = null;
173
    // create xml document
174
    try
175
    {
176
      xmlDoc = new DocumentImpl(eml2DocId);
177
      xmlString = xmlDoc.toString();
178
      // create dom tree
179
      read = new StringReader(xmlString);
180
      in = new InputSource(read);
181
      dfactory = DocumentBuilderFactory.newInstance();
182
      dfactory.setNamespaceAware(false);
183
      doc = dfactory.newDocumentBuilder().parse(in);
184
    }
185
    catch (Exception e)
186
    {
187
      // if faild, set default value
188
      contentType = DEFAULTCONTENTTYPE;
189
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
190
                         "findContentTypeInEML2()" + e.getMessage(), 30);
191
      return;
192
    }
193
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
194
                                                   dataFileId);
195
    if (dataFormatNode == null)
196
    {
197
      contentType = DEFAULTCONTENTTYPE;
198
      MetaCatUtil.debugMessage("Couldn't find data format node", 30);
199
      return;
200

    
201
    }
202
    NodeList childList  = dataFormatNode.getChildNodes();
203
    // go through childList
204
    for (int i = 0; i<childList.getLength(); i++)
205
    {
206
      Node child = childList.item(i);
207

    
208
      // if has text format child set to text/plain
209
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
210
      {
211
        MetaCatUtil.debugMessage("in text format", 35);
212
        contentType = TEXTYPE;
213
      }
214

    
215
      //external format
216
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
217
      {
218
        MetaCatUtil.debugMessage("in external format ", 35);
219
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
220
        MetaCatUtil.debugMessage("The format is: "+format, 35);
221
        // if we can find the format in the contentTypeHash table
222
        contentType = (String)lookUpContentType(format);
223
        if (contentType == null)
224
        {
225
          contentType = BINARYTYPE;
226
        }
227
      }
228

    
229
      // binaryRasterFormat
230
      if (child.getNodeName() != null && child.getNodeName().
231
          equals(BINARYRASTERFORMAT))
232
      {
233
        contentType = BINARYTYPE;
234
      }//if
235
    }//for
236
    //if contentype still be null, set default value
237
    if (contentType == null)
238
    {
239
      contentType = DEFAULTCONTENTTYPE;
240
    }
241
  }
242

    
243
  /* Method get text value of given child tagname*/
244
  private String getTextValueForGivenChildTag(Node parentNode,
245
                                              String childTagName)
246
  {
247
    String textValue = null;
248
    NodeList childList = parentNode.getChildNodes();
249
    for (int i= 0; i<childList.getLength();i++)
250
    {
251
      Node child = childList.item(i);
252
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
253
      {
254
        MetaCatUtil.debugMessage("Find child node: " + childTagName, 35);
255
        Node textNode = child.getFirstChild();
256
        if (textNode.getNodeType() == Node.TEXT_NODE)
257
        {
258
          textValue = textNode.getNodeValue();
259
        }//if
260
      }//if
261
    }//for
262
    MetaCatUtil.debugMessage("The text value for element- " + childTagName +
263
                             " is " + textValue, 30);
264
    return textValue;
265
  }//getTExtValueForGivenChildTag
266

    
267
  /* Find the data format node in eml2 document */
268
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
269
                                       String targetDocId)
270
  {
271
    Node targetNode = null;
272
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
273
    if (node != null)
274
    {
275
      // get the phycial the prent is online, grandparent is distribution
276
      // the grand'parent is physical
277
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
278
      NodeList list = phyicalNode.getChildNodes();
279
      for (int i = 0; i < list.getLength(); i++)
280
      {
281
        Node kid = list.item(i);
282
        // find dataFormat node
283
        if (kid.getNodeType() == node.ELEMENT_NODE &&
284
            kid.getNodeName().equals(DATAFORMAT))
285
        {
286
          targetNode = kid;
287
          break;
288
        } //if
289
      } //for
290
      if (targetNode != null)
291
      {
292
        MetaCatUtil.debugMessage("dataFormat node'name: " +
293
                                 targetNode.getNodeName(), 35);
294
      }
295
    }//if
296
    return targetNode;
297
  }
298
  /* Find the datafile node */
299
  private Node findDataFileNodeInEML2(Document xml, String xPath,
300
                                String targetDocId)
301
  {
302
    Node dataFileNode = null;
303
    NodeList list = null;
304
    try
305
    {
306
      list = XPathAPI.selectNodeList(xml, xPath);
307
    }
308
    catch (Exception e)
309
    {
310
      // catch an error and return null
311
      MetaCatUtil.debugMessage("Error in findDataFileNode: "+e.getMessage(), 30);
312
      return dataFileNode;
313
    }
314
    // go through the list and find target docid in online/url
315
    if (list != null)
316
    {
317
      for (int i = 0; i < list.getLength(); i++)
318
      {
319
        Node node = list.item(i);
320
        Node textNode = node.getFirstChild();
321
        if (textNode.getNodeType() == node.TEXT_NODE)
322
        {
323
          String URLData = textNode.getNodeValue();
324
          MetaCatUtil.debugMessage("online/url text data: " + URLData, 30);
325
          //Only handle ecogrid data file
326
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
327
          {
328
            // Get docid from url
329
            String docId = MetaCatUtil.
330
                               getAccessionNumberFromEcogridIdentifier(URLData);
331
            // Get rid of revision
332
            docId = MetaCatUtil.getDocIdFromAccessionNumber(docId);
333
            MetaCatUtil.debugMessage("docid from url element in xml is: " +
334
                                     docId, 30);
335
            //if this docid equals target one, we find it
336
            if (docId != null && docId.equals(targetDocId))
337
            {
338
              MetaCatUtil.debugMessage("Find target docid in online/url: " +
339
                                       docId, 30);
340
              dataFileNode = node;
341
              break;
342
            }
343
          } //if
344

    
345
        } //if
346
      } //for
347
    }//if
348

    
349
    return dataFileNode;
350
  }//findDataFileNode
351

    
352
  /* Get relative docid list and packagetype */
353
  private Vector getRelativeDocIdList(String id)
354
  {
355
    Vector docList = new Vector();
356
    String sql = "SELECT packagetype, subject from xml_relation " +
357
                 "where object = ?";
358
    ResultSet rs = null;
359
    PreparedStatement pStmt=null;
360
    DBConnection conn = null;
361
    int serialNumber = -1;
362
    try
363
    {
364
      //check out DBConnection
365
      conn=DBConnectionPool.getDBConnection
366
                                   ("ContentTypeProvider.getRelativeDocIdlist");
367
      serialNumber=conn.getCheckOutSerialNumber();
368
      pStmt = conn.prepareStatement(sql);
369
      // binding value
370
      pStmt.setString(1, id);
371
      //execute query
372
      pStmt.execute();
373
      rs = pStmt.getResultSet();
374
      // get result list
375
      String packType = null;
376
      while (rs.next())
377
      {
378
        packType = rs.getString(1);
379
        String subject = rs.getString(2);
380

    
381
        // get rid of duplicate record and add the docid into vector
382
        if (!docList.contains(subject))
383
        {
384

    
385
          docList.add(subject);
386
        }
387
      }//while
388

    
389
      // set up data package type
390
      if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption("packagedoctype"))).
391
                                     contains(packType))
392
      {
393
        //this is beta4 or beta6 version
394
        MetaCatUtil.debugMessage("This is beta package", 30);
395
        packageType = BETA;
396
      }
397
      else if ((MetaCatUtil.getOptionList
398
               (MetaCatUtil.getOption("eml2_0_0namespace"))).contains(packType))
399
      {
400
        // this eml 2 document
401
        MetaCatUtil.debugMessage("This is EML2.0.0 package", 30);
402
        packageType = EML2;
403
      }
404
      else if ((MetaCatUtil.getOptionList
405
               (MetaCatUtil.getOption("eml2_0_1namespace"))).contains(packType))
406
      {
407
        // this eml 2 document
408
        MetaCatUtil.debugMessage("This is EML2.0.1 package", 30);
409
        packageType = EML2;
410
      }
411

    
412

    
413

    
414
    }//try
415
    catch(SQLException e)
416
    {
417

    
418
      MetaCatUtil.debugMessage("ContenTypProvider.getRelativeDoclist1 " +
419
                             e.getMessage(), 30);
420
    }//catch
421
    finally
422
    {
423
      try
424
      {
425
        pStmt.close();
426
      }
427
      catch (SQLException ee)
428
      {
429
        MetaCatUtil.debugMessage("ContenTypProvider.getRelativeDoclist2 " +
430
                             ee.getMessage(), 30);
431
      }
432
      finally
433
      {
434
        DBConnectionPool.returnDBConnection(conn, serialNumber);
435
      }
436
    }//finally
437

    
438
    return docList;
439
  }// getRelativeDocIdList
440

    
441
  /* Method to get physical document for data file in xml_documents table for
442
   * beta eml package
443
   */
444
  private String getTargetDocIdForBeta(Vector list, String targetType)
445
  {
446
    String docId = null;
447
    // make sure list is not empty
448
    if (list.isEmpty())
449
    {
450

    
451
      return docId;
452
    }
453
    // get sql command
454
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
455
    // the first element
456
    sql = sql + "'"+(String)list.elementAt(0) + "'";
457
    for (int i=1; i<list.size(); i++)
458
    {
459
      String docid = (String) list.elementAt(i);
460
      sql = sql + ", '" + docid + "'";
461
    }//for
462
    // add parensis
463
    sql = sql + ")";
464
    MetaCatUtil.debugMessage("SQL for select doctype: "+ sql, 35);
465
    ResultSet rs = null;
466
    PreparedStatement pStmt=null;
467
    DBConnection conn = null;
468
    int serialNumber = -1;
469
    try
470
    {
471
      //check out DBConnection
472
      conn=DBConnectionPool.getDBConnection
473
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
474
      serialNumber=conn.getCheckOutSerialNumber();
475
      pStmt = conn.prepareStatement(sql);
476
      //execute query
477
      pStmt.execute();
478
      rs = pStmt.getResultSet();
479
      // get result list
480
      while (rs.next())
481
      {
482
        String packType = rs.getString(1);
483
        String targetId  = rs.getString(2);
484
        // find physical document
485
        if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption(targetType))).
486
                                     contains(packType))
487
       {
488
         // assign physical document and jump out the while loop
489
         docId = targetId;
490
         break;
491
       }
492
      }//while
493

    
494
    }//try
495
    catch(SQLException e)
496
    {
497

    
498
      MetaCatUtil.debugMessage("ContenTypProvider.setPhysicalDocIdForBeta1 " +
499
                             e.getMessage(), 30);
500
    }//catch
501
    finally
502
    {
503
      try
504
      {
505
        pStmt.close();
506
      }
507
      catch(SQLException ee)
508
      {
509
        MetaCatUtil.debugMessage("ContenTypProvider.setPhysicalDocIdForBeta2 " +
510
                             ee.getMessage(), 30);
511
      }//catch
512
      finally
513
      {
514
        DBConnectionPool.returnDBConnection(conn, serialNumber);
515
      }
516
    }//finally
517
    MetaCatUtil.debugMessage("target docid is: "+ docId + " "+
518
                             "for target doctype: "+targetType, 25);
519
    return docId;
520
  }
521

    
522

    
523

    
524

    
525
  /* Parser the beta physical document and find the value in format element*/
526
  private void parsePhysicalDocumentForBeta(String physicalDocid)
527
  {
528
    String xmlDoc = null;
529
    try
530
    {
531
      DocumentImpl doc = new DocumentImpl(physicalDocid);
532
      xmlDoc = doc.toString();
533
    }
534
    catch (Exception e)
535
    {
536
      contentType = DEFAULTCONTENTTYPE;
537
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
538
                         "parsePhysicalDocumentForBeta()" + e.getMessage(), 30);
539
      return;
540
    }
541
      // get format element's text value
542
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
543

    
544
    if (format == null)
545
    {
546
      // if couldn't find the format, set contentype default value;
547
      contentType = DEFAULTCONTENTTYPE;
548
    }
549
    else
550
    {
551
      // if can find a format and look up from hash to get value
552
      contentType = lookUpContentType(format);
553
      // couldn't find the content type for this format in hash table
554
      if (contentType == null)
555
      {
556
        //set default vlaue
557
        contentType = DEFAULTCONTENTTYPE;
558
      }//if
559
    }//else
560
  }//parsePhysicalDocumentForBeta
561

    
562
  private String getTextValueFromPath(StringReader xml, String xPath)
563
  {
564
    String textValue = null;
565
    // get nodelist from doc by path
566
    try
567
    {
568
      NodeList list = EMLParser.getPathContent(xml, xPath);
569
      Node elementNode = list.item(0);
570
      Node textNode = elementNode.getFirstChild();
571
      if (textNode.getNodeType() == Node.TEXT_NODE)
572
      {
573
        textValue = textNode.getNodeValue();// get value
574
      }
575

    
576
    }
577
    catch (Exception e)
578
    {
579
      MetaCatUtil.debugMessage("error in ContentTypeProvider."+
580
                               "getTextValueFromPath: "+e.getMessage(), 30);
581
    }
582
    MetaCatUtil.debugMessage("The text value for " + xPath + " is: "+
583
                              textValue, 30);
584
    return textValue;
585
  }//getTextValueFromPath
586

    
587
  /* A method to look up contentype */
588
  private String lookUpContentType(String format)
589
  {
590
    String newFormat = null;
591
    constructContentHashTable();
592
    newFormat = format.toLowerCase().trim();
593
    String type = null;
594
    type = (String)contentTypeHash.get(newFormat);
595
    MetaCatUtil.debugMessage("contentType looked from hashtalbe is: " +
596
                              type, 30);
597
    return type;
598
  }// lookupcontentypes
599

    
600
  /* Construct content type hashtable */
601
  private void constructContentHashTable()
602
  {
603
    contentTypeHash.put(TEXT, TEXTYPE);
604
    contentTypeHash.put(XML, XMLTYPE);
605
    contentTypeHash.put(HTML,HTMLTYPE);
606
    contentTypeHash.put(GIF, GIFTYPE);
607
    contentTypeHash.put(JPEG, JPEGTYPE);
608
    contentTypeHash.put(BMP, BMPTYPE);
609
    contentTypeHash.put(TAR, TARTYPE);
610
    contentTypeHash.put(ZIP, ZIPTYPE);
611
    contentTypeHash.put(BINARY, BINARYTYPE);
612

    
613
  }//constructrContentHashTable();
614

    
615

    
616

    
617
  public static void main(String[] argus)
618
  {
619
     try
620
     {
621
       DBConnectionPool pool = DBConnectionPool.getInstance();
622
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
623
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
624
       String str = provider.getContentType();
625
       MetaCatUtil.debugMessage("content type is : " + str, 20);
626
     }
627
     catch(Exception e)
628
     {
629
       MetaCatUtil.debugMessage("erorr in Schemalocation.main: " +
630
                                e.getMessage(), 30);
631
     }
632
  }
633
}//ContentTypeProvider
(17-17/63)