Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class to determine the content type of a data file
4
 *  Copyright: 2000 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Chad Berkley
7
 *    Release: @release@
8
 *
9
 *   '$Author: sgarg $'
10
 *     '$Date: 2005-10-10 11:06:55 -0700 (Mon, 10 Oct 2005) $'
11
 * '$Revision: 2663 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27

    
28
package edu.ucsb.nceas.metacat;
29

    
30
import java.io.StringReader;
31
import java.sql.PreparedStatement;
32
import java.sql.ResultSet;
33
import java.sql.SQLException;
34
import java.util.Hashtable;
35
import java.util.Vector;
36

    
37
import org.apache.log4j.Logger;
38
import org.apache.xpath.objects.XObject;
39
import org.apache.xpath.XPathAPI;
40
import org.apache.xerces.parsers.DOMParser;
41
import org.apache.xerces.dom.DocumentTypeImpl;
42
import org.w3c.dom.Attr;
43
import org.w3c.dom.NamedNodeMap;
44
import org.w3c.dom.NodeList;
45
import org.w3c.dom.Document;
46
import org.w3c.dom.Node;
47
import org.w3c.dom.NodeList;
48
import org.w3c.dom.DocumentType;
49
import org.xml.sax.InputSource;
50

    
51
import javax.xml.parsers.DocumentBuilder;
52
import javax.xml.parsers.DocumentBuilderFactory;
53
import javax.xml.parsers.ParserConfigurationException;
54
import javax.xml.transform.*;
55
import javax.xml.transform.stream.*;
56
import javax.xml.transform.dom.*;
57

    
58
import org.ecoinformatics.eml.EMLParser;
59
/**
 * This class figures out the content type of a given data file.
 * First, it reads xml_relation to find the documents related to the data file.
 * Then, for beta packages, it uses xml_documents to find the physical
 * document and pulls the content type out of that document; for eml2
 * packages it reads the data format from the eml document itself.
 */
65
public class ContentTypeProvider
66
{
67
  private String dataFileId = null;
68
  private String contentType = null;
69
  private String packageType = null;
70
  private Hashtable contentTypeHash = new Hashtable();
71

    
72
  //Constant
73
  private String BETA = "beta";
74
  private String EML2 = "eml2";
75
  private String DEFAULTCONTENTTYPE = MetaCatUtil.
76
                                           getOption("defaultcontenttype");
77
  private String FORMATPATH = "//format";
78
  private String TEXT       = "text";
79
  private String TEXTYPE    ="text/plain";
80
  private String XML        = "xml";
81
  private String XMLTYPE    = "text/xml";
82
  private String HTML       = "HTML";
83
  private String HTMLTYPE   = "text/html";
84
  private String GIF        = "gif";
85
  private String JPEG       = "jpeg";
86
  private String JPEGTYPE   = "image/jpeg";
87
  private String GIFTYPE    = "image/gif";
88
  private String BMP        = "bmp";
89
  private String BMPTYPE    = "image/bmp";
90
  private String TAR        = "tar";
91
  private String TARTYPE    ="application/x-tar";
92
  private String ZIP        = "zip";
93
  private String ZIPTYPE    = "application/x-zip-compressed";
94
  private String BINARY     = "binary";
95
  private String BINARYTYPE = "application/octet-stream";
96

    
97
  private String ENTITYDOCTYPE = "entitydoctype";
98
  private String PHYSICALDOCTYPE = "physicaldoctype";
99
  private String EML2DOCTYPE = "eml2namespace";
100
  private String DATAFORMAT = "dataFormat";
101
  private String TEXTFORMAT = "textFormat";
102
  private String EXTENALFORMAT = "externallyDefinedFormat";
103
  private String FORMATNAME = "formatName";
104
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
105

    
106
  private String DATAFILEPATH ="//physical/distribution/online/url";
107
  private static Logger logMetacat = Logger.getLogger(ContentTypeProvider.class);
108

    
109
  /**
110
   * Constructor of ContentTypeProvider
111
   */
112
  public ContentTypeProvider(String docIdWithRevision)
113
  {
114
    dataFileId = MetaCatUtil.getDocIdFromString(docIdWithRevision);
115
    //get relative doclist for data file and package type
116
    Vector docLists = null;
117
    docLists = getRelativeDocIdList(dataFileId);
118

    
119
    if ( packageType == null)
120
    {
121
      // other situation, contenetype is default value
122
      contentType = DEFAULTCONTENTTYPE;
123
    }
124
    else if (packageType.equals(BETA))
125
    {
126
      // for beta package and get entity docid for the data file
127
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
128
      // get physical docid for data file
129
      docLists = getRelativeDocIdList(entityDocid);
130
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
131
      // if no physical docid assign to this data file, content type is default
132
      if (physicalDocId == null)
133
      {
134

    
135
        contentType = DEFAULTCONTENTTYPE;
136
      }
137
      else
138
      {
139

    
140
        parsePhysicalDocumentForBeta(physicalDocId);
141
      }
142
    }
143
    else if (packageType.equals(EML2))
144
    {
145
      // for eml2 package
146
      // get eml document for data file
147
      //String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
148
      String eml2Docid = (String)docLists.elementAt(0);
149
      findContentTypeInEML2(eml2Docid);
150

    
151
    }
152

    
153
  }
154

    
155
  /** Method to get content type */
156
  public String getContentType()
157
  {
158
    return contentType;
159
  }//getContentType
160

    
161
  /* Method to find content type base on data format*/
162
  private void findContentTypeInEML2(String eml2DocId)
163
  {
164
    if (eml2DocId == null)
165
    {
166
      contentType = DEFAULTCONTENTTYPE;
167
      return;
168
    }
169
    DocumentImpl xmlDoc = null;
170
    String xmlString = null;
171
    StringReader read = null;
172
    InputSource in = null;
173
    DocumentBuilderFactory dfactory = null;
174
    Document doc = null;
175
    // create xml document
176
    try
177
    {
178
      String accNumber = eml2DocId + MetaCatUtil.getOption("accNumSeparator") +
179
                    DBUtil.getLatestRevisionInDocumentTable(eml2DocId);
180
      //System.out.println("the acc number is !!!!!!!!!!!!!!!!!"+accNumber);
181
      xmlDoc = new DocumentImpl(accNumber);
182
      xmlString = xmlDoc.toString();
183
      //System.out.println("the xml doc is "+xmlDoc);
184
      // create dom tree
185
      read = new StringReader(xmlString);
186
      in = new InputSource(read);
187
      dfactory = DocumentBuilderFactory.newInstance();
188
      dfactory.setNamespaceAware(false);
189
      doc = dfactory.newDocumentBuilder().parse(in);
190
    }
191
    catch (Exception e)
192
    {
193
      // if faild, set default value
194
      contentType = DEFAULTCONTENTTYPE;
195
      logMetacat.error("Error in ContentTypeProvider." +
196
                         "findContentTypeInEML2()" + e.getMessage());
197
      return;
198
    }
199
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH,
200
                                                   dataFileId);
201
    if (dataFormatNode == null)
202
    {
203
      contentType = DEFAULTCONTENTTYPE;
204
      logMetacat.info("Couldn't find data format node");
205
      return;
206

    
207
    }
208
    NodeList childList  = dataFormatNode.getChildNodes();
209
    // go through childList
210
    for (int i = 0; i<childList.getLength(); i++)
211
    {
212
      Node child = childList.item(i);
213

    
214
      // if has text format child set to text/plain
215
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
216
      {
217
        logMetacat.info("in text format");
218
        contentType = TEXTYPE;
219
      }
220

    
221
      //external format
222
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
223
      {
224
        logMetacat.info("in external format ");
225
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
226
        logMetacat.info("The format is: "+format);
227
        // if we can find the format in the contentTypeHash table
228
        contentType = (String)lookUpContentType(format);
229
        if (contentType == null)
230
        {
231
          contentType = BINARYTYPE;
232
        }
233
      }
234

    
235
      // binaryRasterFormat
236
      if (child.getNodeName() != null && child.getNodeName().
237
          equals(BINARYRASTERFORMAT))
238
      {
239
        contentType = BINARYTYPE;
240
      }//if
241
    }//for
242
    //if contentype still be null, set default value
243
    if (contentType == null)
244
    {
245
      contentType = DEFAULTCONTENTTYPE;
246
    }
247
  }
248

    
249
  /* Method get text value of given child tagname*/
250
  private String getTextValueForGivenChildTag(Node parentNode,
251
                                              String childTagName)
252
  {
253
    String textValue = null;
254
    NodeList childList = parentNode.getChildNodes();
255
    for (int i= 0; i<childList.getLength();i++)
256
    {
257
      Node child = childList.item(i);
258
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
259
      {
260
        logMetacat.info("Find child node: " + childTagName);
261
        Node textNode = child.getFirstChild();
262
        if (textNode.getNodeType() == Node.TEXT_NODE)
263
        {
264
          textValue = textNode.getNodeValue();
265
        }//if
266
      }//if
267
    }//for
268
    logMetacat.info("The text value for element- " + childTagName +
269
                             " is " + textValue);
270
    return textValue;
271
  }//getTExtValueForGivenChildTag
272

    
273
  /* Find the data format node in eml2 document */
274
  private Node findDataFormatNodeInEML2(Document xml, String xPath,
275
                                       String targetDocId)
276
  {
277
    Node targetNode = null;
278
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
279
    if (node != null)
280
    {
281
      // get the phycial the prent is online, grandparent is distribution
282
      // the grand'parent is physical
283
      Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
284
      NodeList list = phyicalNode.getChildNodes();
285
      for (int i = 0; i < list.getLength(); i++)
286
      {
287
        Node kid = list.item(i);
288
        // find dataFormat node
289
        if (kid.getNodeType() == node.ELEMENT_NODE &&
290
            kid.getNodeName().equals(DATAFORMAT))
291
        {
292
          targetNode = kid;
293
          break;
294
        } //if
295
      } //for
296
      if (targetNode != null)
297
      {
298
        logMetacat.info("dataFormat node'name: " +
299
                                 targetNode.getNodeName());
300
      }
301
    }//if
302
    return targetNode;
303
  }
304
  /* Find the datafile node */
305
  private Node findDataFileNodeInEML2(Document xml, String xPath,
306
                                String targetDocId)
307
  {
308
    Node dataFileNode = null;
309
    NodeList list = null;
310
    try
311
    {
312
      list = XPathAPI.selectNodeList(xml, xPath);
313
    }
314
    catch (Exception e)
315
    {
316
      // catch an error and return null
317
      logMetacat.error("Error in findDataFileNode: "+e.getMessage());
318
      return dataFileNode;
319
    }
320
    // go through the list and find target docid in online/url
321
    if (list != null)
322
    {
323
      for (int i = 0; i < list.getLength(); i++)
324
      {
325
        Node node = list.item(i);
326
        Node textNode = node.getFirstChild();
327
        if (textNode.getNodeType() == node.TEXT_NODE)
328
        {
329
          String URLData = textNode.getNodeValue();
330
          logMetacat.info("online/url text data: " + URLData);
331
          //Only handle ecogrid data file
332
          if (URLData.indexOf(DBSAXHandler.ECOGRID) != -1 )
333
          {
334
            // Get docid from url
335
            String docId = MetaCatUtil.
336
                               getAccessionNumberFromEcogridIdentifier(URLData);
337
            // Get rid of revision
338
            docId = MetaCatUtil.getDocIdFromAccessionNumber(docId);
339
            logMetacat.info("docid from url element in xml is: " +
340
                                     docId);
341
            //if this docid equals target one, we find it
342
            if (docId != null && docId.equals(targetDocId))
343
            {
344
              logMetacat.info("Find target docid in online/url: " +
345
                                       docId);
346
              dataFileNode = node;
347
              break;
348
            }
349
          } //if
350

    
351
        } //if
352
      } //for
353
    }//if
354

    
355
    return dataFileNode;
356
  }//findDataFileNode
357

    
358
  /* Get relative docid list and packagetype */
359
  private Vector getRelativeDocIdList(String id)
360
  {
361
    Vector docList = new Vector();
362
    String sql = "SELECT packagetype, subject from xml_relation " +
363
                 "where object = ?";
364
    ResultSet rs = null;
365
    PreparedStatement pStmt=null;
366
    DBConnection conn = null;
367
    int serialNumber = -1;
368
    try
369
    {
370
      //check out DBConnection
371
      conn=DBConnectionPool.getDBConnection
372
                                   ("ContentTypeProvider.getRelativeDocIdlist");
373
      serialNumber=conn.getCheckOutSerialNumber();
374
      pStmt = conn.prepareStatement(sql);
375
      // binding value
376
      pStmt.setString(1, id);
377
      //execute query
378
      pStmt.execute();
379
      rs = pStmt.getResultSet();
380
      // get result list
381
      String packType = null;
382
      while (rs.next())
383
      {
384
        packType = rs.getString(1);
385
        String subject = rs.getString(2);
386

    
387
        // get rid of duplicate record and add the docid into vector
388
        if (!docList.contains(subject))
389
        {
390

    
391
          docList.add(subject);
392
        }
393
      }//while
394

    
395
      // set up data package type
396
      if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption("packagedoctype"))).
397
                                     contains(packType))
398
      {
399
        //this is beta4 or beta6 version
400
        logMetacat.warn("This is beta package");
401
        packageType = BETA;
402
      }
403
      else if ((MetaCatUtil.getOptionList
404
               (MetaCatUtil.getOption("eml2_0_0namespace"))).contains(packType))
405
      {
406
        // this eml 2 document
407
        logMetacat.warn("This is EML2.0.0 package");
408
        packageType = EML2;
409
      }
410
      else if ((MetaCatUtil.getOptionList
411
               (MetaCatUtil.getOption("eml2_0_1namespace"))).contains(packType))
412
      {
413
        // this eml 2 document
414
        logMetacat.warn("This is EML2.0.1 package");
415
        packageType = EML2;
416
      }
417

    
418

    
419

    
420
    }//try
421
    catch(SQLException e)
422
    {
423

    
424
      logMetacat.error("ContenTypProvider.getRelativeDoclist1 " +
425
                             e.getMessage());
426
    }//catch
427
    finally
428
    {
429
      try
430
      {
431
        pStmt.close();
432
      }
433
      catch (SQLException ee)
434
      {
435
        logMetacat.error("ContenTypProvider.getRelativeDoclist2 " +
436
                             ee.getMessage());
437
      }
438
      finally
439
      {
440
        DBConnectionPool.returnDBConnection(conn, serialNumber);
441
      }
442
    }//finally
443

    
444
    return docList;
445
  }// getRelativeDocIdList
446

    
447
  /* Method to get physical document for data file in xml_documents table for
448
   * beta eml package
449
   */
450
  private String getTargetDocIdForBeta(Vector list, String targetType)
451
  {
452
    String docId = null;
453
    // make sure list is not empty
454
    if (list.isEmpty())
455
    {
456

    
457
      return docId;
458
    }
459
    // get sql command
460
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
461
    // the first element
462
    sql = sql + "'"+(String)list.elementAt(0) + "'";
463
    for (int i=1; i<list.size(); i++)
464
    {
465
      String docid = (String) list.elementAt(i);
466
      sql = sql + ", '" + docid + "'";
467
    }//for
468
    // add parensis
469
    sql = sql + ")";
470
    logMetacat.info("SQL for select doctype: "+ sql);
471
    ResultSet rs = null;
472
    PreparedStatement pStmt=null;
473
    DBConnection conn = null;
474
    int serialNumber = -1;
475
    try
476
    {
477
      //check out DBConnection
478
      conn=DBConnectionPool.getDBConnection
479
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
480
      serialNumber=conn.getCheckOutSerialNumber();
481
      pStmt = conn.prepareStatement(sql);
482
      //execute query
483
      pStmt.execute();
484
      rs = pStmt.getResultSet();
485
      // get result list
486
      while (rs.next())
487
      {
488
        String packType = rs.getString(1);
489
        String targetId  = rs.getString(2);
490
        // find physical document
491
        if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption(targetType))).
492
                                     contains(packType))
493
       {
494
         // assign physical document and jump out the while loop
495
         docId = targetId;
496
         break;
497
       }
498
      }//while
499

    
500
    }//try
501
    catch(SQLException e)
502
    {
503

    
504
      logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta1 " +
505
                             e.getMessage());
506
    }//catch
507
    finally
508
    {
509
      try
510
      {
511
        pStmt.close();
512
      }
513
      catch(SQLException ee)
514
      {
515
        logMetacat.error("ContenTypProvider.setPhysicalDocIdForBeta2 " +
516
                             ee.getMessage());
517
      }//catch
518
      finally
519
      {
520
        DBConnectionPool.returnDBConnection(conn, serialNumber);
521
      }
522
    }//finally
523
    logMetacat.warn("target docid is: "+ docId + " "+
524
                             "for target doctype: "+targetType);
525
    return docId;
526
  }
527

    
528

    
529

    
530

    
531
  /* Parser the beta physical document and find the value in format element*/
532
  private void parsePhysicalDocumentForBeta(String physicalDocid)
533
  {
534
    String xmlDoc = null;
535
    try
536
    {
537
      String accNumber = physicalDocid + MetaCatUtil.getOption("accNumSeparator") +
538
        DBUtil.getLatestRevisionInDocumentTable(physicalDocid);
539
      //System.out.println("the accenumber is !!!!!!!!!!!!!!!!!!" + accNumber);
540
      DocumentImpl doc = new DocumentImpl(accNumber);
541
      xmlDoc = doc.toString();
542
      //System.out.println("The physical xml is "+xmlDoc);
543
    }
544
    catch (Exception e)
545
    {
546
      contentType = DEFAULTCONTENTTYPE;
547
      logMetacat.error("Error in ContentTypeProvider." +
548
                         "parsePhysicalDocumentForBeta()" + e.getMessage());
549
      return;
550
    }
551
      // get format element's text value
552
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
553

    
554
    if (format == null)
555
    {
556
      // if couldn't find the format, set contentype default value;
557
      contentType = DEFAULTCONTENTTYPE;
558
    }
559
    else
560
    {
561
      // if can find a format and look up from hash to get value
562
      contentType = lookUpContentType(format);
563
      // couldn't find the content type for this format in hash table
564
      if (contentType == null)
565
      {
566
        //set default vlaue
567
        contentType = DEFAULTCONTENTTYPE;
568
      }//if
569
    }//else
570
  }//parsePhysicalDocumentForBeta
571

    
572
  private String getTextValueFromPath(StringReader xml, String xPath)
573
  {
574
    String textValue = null;
575
    // get nodelist from doc by path
576
    try
577
    {
578
      NodeList list = EMLParser.getPathContent(xml, xPath);
579
      Node elementNode = list.item(0);
580
      Node textNode = elementNode.getFirstChild();
581
      if (textNode.getNodeType() == Node.TEXT_NODE)
582
      {
583
        textValue = textNode.getNodeValue();// get value
584
      }
585

    
586
    }
587
    catch (Exception e)
588
    {
589
      logMetacat.error("error in ContentTypeProvider."+
590
                               "getTextValueFromPath: "+e.getMessage());
591
    }
592
    logMetacat.info("The text value for " + xPath + " is: "+
593
                              textValue);
594
    return textValue;
595
  }//getTextValueFromPath
596

    
597
  /* A method to look up contentype */
598
  private String lookUpContentType(String format)
599
  {
600
    String newFormat = null;
601
    constructContentHashTable();
602
    newFormat = format.toLowerCase().trim();
603
    String type = null;
604
    type = (String)contentTypeHash.get(newFormat);
605
    logMetacat.info("contentType looked from hashtalbe is: " +
606
                              type);
607
    return type;
608
  }// lookupcontentypes
609

    
610
  /* Construct content type hashtable */
611
  private void constructContentHashTable()
612
  {
613
    contentTypeHash.put(TEXT, TEXTYPE);
614
    contentTypeHash.put(XML, XMLTYPE);
615
    contentTypeHash.put(HTML,HTMLTYPE);
616
    contentTypeHash.put(GIF, GIFTYPE);
617
    contentTypeHash.put(JPEG, JPEGTYPE);
618
    contentTypeHash.put(BMP, BMPTYPE);
619
    contentTypeHash.put(TAR, TARTYPE);
620
    contentTypeHash.put(ZIP, ZIPTYPE);
621
    contentTypeHash.put(BINARY, BINARYTYPE);
622

    
623
  }//constructrContentHashTable();
624

    
625

    
626

    
627
  public static void main(String[] argus)
628
  {
629
     try
630
     {
631
       DBConnectionPool pool = DBConnectionPool.getInstance();
632
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
633
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
634
       String str = provider.getContentType();
635
       logMetacat.warn("content type is : " + str);
636
     }
637
     catch(Exception e)
638
     {
639
       logMetacat.error("erorr in Schemalocation.main: " +
640
                                e.getMessage());
641
     }
642
  }
643
}//ContentTypeProvider
(17-17/65)