/**
 *  '$RCSfile$'
 *    Purpose: A class to determine the content type of a data file
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Chad Berkley
 *    Release: @release@
 *
 *   '$Author: tao $'
 *     '$Date: 2003-04-15 21:16:51 -0700 (Tue, 15 Apr 2003) $'
 * '$Revision: 1553 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package edu.ucsb.nceas.metacat;

import java.io.StringReader;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Vector;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;

import org.apache.xerces.dom.DocumentTypeImpl;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xpath.XPathAPI;
import org.apache.xpath.objects.XObject;
import org.ecoinformatics.eml.EMLParser;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
59
 * This class will figure out which content type it is for a given data file.
60
 * First, from xml_relation to get all relative files to this data file.
61
 * Then from xml_documents to get physical files. From physical file pull out
62
 * the content type
63
 */
64
public class ContentTypeProvider
65
{
66
  private String dataFileId = null;
67
  private String contentType = null;
68
  private String packageType = null;
69
  private Hashtable contentTypeHash = new Hashtable();
70
  
71
  //Constant
72
  private String BETA = "beta";
73
  private String EML2 = "eml2";
74
  private String DEFAULTCONTENTTYPE = MetaCatUtil.
75
                                           getOption("defaultcontenttype");
76
  private String FORMATPATH = "//format";
77
  private String TEXT       = "text";
78
  private String TEXTYPE    ="text/plain";
79
  private String XML        = "xml";
80
  private String XMLTYPE    = "text/xml";
81
  private String HTML       = "HTML";
82
  private String HTMLTYPE   = "text/html";
83
  private String GIF        = "gif";
84
  private String JPEG       = "jpeg";
85
  private String JPEGTYPE   = "image/jpeg";
86
  private String GIFTYPE    = "image/gif";
87
  private String BMP        = "bmp";
88
  private String BMPTYPE    = "image/bmp";
89
  private String TAR        = "tar";
90
  private String TARTYPE    ="application/x-tar";
91
  private String ZIP        = "zip";
92
  private String ZIPTYPE    = "application/x-zip-compressed";
93
  private String BINARY     = "binary";
94
  private String BINARYTYPE = "application/octet-stream";
95
  
96
  private String ENTITYDOCTYPE = "entitydoctype";
97
  private String PHYSICALDOCTYPE = "physicaldoctype";
98
  private String EML2DOCTYPE = "eml2namespace";
99
  private String DATAFORMAT = "dataFormat";
100
  private String TEXTFORMAT = "textFormat";
101
  private String EXTENALFORMAT = "externallyDefinedFormat";
102
  private String FORMATNAME = "formatName";
103
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
104
  
105
  private String DATAFILEPATH ="//physical/distribution/online/url";
106
 
107
  /**
108
   * Constructor of ContentTypeProvider
109
   */
110
  public ContentTypeProvider(String docIdWithRevision)
111
  {
112
    dataFileId = MetaCatUtil.getDocIdFromString(docIdWithRevision);
113
    //get relative doclist for data file and package type
114
    Vector docLists = null;
115
    docLists = getRelativeDocIdList(dataFileId);
116
       
117
    if ( packageType == null)
118
    {
119
      // other situation, contenetype is default value
120
      contentType = DEFAULTCONTENTTYPE;
121
    }
122
    else if (packageType.equals(BETA))
123
    {
124
      // for beta package and get entity docid for the data file
125
      String entityDocid = getTargetDocIdForBeta(docLists, ENTITYDOCTYPE);
126
      // get physical docid for data file
127
      docLists = getRelativeDocIdList(entityDocid);
128
      String physicalDocId = getTargetDocIdForBeta(docLists, PHYSICALDOCTYPE);
129
      // if no physical docid assign to this data file, content type is default
130
      if (physicalDocId == null)
131
      {
132
        
133
        contentType = DEFAULTCONTENTTYPE;
134
      }
135
      else
136
      {
137
        
138
        parsePhysicalDocumentForBeta(physicalDocId);
139
      }
140
    }
141
    else if (packageType.equals(EML2))
142
    {
143
      // for eml2 package
144
      // get eml document for data file
145
      String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
146
      findContentTypeInEML2(eml2Docid);
147
      
148
    }
149
 
150
  }
151
  
152
  /** Method to get content type */
153
  public String getContentType()
154
  {
155
    return contentType;
156
  }//getContentType
157
  
158
  /* Method to find content type base on data format*/
159
  private void findContentTypeInEML2(String eml2DocId)
160
  {
161
    DocumentImpl xmlDoc = null;
162
    String xmlString = null;
163
    StringReader read = null;
164
    InputSource in = null;
165
    DocumentBuilderFactory dfactory = null;
166
    Document doc = null;
167
    // create xml document
168
    try
169
    {
170
      xmlDoc = new DocumentImpl(eml2DocId);
171
      xmlString = xmlDoc.toString();
172
      // create dom tree
173
      read = new StringReader(xmlString);
174
      in = new InputSource(read);
175
      dfactory = DocumentBuilderFactory.newInstance();
176
      dfactory.setNamespaceAware(false);
177
      doc = dfactory.newDocumentBuilder().parse(in);
178
    }
179
    catch (Exception e)
180
    {
181
      // if faild, set default value
182
      contentType = DEFAULTCONTENTTYPE;
183
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
184
                         "findContentTypeInEML2()" + e.getMessage(), 30);
185
      return;
186
    }
187
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH, 
188
                                                   dataFileId);
189
    NodeList childList  = dataFormatNode.getChildNodes();
190
    // go through childList
191
    for (int i = 0; i<childList.getLength(); i++)
192
    {
193
      Node child = childList.item(i);
194
      
195
      // if has text format child set to text/plain
196
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
197
      {
198
        MetaCatUtil.debugMessage("in text format", 35);
199
        contentType = TEXTYPE;
200
      }
201
      
202
      //external format
203
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
204
      {
205
        MetaCatUtil.debugMessage("in external format ", 35);
206
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
207
        MetaCatUtil.debugMessage("The format is: "+format, 35);
208
        // if we can find the format in the contentTypeHash table
209
        contentType = (String)lookUpContentType(format);
210
        if (contentType == null)
211
        {
212
          contentType = BINARYTYPE;
213
        }
214
      }
215
      
216
      // binaryRasterFormat
217
      if (child.getNodeName() != null && child.getNodeName().
218
          equals(BINARYRASTERFORMAT))
219
      {
220
        contentType = BINARYTYPE;
221
      }//if
222
    }//for
223
    //if contentype still be null, set default value
224
    if (contentType == null)
225
    {
226
      contentType = DEFAULTCONTENTTYPE;
227
    }
228
  }
229
  
230
  /* Method get text value of given child tagname*/
231
  private String getTextValueForGivenChildTag(Node parentNode, 
232
                                              String childTagName)
233
  {
234
    String textValue = null;
235
    NodeList childList = parentNode.getChildNodes();
236
    for (int i= 0; i<childList.getLength();i++)
237
    {
238
      Node child = childList.item(i);
239
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
240
      {
241
        MetaCatUtil.debugMessage("Find child node: " + childTagName, 35);
242
        Node textNode = child.getFirstChild();
243
        if (textNode.getNodeType() == Node.TEXT_NODE)
244
        {
245
          textValue = textNode.getNodeValue();
246
        }//if
247
      }//if
248
    }//for
249
    MetaCatUtil.debugMessage("The text value for element- " + childTagName +
250
                             " is " + textValue, 30);
251
    return textValue;
252
  }//getTExtValueForGivenChildTag
253
  
254
  /* Find the data format node in eml2 document */
255
  private Node findDataFormatNodeInEML2(Document xml, String xPath, 
256
                                       String targetDocId)
257
  {
258
    Node targetNode = null;
259
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
260
    // get the phycial the prent is online, grandparent is distribution
261
    // the grand'parent is physical
262
    Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
263
    NodeList list = phyicalNode.getChildNodes();
264
    for (int i = 0; i<list.getLength(); i++)
265
    {
266
      Node kid = list.item(i);
267
      // find dataFormat node
268
      if (kid.getNodeType() == node.ELEMENT_NODE && 
269
          kid.getNodeName().equals(DATAFORMAT))
270
      {
271
        targetNode = kid;
272
        break;
273
      }//if
274
    }//for
275
    MetaCatUtil.debugMessage("dataFormat node'name: "+ 
276
                             targetNode.getNodeName(), 35);
277
    return targetNode;
278
  }
279
  /* Find the datafile node */
280
  private Node findDataFileNodeInEML2(Document xml, String xPath, 
281
                                String targetDocId)
282
  {
283
    Node dataFileNode = null;
284
    NodeList list = null;
285
    try
286
    {
287
      list = XPathAPI.selectNodeList(xml, xPath);
288
    }
289
    catch (Exception e)
290
    {
291
      // catch an error and return null
292
      MetaCatUtil.debugMessage("Error in findDataFileNode: "+e.getMessage(), 30);
293
      return dataFileNode;
294
    }
295
    // go through the list and find target docid in online/url
296
    for (int i = 0; i<list.getLength(); i++)
297
    {
298
      Node node = list.item(i);
299
      Node textNode = node.getFirstChild();
300
      if (textNode.getNodeType() == node.TEXT_NODE)
301
      {
302
        String URLData = textNode.getNodeValue();
303
        MetaCatUtil.debugMessage("online/url text data: " + URLData, 30);
304
        //Only handle data file in local metacat server
305
        if (URLData.indexOf(MetaCatUtil.getOption("httpserver")) != -1 || 
306
            URLData.indexOf(MetaCatUtil.getOption("server")) != -1)
307
        {
308
          // Get docid from url
309
          String docId =MetaCatUtil.getDocIdWithRevFromOnlineURL(URLData);
310
          // Get rid of revision
311
          docId = MetaCatUtil.getDocIdFromString(docId);
312
          MetaCatUtil.debugMessage("docid from url element in xml is: "+
313
                                   docId, 30);
314
          //if this docid equals target one, we find it
315
          if (docId != null && docId.equals(targetDocId))
316
          {
317
            MetaCatUtil.debugMessage("Find target docid in online/url: "+
318
                                      docId, 30);
319
            dataFileNode = node;
320
            break;
321
          }
322
        }//if
323
        
324
      }//if
325
    }//for
326
    MetaCatUtil.debugMessage("online/url node's name: " +
327
                             dataFileNode.getNodeName(), 35);
328
    return dataFileNode;
329
  }//findDataFileNode
330
  
331
  /* Get relative docid list and packagetype */
332
  private Vector getRelativeDocIdList(String id) 
333
  {
334
    Vector docList = new Vector();
335
    String sql = "SELECT packagetype, subject from xml_relation " + 
336
                 "where object = ?";
337
    ResultSet rs = null;
338
    PreparedStatement pStmt=null;
339
    DBConnection conn = null;
340
    int serialNumber = -1;
341
    try
342
    {
343
      //check out DBConnection
344
      conn=DBConnectionPool.getDBConnection
345
                                   ("ContentTypeProvider.getRelativeDocIdlist");
346
      serialNumber=conn.getCheckOutSerialNumber();
347
      pStmt = conn.prepareStatement(sql);
348
      // binding value
349
      pStmt.setString(1, id);
350
      //execute query
351
      pStmt.execute();
352
      rs = pStmt.getResultSet();
353
      // get result list
354
      String packType = null;
355
      while (rs.next())
356
      {
357
        packType = rs.getString(1);
358
        String subject = rs.getString(2);
359
       
360
        // get rid of duplicate record and add the docid into vector
361
        if (!docList.contains(subject))
362
        {
363
          
364
          docList.add(subject);
365
        }
366
      }//while
367
      
368
      // set up data package type
369
      if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption("packagedoctype"))).
370
                                     contains(packType))
371
      {
372
        //this is beta4 or beta6 version
373
        MetaCatUtil.debugMessage("This is beta package", 30);
374
        packageType = BETA;
375
      }
376
      else if ((MetaCatUtil.getOptionList
377
               (MetaCatUtil.getOption("eml2namespace"))).contains(packType))
378
      {
379
        // this eml 2 document
380
        MetaCatUtil.debugMessage("This is EML2 package", 30);
381
        packageType = EML2;
382
      }
383
     
384
        
385
    }//try
386
    catch(SQLException e)
387
    {
388
       
389
      MetaCatUtil.debugMessage("ContenTypProvider.getRelativeDoclist1 " +
390
                             e.getMessage(), 30);
391
    }//catch
392
    finally
393
    {
394
      try
395
      {
396
        pStmt.close();
397
      }
398
      catch (SQLException ee)
399
      {
400
        MetaCatUtil.debugMessage("ContenTypProvider.getRelativeDoclist2 " +
401
                             ee.getMessage(), 30);
402
      }
403
      finally
404
      {
405
        DBConnectionPool.returnDBConnection(conn, serialNumber);
406
      }
407
    }//finally
408
      
409
    return docList;
410
  }// getRelativeDocIdList
411
  
412
  /* Method to get physical document for data file in xml_documents table for
413
   * beta eml package
414
   */
415
  private String getTargetDocIdForBeta(Vector list, String targetType)
416
  {
417
    String docId = null;
418
    // make sure list is not empty
419
    if (list.isEmpty())
420
    {
421
      
422
      return docId;
423
    }
424
    // get sql command
425
    String sql = "SELECT doctype, docid from xml_documents where docid in (";
426
    // the first element
427
    sql = sql + "'"+(String)list.elementAt(0) + "'";
428
    for (int i=1; i<list.size(); i++)
429
    {
430
      String docid = (String) list.elementAt(i);
431
      sql = sql + ", '" + docid + "'";
432
    }//for
433
    // add parensis
434
    sql = sql + ")";
435
    MetaCatUtil.debugMessage("SQL for select doctype: "+ sql, 35);
436
    ResultSet rs = null;
437
    PreparedStatement pStmt=null;
438
    DBConnection conn = null;
439
    int serialNumber = -1;
440
    try
441
    {
442
      //check out DBConnection
443
      conn=DBConnectionPool.getDBConnection
444
                                 ("ContentTypeProvider.setPhycialDocIdForBeta");
445
      serialNumber=conn.getCheckOutSerialNumber();
446
      pStmt = conn.prepareStatement(sql);
447
      //execute query
448
      pStmt.execute();
449
      rs = pStmt.getResultSet();
450
      // get result list
451
      while (rs.next())
452
      {
453
        String packType = rs.getString(1);
454
        String targetId  = rs.getString(2);
455
        // find physical document
456
        if ((MetaCatUtil.getOptionList(MetaCatUtil.getOption(targetType))).
457
                                     contains(packType))
458
       {
459
         // assign physical document and jump out the while loop
460
         docId = targetId;
461
         break;
462
       }
463
      }//while
464
    
465
    }//try
466
    catch(SQLException e)
467
    {
468
       
469
      MetaCatUtil.debugMessage("ContenTypProvider.setPhysicalDocIdForBeta1 " +
470
                             e.getMessage(), 30);
471
    }//catch
472
    finally
473
    {
474
      try
475
      {
476
        pStmt.close();
477
      }
478
      catch(SQLException ee)
479
      {
480
        MetaCatUtil.debugMessage("ContenTypProvider.setPhysicalDocIdForBeta2 " +
481
                             ee.getMessage(), 30);
482
      }//catch
483
      finally
484
      {
485
        DBConnectionPool.returnDBConnection(conn, serialNumber);
486
      }
487
    }//finally
488
    MetaCatUtil.debugMessage("target docid is: "+ docId + " "+
489
                             "for target doctype: "+targetType, 25);
490
    return docId;
491
  }
492
  
493

    
494
  
495
  
496
  /* Parser the beta physical document and find the value in format element*/ 
497
  private void parsePhysicalDocumentForBeta(String physicalDocid)
498
  {
499
    String xmlDoc = null;
500
    try
501
    {
502
      DocumentImpl doc = new DocumentImpl(physicalDocid);
503
      xmlDoc = doc.toString();
504
    }
505
    catch (Exception e)
506
    {
507
      contentType = DEFAULTCONTENTTYPE;
508
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
509
                         "parsePhysicalDocumentForBeta()" + e.getMessage(), 30);
510
      return;
511
    }
512
      // get format element's text value
513
    String format = getTextValueFromPath(new StringReader(xmlDoc), FORMATPATH);
514
    
515
    if (format == null)
516
    {
517
      // if couldn't find the format, set contentype default value;
518
      contentType = DEFAULTCONTENTTYPE;
519
    }
520
    else
521
    {
522
      // if can find a format and look up from hash to get value
523
      contentType = lookUpContentType(format);
524
      // couldn't find the content type for this format in hash table
525
      if (contentType == null)
526
      {
527
        //set default vlaue
528
        contentType = DEFAULTCONTENTTYPE;
529
      }//if
530
    }//else
531
  }//parsePhysicalDocumentForBeta
532
  
533
  private String getTextValueFromPath(StringReader xml, String xPath)
534
  {
535
    String textValue = null;
536
    // get nodelist from doc by path
537
    try
538
    {
539
      NodeList list = EMLParser.getPathContent(xml, xPath);
540
      Node elementNode = list.item(0);
541
      Node textNode = elementNode.getFirstChild();
542
      if (textNode.getNodeType() == Node.TEXT_NODE)
543
      {
544
        textValue = textNode.getNodeValue();// get value
545
      }
546
     
547
    }
548
    catch (Exception e)
549
    {
550
      MetaCatUtil.debugMessage("error in ContentTypeProvider."+
551
                               "getTextValueFromPath: "+e.getMessage(), 30);
552
    }
553
    MetaCatUtil.debugMessage("The text value for " + xPath + " is: "+ 
554
                              textValue, 30);
555
    return textValue;
556
  }//getTextValueFromPath
557
  
558
  /* A method to look up contentype */
559
  private String lookUpContentType(String format)
560
  {
561
    String newFormat = null;
562
    constructContentHashTable();
563
    newFormat = format.toLowerCase().trim();
564
    String type = null;
565
    type = (String)contentTypeHash.get(newFormat);
566
    MetaCatUtil.debugMessage("contentType looked from hashtalbe is: " +
567
                              type, 30);
568
    return type;
569
  }// lookupcontentypes
570
  
571
  /* Construct content type hashtable */
572
  private void constructContentHashTable()
573
  {
574
    contentTypeHash.put(TEXT, TEXTYPE);
575
    contentTypeHash.put(XML, XMLTYPE);
576
    contentTypeHash.put(HTML,HTMLTYPE);
577
    contentTypeHash.put(GIF, GIFTYPE);
578
    contentTypeHash.put(JPEG, JPEGTYPE);
579
    contentTypeHash.put(BMP, BMPTYPE);
580
    contentTypeHash.put(TAR, TARTYPE);
581
    contentTypeHash.put(ZIP, ZIPTYPE);
582
    contentTypeHash.put(BINARY, BINARYTYPE);
583
    
584
  }//constructrContentHashTable();
585
  
586
  
587
  
588
  public static void main(String[] argus)
589
  {
590
     try
591
     {
592
       DBConnectionPool pool = DBConnectionPool.getInstance();
593
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
594
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
595
       String str = provider.getContentType();
596
       MetaCatUtil.debugMessage("content type is : " + str, 20);
597
     }
598
     catch(Exception e)
599
     {
600
       MetaCatUtil.debugMessage("erorr in Schemalocation.main: " + 
601
                                e.getMessage(), 30);
602
     }
603
  }
604
}//ContentTypeProvider
(17-17/57)