Project

General

Profile

« Previous | Next » 

Revision 1553

Added by Jing Tao over 21 years ago

Add code to handle the eml2 content type.

View differences:

src/edu/ucsb/nceas/metacat/ContentTypeProvider.java
34 34
import java.util.Hashtable;
35 35
import java.util.Vector;
36 36

  
37
import org.apache.xpath.objects.XObject;
37 38
import org.apache.xpath.XPathAPI;
38 39
import org.apache.xerces.parsers.DOMParser;
40
import org.apache.xerces.dom.DocumentTypeImpl;
39 41
import org.w3c.dom.Attr;
40 42
import org.w3c.dom.NamedNodeMap;
41 43
import org.w3c.dom.NodeList;
......
43 45
import org.w3c.dom.Node;
44 46
import org.w3c.dom.NodeList;
45 47
import org.w3c.dom.DocumentType;
46
import org.apache.xerces.dom.DocumentTypeImpl;
47
import org.apache.xpath.objects.XObject;
48
import org.xml.sax.InputSource;
48 49

  
50
import javax.xml.parsers.DocumentBuilder;
51
import javax.xml.parsers.DocumentBuilderFactory;
52
import javax.xml.parsers.ParserConfigurationException;
53
import javax.xml.transform.*;
54
import javax.xml.transform.stream.*;
55
import javax.xml.transform.dom.*;
56

  
49 57
import org.ecoinformatics.eml.EMLParser;
50 58
/**
51 59
 * This class will figure out which content type it is for a given data file.
......
73 81
  private String HTML       = "HTML";
74 82
  private String HTMLTYPE   = "text/html";
75 83
  private String GIF        = "gif";
84
  private String JPEG       = "jpeg";
85
  private String JPEGTYPE   = "image/jpeg";
76 86
  private String GIFTYPE    = "image/gif";
77 87
  private String BMP        = "bmp";
78 88
  private String BMPTYPE    = "image/bmp";
......
85 95
  
86 96
  private String ENTITYDOCTYPE = "entitydoctype";
87 97
  private String PHYSICALDOCTYPE = "physicaldoctype";
98
  private String EML2DOCTYPE = "eml2namespace";
99
  private String DATAFORMAT = "dataFormat";
100
  private String TEXTFORMAT = "textFormat";
101
  private String EXTENALFORMAT = "externallyDefinedFormat";
102
  private String FORMATNAME = "formatName";
103
  private String BINARYRASTERFORMAT = "binaryRasterFormat";
88 104
  
105
  private String DATAFILEPATH ="//physical/distribution/online/url";
89 106
 
90 107
  /**
91 108
   * Constructor of ContentTypeProvider
......
124 141
    else if (packageType.equals(EML2))
125 142
    {
126 143
      // for eml2 package
144
      // get eml document for data file
145
      String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE);
146
      findContentTypeInEML2(eml2Docid);
127 147
      
128 148
    }
129 149
 
......
135 155
    return contentType;
136 156
  }//getContentType
137 157
  
158
  /* Method to find content type base on data format*/
159
  private void findContentTypeInEML2(String eml2DocId)
160
  {
161
    DocumentImpl xmlDoc = null;
162
    String xmlString = null;
163
    StringReader read = null;
164
    InputSource in = null;
165
    DocumentBuilderFactory dfactory = null;
166
    Document doc = null;
167
    // create xml document
168
    try
169
    {
170
      xmlDoc = new DocumentImpl(eml2DocId);
171
      xmlString = xmlDoc.toString();
172
      // create dom tree
173
      read = new StringReader(xmlString);
174
      in = new InputSource(read);
175
      dfactory = DocumentBuilderFactory.newInstance();
176
      dfactory.setNamespaceAware(false);
177
      doc = dfactory.newDocumentBuilder().parse(in);
178
    }
179
    catch (Exception e)
180
    {
181
      // if faild, set default value
182
      contentType = DEFAULTCONTENTTYPE;
183
      MetaCatUtil.debugMessage("Error in ContentTypeProvider." +
184
                         "findContentTypeInEML2()" + e.getMessage(), 30);
185
      return;
186
    }
187
    Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH, 
188
                                                   dataFileId);
189
    NodeList childList  = dataFormatNode.getChildNodes();
190
    // go through childList
191
    for (int i = 0; i<childList.getLength(); i++)
192
    {
193
      Node child = childList.item(i);
194
      
195
      // if has text format child set to text/plain
196
      if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT))
197
      {
198
        MetaCatUtil.debugMessage("in text format", 35);
199
        contentType = TEXTYPE;
200
      }
201
      
202
      //external format
203
      if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT))
204
      {
205
        MetaCatUtil.debugMessage("in external format ", 35);
206
        String format = getTextValueForGivenChildTag(child, FORMATNAME);
207
        MetaCatUtil.debugMessage("The format is: "+format, 35);
208
        // if we can find the format in the contentTypeHash table
209
        contentType = (String)lookUpContentType(format);
210
        if (contentType == null)
211
        {
212
          contentType = BINARYTYPE;
213
        }
214
      }
215
      
216
      // binaryRasterFormat
217
      if (child.getNodeName() != null && child.getNodeName().
218
          equals(BINARYRASTERFORMAT))
219
      {
220
        contentType = BINARYTYPE;
221
      }//if
222
    }//for
223
    //if contentype still be null, set default value
224
    if (contentType == null)
225
    {
226
      contentType = DEFAULTCONTENTTYPE;
227
    }
228
  }
229
  
230
  /* Method get text value of given child tagname*/
231
  private String getTextValueForGivenChildTag(Node parentNode, 
232
                                              String childTagName)
233
  {
234
    String textValue = null;
235
    NodeList childList = parentNode.getChildNodes();
236
    for (int i= 0; i<childList.getLength();i++)
237
    {
238
      Node child = childList.item(i);
239
      if (child.getNodeName() != null && child.getNodeName().equals(childTagName))
240
      {
241
        MetaCatUtil.debugMessage("Find child node: " + childTagName, 35);
242
        Node textNode = child.getFirstChild();
243
        if (textNode.getNodeType() == Node.TEXT_NODE)
244
        {
245
          textValue = textNode.getNodeValue();
246
        }//if
247
      }//if
248
    }//for
249
    MetaCatUtil.debugMessage("The text value for element- " + childTagName +
250
                             " is " + textValue, 30);
251
    return textValue;
252
  }//getTExtValueForGivenChildTag
253
  
254
  /* Find the data format node in eml2 document */
255
  private Node findDataFormatNodeInEML2(Document xml, String xPath, 
256
                                       String targetDocId)
257
  {
258
    Node targetNode = null;
259
    Node node = findDataFileNodeInEML2(xml, xPath, targetDocId);
260
    // get the phycial the prent is online, grandparent is distribution
261
    // the grand'parent is physical
262
    Node phyicalNode = node.getParentNode().getParentNode().getParentNode();
263
    NodeList list = phyicalNode.getChildNodes();
264
    for (int i = 0; i<list.getLength(); i++)
265
    {
266
      Node kid = list.item(i);
267
      // find dataFormat node
268
      if (kid.getNodeType() == node.ELEMENT_NODE && 
269
          kid.getNodeName().equals(DATAFORMAT))
270
      {
271
        targetNode = kid;
272
        break;
273
      }//if
274
    }//for
275
    MetaCatUtil.debugMessage("dataFormat node'name: "+ 
276
                             targetNode.getNodeName(), 35);
277
    return targetNode;
278
  }
279
  /* Find the datafile node */
280
  private Node findDataFileNodeInEML2(Document xml, String xPath, 
281
                                String targetDocId)
282
  {
283
    Node dataFileNode = null;
284
    NodeList list = null;
285
    try
286
    {
287
      list = XPathAPI.selectNodeList(xml, xPath);
288
    }
289
    catch (Exception e)
290
    {
291
      // catch an error and return null
292
      MetaCatUtil.debugMessage("Error in findDataFileNode: "+e.getMessage(), 30);
293
      return dataFileNode;
294
    }
295
    // go through the list and find target docid in online/url
296
    for (int i = 0; i<list.getLength(); i++)
297
    {
298
      Node node = list.item(i);
299
      Node textNode = node.getFirstChild();
300
      if (textNode.getNodeType() == node.TEXT_NODE)
301
      {
302
        String URLData = textNode.getNodeValue();
303
        MetaCatUtil.debugMessage("online/url text data: " + URLData, 30);
304
        //Only handle data file in local metacat server
305
        if (URLData.indexOf(MetaCatUtil.getOption("httpserver")) != -1 || 
306
            URLData.indexOf(MetaCatUtil.getOption("server")) != -1)
307
        {
308
          // Get docid from url
309
          String docId =MetaCatUtil.getDocIdWithRevFromOnlineURL(URLData);
310
          // Get rid of revision
311
          docId = MetaCatUtil.getDocIdFromString(docId);
312
          MetaCatUtil.debugMessage("docid from url element in xml is: "+
313
                                   docId, 30);
314
          //if this docid equals target one, we find it
315
          if (docId != null && docId.equals(targetDocId))
316
          {
317
            MetaCatUtil.debugMessage("Find target docid in online/url: "+
318
                                      docId, 30);
319
            dataFileNode = node;
320
            break;
321
          }
322
        }//if
323
        
324
      }//if
325
    }//for
326
    MetaCatUtil.debugMessage("online/url node's name: " +
327
                             dataFileNode.getNodeName(), 35);
328
    return dataFileNode;
329
  }//findDataFileNode
330
  
138 331
  /* Get relative docid list and packagetype */
139 332
  private Vector getRelativeDocIdList(String id) 
140 333
  {
......
292 485
        DBConnectionPool.returnDBConnection(conn, serialNumber);
293 486
      }
294 487
    }//finally
295
    MetaCatUtil.debugMessage("!!!!!!!!!target docid is: "+ docId + " "+
488
    MetaCatUtil.debugMessage("target docid is: "+ docId + " "+
296 489
                             "for target doctype: "+targetType, 25);
297 490
    return docId;
298 491
  }
......
382 575
    contentTypeHash.put(XML, XMLTYPE);
383 576
    contentTypeHash.put(HTML,HTMLTYPE);
384 577
    contentTypeHash.put(GIF, GIFTYPE);
578
    contentTypeHash.put(JPEG, JPEGTYPE);
385 579
    contentTypeHash.put(BMP, BMPTYPE);
386 580
    contentTypeHash.put(TAR, TARTYPE);
387 581
    contentTypeHash.put(ZIP, ZIPTYPE);
......
389 583
    
390 584
  }//constructrContentHashTable();
391 585
  
586
  
587
  
392 588
  public static void main(String[] argus)
393 589
  {
394 590
     try
395 591
     {
396 592
       DBConnectionPool pool = DBConnectionPool.getInstance();
397
       ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
593
       //ContentTypeProvider provider = new ContentTypeProvider("tao.9830");
594
       ContentTypeProvider provider = new ContentTypeProvider("tao.0001");
398 595
       String str = provider.getContentType();
399 596
       MetaCatUtil.debugMessage("content type is : " + str, 20);
400
       
401 597
     }
402 598
     catch(Exception e)
403 599
     {

Also available in: Unified diff