Revision 1553
Added by Jing Tao over 21 years ago
src/edu/ucsb/nceas/metacat/ContentTypeProvider.java | ||
---|---|---|
34 | 34 |
import java.util.Hashtable; |
35 | 35 |
import java.util.Vector; |
36 | 36 |
|
37 |
import org.apache.xpath.objects.XObject; |
|
37 | 38 |
import org.apache.xpath.XPathAPI; |
38 | 39 |
import org.apache.xerces.parsers.DOMParser; |
40 |
import org.apache.xerces.dom.DocumentTypeImpl; |
|
39 | 41 |
import org.w3c.dom.Attr; |
40 | 42 |
import org.w3c.dom.NamedNodeMap; |
41 | 43 |
import org.w3c.dom.NodeList; |
... | ... | |
43 | 45 |
import org.w3c.dom.Node; |
44 | 46 |
import org.w3c.dom.NodeList; |
45 | 47 |
import org.w3c.dom.DocumentType; |
46 |
import org.apache.xerces.dom.DocumentTypeImpl; |
|
47 |
import org.apache.xpath.objects.XObject; |
|
48 |
import org.xml.sax.InputSource; |
|
48 | 49 |
|
50 |
import javax.xml.parsers.DocumentBuilder; |
|
51 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
52 |
import javax.xml.parsers.ParserConfigurationException; |
|
53 |
import javax.xml.transform.*; |
|
54 |
import javax.xml.transform.stream.*; |
|
55 |
import javax.xml.transform.dom.*; |
|
56 |
|
|
49 | 57 |
import org.ecoinformatics.eml.EMLParser; |
50 | 58 |
/** |
51 | 59 |
* This class will figure out which content type it is for a given data file. |
... | ... | |
73 | 81 |
private String HTML = "HTML"; |
74 | 82 |
private String HTMLTYPE = "text/html"; |
75 | 83 |
private String GIF = "gif"; |
84 |
private String JPEG = "jpeg"; |
|
85 |
private String JPEGTYPE = "image/jpeg"; |
|
76 | 86 |
private String GIFTYPE = "image/gif"; |
77 | 87 |
private String BMP = "bmp"; |
78 | 88 |
private String BMPTYPE = "image/bmp"; |
... | ... | |
85 | 95 |
|
86 | 96 |
private String ENTITYDOCTYPE = "entitydoctype"; |
87 | 97 |
private String PHYSICALDOCTYPE = "physicaldoctype"; |
98 |
private String EML2DOCTYPE = "eml2namespace"; |
|
99 |
private String DATAFORMAT = "dataFormat"; |
|
100 |
private String TEXTFORMAT = "textFormat"; |
|
101 |
private String EXTENALFORMAT = "externallyDefinedFormat"; |
|
102 |
private String FORMATNAME = "formatName"; |
|
103 |
private String BINARYRASTERFORMAT = "binaryRasterFormat"; |
|
88 | 104 |
|
105 |
private String DATAFILEPATH ="//physical/distribution/online/url"; |
|
89 | 106 |
|
90 | 107 |
/** |
91 | 108 |
* Constructor of ContentTypeProvider |
... | ... | |
124 | 141 |
else if (packageType.equals(EML2)) |
125 | 142 |
{ |
126 | 143 |
// for eml2 package |
144 |
// get eml document for data file |
|
145 |
String eml2Docid = getTargetDocIdForBeta(docLists, EML2DOCTYPE); |
|
146 |
findContentTypeInEML2(eml2Docid); |
|
127 | 147 |
|
128 | 148 |
} |
129 | 149 |
|
... | ... | |
135 | 155 |
return contentType; |
136 | 156 |
}//getContentType |
137 | 157 |
|
158 |
/* Method to find the content type based on the data format */ |
|
159 |
private void findContentTypeInEML2(String eml2DocId) |
|
160 |
{ |
|
161 |
DocumentImpl xmlDoc = null; |
|
162 |
String xmlString = null; |
|
163 |
StringReader read = null; |
|
164 |
InputSource in = null; |
|
165 |
DocumentBuilderFactory dfactory = null; |
|
166 |
Document doc = null; |
|
167 |
// create xml document |
|
168 |
try |
|
169 |
{ |
|
170 |
xmlDoc = new DocumentImpl(eml2DocId); |
|
171 |
xmlString = xmlDoc.toString(); |
|
172 |
// create dom tree |
|
173 |
read = new StringReader(xmlString); |
|
174 |
in = new InputSource(read); |
|
175 |
dfactory = DocumentBuilderFactory.newInstance(); |
|
176 |
dfactory.setNamespaceAware(false); |
|
177 |
doc = dfactory.newDocumentBuilder().parse(in); |
|
178 |
} |
|
179 |
catch (Exception e) |
|
180 |
{ |
|
181 |
// if faild, set default value |
|
182 |
contentType = DEFAULTCONTENTTYPE; |
|
183 |
MetaCatUtil.debugMessage("Error in ContentTypeProvider." + |
|
184 |
"findContentTypeInEML2()" + e.getMessage(), 30); |
|
185 |
return; |
|
186 |
} |
|
187 |
Node dataFormatNode = findDataFormatNodeInEML2(doc, DATAFILEPATH, |
|
188 |
dataFileId); |
|
189 |
NodeList childList = dataFormatNode.getChildNodes(); |
|
190 |
// go through childList |
|
191 |
for (int i = 0; i<childList.getLength(); i++) |
|
192 |
{ |
|
193 |
Node child = childList.item(i); |
|
194 |
|
|
195 |
// if has text format child set to text/plain |
|
196 |
if (child.getNodeName() != null && child.getNodeName().equals(TEXTFORMAT)) |
|
197 |
{ |
|
198 |
MetaCatUtil.debugMessage("in text format", 35); |
|
199 |
contentType = TEXTYPE; |
|
200 |
} |
|
201 |
|
|
202 |
//external format |
|
203 |
if (child.getNodeName() != null && child.getNodeName().equals(EXTENALFORMAT)) |
|
204 |
{ |
|
205 |
MetaCatUtil.debugMessage("in external format ", 35); |
|
206 |
String format = getTextValueForGivenChildTag(child, FORMATNAME); |
|
207 |
MetaCatUtil.debugMessage("The format is: "+format, 35); |
|
208 |
// if we can find the format in the contentTypeHash table |
|
209 |
contentType = (String)lookUpContentType(format); |
|
210 |
if (contentType == null) |
|
211 |
{ |
|
212 |
contentType = BINARYTYPE; |
|
213 |
} |
|
214 |
} |
|
215 |
|
|
216 |
// binaryRasterFormat |
|
217 |
if (child.getNodeName() != null && child.getNodeName(). |
|
218 |
equals(BINARYRASTERFORMAT)) |
|
219 |
{ |
|
220 |
contentType = BINARYTYPE; |
|
221 |
}//if |
|
222 |
}//for |
|
223 |
//if contentype still be null, set default value |
|
224 |
if (contentType == null) |
|
225 |
{ |
|
226 |
contentType = DEFAULTCONTENTTYPE; |
|
227 |
} |
|
228 |
} |
|
229 |
|
|
230 |
/* Method to get the text value of the given child tag name */ |
|
231 |
private String getTextValueForGivenChildTag(Node parentNode, |
|
232 |
String childTagName) |
|
233 |
{ |
|
234 |
String textValue = null; |
|
235 |
NodeList childList = parentNode.getChildNodes(); |
|
236 |
for (int i= 0; i<childList.getLength();i++) |
|
237 |
{ |
|
238 |
Node child = childList.item(i); |
|
239 |
if (child.getNodeName() != null && child.getNodeName().equals(childTagName)) |
|
240 |
{ |
|
241 |
MetaCatUtil.debugMessage("Find child node: " + childTagName, 35); |
|
242 |
Node textNode = child.getFirstChild(); |
|
243 |
if (textNode.getNodeType() == Node.TEXT_NODE) |
|
244 |
{ |
|
245 |
textValue = textNode.getNodeValue(); |
|
246 |
}//if |
|
247 |
}//if |
|
248 |
}//for |
|
249 |
MetaCatUtil.debugMessage("The text value for element- " + childTagName + |
|
250 |
" is " + textValue, 30); |
|
251 |
return textValue; |
|
252 |
}//getTExtValueForGivenChildTag |
|
253 |
|
|
254 |
/* Find the data format node in eml2 document */ |
|
255 |
private Node findDataFormatNodeInEML2(Document xml, String xPath, |
|
256 |
String targetDocId) |
|
257 |
{ |
|
258 |
Node targetNode = null; |
|
259 |
Node node = findDataFileNodeInEML2(xml, xPath, targetDocId); |
|
260 |
// get the phycial the prent is online, grandparent is distribution |
|
261 |
// the grand'parent is physical |
|
262 |
Node phyicalNode = node.getParentNode().getParentNode().getParentNode(); |
|
263 |
NodeList list = phyicalNode.getChildNodes(); |
|
264 |
for (int i = 0; i<list.getLength(); i++) |
|
265 |
{ |
|
266 |
Node kid = list.item(i); |
|
267 |
// find dataFormat node |
|
268 |
if (kid.getNodeType() == node.ELEMENT_NODE && |
|
269 |
kid.getNodeName().equals(DATAFORMAT)) |
|
270 |
{ |
|
271 |
targetNode = kid; |
|
272 |
break; |
|
273 |
}//if |
|
274 |
}//for |
|
275 |
MetaCatUtil.debugMessage("dataFormat node'name: "+ |
|
276 |
targetNode.getNodeName(), 35); |
|
277 |
return targetNode; |
|
278 |
} |
|
279 |
/* Find the datafile node */ |
|
280 |
private Node findDataFileNodeInEML2(Document xml, String xPath, |
|
281 |
String targetDocId) |
|
282 |
{ |
|
283 |
Node dataFileNode = null; |
|
284 |
NodeList list = null; |
|
285 |
try |
|
286 |
{ |
|
287 |
list = XPathAPI.selectNodeList(xml, xPath); |
|
288 |
} |
|
289 |
catch (Exception e) |
|
290 |
{ |
|
291 |
// catch an error and return null |
|
292 |
MetaCatUtil.debugMessage("Error in findDataFileNode: "+e.getMessage(), 30); |
|
293 |
return dataFileNode; |
|
294 |
} |
|
295 |
// go through the list and find target docid in online/url |
|
296 |
for (int i = 0; i<list.getLength(); i++) |
|
297 |
{ |
|
298 |
Node node = list.item(i); |
|
299 |
Node textNode = node.getFirstChild(); |
|
300 |
if (textNode.getNodeType() == node.TEXT_NODE) |
|
301 |
{ |
|
302 |
String URLData = textNode.getNodeValue(); |
|
303 |
MetaCatUtil.debugMessage("online/url text data: " + URLData, 30); |
|
304 |
//Only handle data file in local metacat server |
|
305 |
if (URLData.indexOf(MetaCatUtil.getOption("httpserver")) != -1 || |
|
306 |
URLData.indexOf(MetaCatUtil.getOption("server")) != -1) |
|
307 |
{ |
|
308 |
// Get docid from url |
|
309 |
String docId =MetaCatUtil.getDocIdWithRevFromOnlineURL(URLData); |
|
310 |
// Get rid of revision |
|
311 |
docId = MetaCatUtil.getDocIdFromString(docId); |
|
312 |
MetaCatUtil.debugMessage("docid from url element in xml is: "+ |
|
313 |
docId, 30); |
|
314 |
//if this docid equals target one, we find it |
|
315 |
if (docId != null && docId.equals(targetDocId)) |
|
316 |
{ |
|
317 |
MetaCatUtil.debugMessage("Find target docid in online/url: "+ |
|
318 |
docId, 30); |
|
319 |
dataFileNode = node; |
|
320 |
break; |
|
321 |
} |
|
322 |
}//if |
|
323 |
|
|
324 |
}//if |
|
325 |
}//for |
|
326 |
MetaCatUtil.debugMessage("online/url node's name: " + |
|
327 |
dataFileNode.getNodeName(), 35); |
|
328 |
return dataFileNode; |
|
329 |
}//findDataFileNode |
|
330 |
|
|
138 | 331 |
/* Get relative docid list and packagetype */ |
139 | 332 |
private Vector getRelativeDocIdList(String id) |
140 | 333 |
{ |
... | ... | |
292 | 485 |
DBConnectionPool.returnDBConnection(conn, serialNumber); |
293 | 486 |
} |
294 | 487 |
}//finally |
295 |
MetaCatUtil.debugMessage("!!!!!!!!!target docid is: "+ docId + " "+
|
|
488 |
MetaCatUtil.debugMessage("target docid is: "+ docId + " "+ |
|
296 | 489 |
"for target doctype: "+targetType, 25); |
297 | 490 |
return docId; |
298 | 491 |
} |
... | ... | |
382 | 575 |
contentTypeHash.put(XML, XMLTYPE); |
383 | 576 |
contentTypeHash.put(HTML,HTMLTYPE); |
384 | 577 |
contentTypeHash.put(GIF, GIFTYPE); |
578 |
contentTypeHash.put(JPEG, JPEGTYPE); |
|
385 | 579 |
contentTypeHash.put(BMP, BMPTYPE); |
386 | 580 |
contentTypeHash.put(TAR, TARTYPE); |
387 | 581 |
contentTypeHash.put(ZIP, ZIPTYPE); |
... | ... | |
389 | 583 |
|
390 | 584 |
}//constructrContentHashTable(); |
391 | 585 |
|
586 |
|
|
587 |
|
|
392 | 588 |
public static void main(String[] argus) |
393 | 589 |
{ |
394 | 590 |
try |
395 | 591 |
{ |
396 | 592 |
DBConnectionPool pool = DBConnectionPool.getInstance(); |
397 |
ContentTypeProvider provider = new ContentTypeProvider("tao.9830"); |
|
593 |
//ContentTypeProvider provider = new ContentTypeProvider("tao.9830"); |
|
594 |
ContentTypeProvider provider = new ContentTypeProvider("tao.0001"); |
|
398 | 595 |
String str = provider.getContentType(); |
399 | 596 |
MetaCatUtil.debugMessage("content type is : " + str, 20); |
400 |
|
|
401 | 597 |
} |
402 | 598 |
catch(Exception e) |
403 | 599 |
{ |
Also available in: Unified diff
Add code to handle EML2 content type.