1 |
7491
|
hahn
|
package edu.ucsb.nceas.metacat.index;
|
2 |
|
|
|
3 |
|
|
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;

import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
|
9 |
|
|
|
10 |
|
|
/**
|
11 |
|
|
* Identify document classes for indexing. Replaces equivalent Spring-based configuration
|
12 |
|
|
* used by DataONE indexer. It's not clear that we actually need to perfectly mimic this
|
13 |
|
|
* stuff, but for the now we shall. Note that the replicated DataONE classifications
|
14 |
|
|
* apply only to DataONE System Metadata documents. Additional classifiers have been
|
15 |
|
|
* added for plain EML etc.
|
16 |
|
|
*
|
17 |
|
|
* There are better ways to configure this stuff, but this is in effect a direct transcript
|
18 |
|
|
* of the DataONE material. Automatic extraction from Spring configuration is doable, but
|
19 |
|
|
* not trivial.
|
20 |
|
|
*/
|
21 |
|
|
public class DocType {
|
22 |
|
|
//
|
23 |
|
|
private static final XPath xpath = XPathFactory.newInstance().newXPath();
|
24 |
|
|
private static final List<XPathExpression> d1sys = new ArrayList<XPathExpression>(1);
|
25 |
|
|
private static final List<XPathExpression> d1eml = new ArrayList<XPathExpression>(4);
|
26 |
|
|
private static final List<XPathExpression> d1dryad = new ArrayList<XPathExpression>(1);
|
27 |
|
|
private static final List<XPathExpression> d1fgdc = new ArrayList<XPathExpression>(3);
|
28 |
|
|
private static final List<XPathExpression> eml = new ArrayList<XPathExpression>(4);
|
29 |
|
|
static {
|
30 |
|
|
xpath.setNamespaceContext(new MCXmlNamespace());
|
31 |
|
|
|
32 |
|
|
try {
|
33 |
|
|
d1sys.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata"));
|
34 |
|
|
|
35 |
|
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.0']"));
|
36 |
|
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.1']"));
|
37 |
|
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
|
38 |
|
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
|
39 |
|
|
|
40 |
|
|
d1dryad.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'http://purl.org/dryad/terms/']"));
|
41 |
|
|
|
42 |
|
|
d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001-1998']"));
|
43 |
|
|
d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.1-1999']"));
|
44 |
|
|
d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.2-1999']"));
|
45 |
|
|
|
46 |
|
|
eml.add(xpath.compile("/" + MCXmlNamespace.E200 + ":eml"));
|
47 |
|
|
eml.add(xpath.compile("/" + MCXmlNamespace.E201 + ":eml"));
|
48 |
|
|
eml.add(xpath.compile("/" + MCXmlNamespace.E210 + ":eml"));
|
49 |
|
|
eml.add(xpath.compile("/" + MCXmlNamespace.E211 + ":eml"));
|
50 |
|
|
} catch (XPathExpressionException e) {
|
51 |
|
|
// TODO: logs
|
52 |
|
|
e.printStackTrace();
|
53 |
|
|
}
|
54 |
|
|
}
|
55 |
|
|
public static boolean isSysmeta(Reader in) {
|
56 |
|
|
return check(d1sys, in);
|
57 |
|
|
}
|
58 |
|
|
public static boolean isSyseml(Reader in) {
|
59 |
|
|
return check(d1eml, in);
|
60 |
|
|
}
|
61 |
|
|
public static boolean isSysdryad(Reader in) {
|
62 |
|
|
return check(d1dryad, in);
|
63 |
|
|
}
|
64 |
|
|
public static boolean isSysfgdc(Reader in) {
|
65 |
|
|
return check(d1fgdc, in);
|
66 |
|
|
}
|
67 |
|
|
public static boolean isEml(Reader in) {
|
68 |
|
|
return check(eml, in);
|
69 |
|
|
}
|
70 |
|
|
|
71 |
|
|
public static boolean check(List<XPathExpression> exprs, Reader in) {
|
72 |
|
|
InputSource src = new InputSource(in);
|
73 |
|
|
try {
|
74 |
|
|
for (XPathExpression x : exprs) {
|
75 |
|
|
Boolean match = (Boolean) x.evaluate(src, XPathConstants.BOOLEAN);
|
76 |
|
|
if (match != null && match.booleanValue()) {
|
77 |
|
|
return true;
|
78 |
|
|
}
|
79 |
|
|
}
|
80 |
|
|
} catch (XPathExpressionException e) {
|
81 |
|
|
}
|
82 |
|
|
return false;
|
83 |
|
|
}
|
84 |
|
|
}
|