package edu.ucsb.nceas.metacat.index;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;

import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

10
|
/**
|
11
|
* Identify document classes for indexing. Replaces equivalent Spring-based configuration
|
12
|
* used by DataONE indexer. It's not clear that we actually need to perfectly mimic this
|
13
|
* stuff, but for the now we shall. Note that the replicated DataONE classifications
|
14
|
* apply only to DataONE System Metadata documents. Additional classifiers have been
|
15
|
* added for plain EML etc.
|
16
|
*
|
17
|
* There are better ways to configure this stuff, but this is in effect a direct transcript
|
18
|
* of the DataONE material. Automatic extraction from Spring configuration is doable, but
|
19
|
* not trivial.
|
20
|
*/
|
21
|
public class DocType {
|
22
|
//
|
23
|
private static final XPath xpath = XPathFactory.newInstance().newXPath();
|
24
|
private static final List<XPathExpression> d1sys = new ArrayList<XPathExpression>(1);
|
25
|
private static final List<XPathExpression> d1eml = new ArrayList<XPathExpression>(4);
|
26
|
private static final List<XPathExpression> d1dryad = new ArrayList<XPathExpression>(1);
|
27
|
private static final List<XPathExpression> d1fgdc = new ArrayList<XPathExpression>(3);
|
28
|
private static final List<XPathExpression> eml = new ArrayList<XPathExpression>(4);
|
29
|
static {
|
30
|
xpath.setNamespaceContext(new MCXmlNamespace());
|
31
|
|
32
|
try {
|
33
|
d1sys.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata"));
|
34
|
|
35
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.0']"));
|
36
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.1']"));
|
37
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
|
38
|
d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
|
39
|
|
40
|
d1dryad.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'http://purl.org/dryad/terms/']"));
|
41
|
|
42
|
d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001-1998']"));
|
43
|
d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.1-1999']"));
|
44
|
d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.2-1999']"));
|
45
|
|
46
|
eml.add(xpath.compile("/" + MCXmlNamespace.E200 + ":eml"));
|
47
|
eml.add(xpath.compile("/" + MCXmlNamespace.E201 + ":eml"));
|
48
|
eml.add(xpath.compile("/" + MCXmlNamespace.E210 + ":eml"));
|
49
|
eml.add(xpath.compile("/" + MCXmlNamespace.E211 + ":eml"));
|
50
|
} catch (XPathExpressionException e) {
|
51
|
// TODO: logs
|
52
|
e.printStackTrace();
|
53
|
}
|
54
|
}
|
55
|
public static boolean isSysmeta(Reader in) {
|
56
|
return check(d1sys, in);
|
57
|
}
|
58
|
public static boolean isSyseml(Reader in) {
|
59
|
return check(d1eml, in);
|
60
|
}
|
61
|
public static boolean isSysdryad(Reader in) {
|
62
|
return check(d1dryad, in);
|
63
|
}
|
64
|
public static boolean isSysfgdc(Reader in) {
|
65
|
return check(d1fgdc, in);
|
66
|
}
|
67
|
public static boolean isEml(Reader in) {
|
68
|
return check(eml, in);
|
69
|
}
|
70
|
|
71
|
public static boolean check(List<XPathExpression> exprs, Reader in) {
|
72
|
InputSource src = new InputSource(in);
|
73
|
try {
|
74
|
for (XPathExpression x : exprs) {
|
75
|
Boolean match = (Boolean) x.evaluate(src, XPathConstants.BOOLEAN);
|
76
|
if (match != null && match.booleanValue()) {
|
77
|
return true;
|
78
|
}
|
79
|
}
|
80
|
} catch (XPathExpressionException e) {
|
81
|
}
|
82
|
return false;
|
83
|
}
|
84
|
}
|