Project

General

Profile

1
package edu.ucsb.nceas.metacat.index;
2

    
3
import org.xml.sax.InputSource;
4

    
5
import javax.xml.xpath.*;
6
import java.io.Reader;
7
import java.util.ArrayList;
8
import java.util.List;
9

    
10
/**
11
 * Identify document classes for indexing. Replaces equivalent Spring-based configuration
12
 * used by DataONE indexer. It's not clear that we actually need to perfectly mimic this
13
 * stuff, but for the now we shall. Note that the replicated DataONE classifications
14
 * apply only to DataONE System Metadata documents. Additional classifiers have been
15
 * added for plain EML etc.
16
 *
17
 * There are better ways to configure this stuff, but this is in effect a direct transcript
18
 * of the DataONE material. Automatic extraction from Spring configuration is doable, but
19
 * not trivial.
20
 */
21
public class DocType {
22
	//
23
	private static final XPath xpath = XPathFactory.newInstance().newXPath();
24
	private static final List<XPathExpression> d1sys = new ArrayList<XPathExpression>(1);
25
	private static final List<XPathExpression> d1eml = new ArrayList<XPathExpression>(4);
26
	private static final List<XPathExpression> d1dryad = new ArrayList<XPathExpression>(1);
27
	private static final List<XPathExpression> d1fgdc = new ArrayList<XPathExpression>(3);
28
	private static final List<XPathExpression> eml = new ArrayList<XPathExpression>(4);
29
	static {
30
		xpath.setNamespaceContext(new MCXmlNamespace());
31

    
32
		try {
33
			d1sys.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata"));
34

    
35
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.0']"));
36
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.1']"));
37
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
38
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
39

    
40
			d1dryad.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'http://purl.org/dryad/terms/']"));
41

    
42
			d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001-1998']"));
43
			d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.1-1999']"));
44
			d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.2-1999']"));
45

    
46
			eml.add(xpath.compile("/" + MCXmlNamespace.E200 + ":eml"));
47
			eml.add(xpath.compile("/" + MCXmlNamespace.E201 + ":eml"));
48
			eml.add(xpath.compile("/" + MCXmlNamespace.E210 + ":eml"));
49
			eml.add(xpath.compile("/" + MCXmlNamespace.E211 + ":eml"));
50
		} catch (XPathExpressionException e) {
51
			// TODO: logs
52
			e.printStackTrace();
53
		}
54
	}
55
	public static boolean isSysmeta(Reader in) {
56
		return check(d1sys, in);
57
	}
58
	public static boolean isSyseml(Reader in) {
59
		return check(d1eml, in);
60
	}
61
	public static boolean isSysdryad(Reader in) {
62
		return check(d1dryad, in);
63
	}
64
	public static boolean isSysfgdc(Reader in) {
65
		return check(d1fgdc, in);
66
	}
67
	public static boolean isEml(Reader in) {
68
		return check(eml, in);
69
	}
70

    
71
	public static boolean check(List<XPathExpression> exprs, Reader in) {
72
		InputSource src = new InputSource(in);
73
		try {
74
			for (XPathExpression x : exprs) {
75
				Boolean match = (Boolean) x.evaluate(src, XPathConstants.BOOLEAN);
76
				if (match != null && match.booleanValue()) {
77
					return true;
78
				}
79
			}
80
		} catch (XPathExpressionException e) {
81
		}
82
		return false;
83
	}
84
}
(3-3/14)