1
|
package edu.ucsb.nceas.metacat.index;
|
2
|
import org.dataone.cn.indexer.convert.*;
|
3
|
import org.dataone.cn.indexer.parser.SolrField;
|
4
|
import org.dataone.cn.indexer.solrhttp.SolrElementField;
|
5
|
import org.w3c.dom.Document;
|
6
|
import org.xml.sax.InputSource;
|
7
|
import org.xml.sax.SAXException;
|
8
|
|
9
|
import javax.xml.parsers.DocumentBuilder;
|
10
|
import javax.xml.parsers.DocumentBuilderFactory;
|
11
|
import javax.xml.parsers.ParserConfigurationException;
|
12
|
import javax.xml.xpath.XPath;
|
13
|
import javax.xml.xpath.XPathExpressionException;
|
14
|
import javax.xml.xpath.XPathFactory;
|
15
|
import java.io.IOException;
|
16
|
import java.io.Reader;
|
17
|
import java.util.List;
|
18
|
|
19
|
/*
|
20
|
* Wrapper to use DataONE indexer's field-processing code for extraction.
|
21
|
*
|
22
|
*/
|
23
|
|
24
|
public class D1IndexField extends FieldSpec {
|
25
|
// man Java enums are awful
|
26
|
public enum DataFormat { SINGLE, SET, MULTISET }
|
27
|
public enum Conversion { NONE, DATE, FGDCDATE, LATITUDE, LONGITUDE, FORMAT }
|
28
|
|
29
|
private static DocumentBuilder docBuilder = null;
|
30
|
private static final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
31
|
private static final XPath xpath = XPathFactory.newInstance().newXPath();
|
32
|
static {
|
33
|
dbf.setNamespaceAware(true);
|
34
|
|
35
|
try {
|
36
|
docBuilder = dbf.newDocumentBuilder();
|
37
|
} catch (ParserConfigurationException e) {
|
38
|
e.printStackTrace();
|
39
|
}
|
40
|
xpath.setNamespaceContext(new MCXmlNamespace());
|
41
|
}
|
42
|
|
43
|
private SolrField d1SolrField;
|
44
|
|
45
|
public D1IndexField(String name, String xp, DataFormat d, Conversion c) {
|
46
|
super(name);
|
47
|
|
48
|
IConverter conv = null;
|
49
|
switch (c) {
|
50
|
case NONE: conv = null; break;
|
51
|
case DATE: conv = new SolrDateConverter(); break;
|
52
|
case FGDCDATE: conv = new FgdcDateConverter(); break;
|
53
|
case LATITUDE: conv = new SolrLatitudeConverter(); break;
|
54
|
case LONGITUDE: conv = new SolrLongitudeConverter(); break;
|
55
|
case FORMAT: conv = new FormatIdToFormatTypeConverter(); break;
|
56
|
}
|
57
|
this.d1SolrField = new SolrField(name, xp, d != DataFormat.SINGLE, conv);
|
58
|
if (d == DataFormat.SINGLE) {
|
59
|
this.d1SolrField.setCombineNodes(true);
|
60
|
}
|
61
|
else if (d == DataFormat.SET) {
|
62
|
this.d1SolrField.setDedupe(true);
|
63
|
}
|
64
|
|
65
|
this.d1SolrField.initExpression(D1IndexField.xpath);
|
66
|
}
|
67
|
|
68
|
@Override
|
69
|
public String[] extract(final Reader in) {
|
70
|
List<SolrElementField> fieldList = null;
|
71
|
try {
|
72
|
// TODO: encodings
|
73
|
Document doc = docBuilder.parse(new InputSource(in));
|
74
|
fieldList = this.solrFields(doc);
|
75
|
} catch (Exception e) {
|
76
|
e.printStackTrace();
|
77
|
}
|
78
|
if (fieldList != null) {
|
79
|
String[] fields = new String[fieldList.size()];
|
80
|
int i = 0;
|
81
|
for (SolrElementField f : fieldList) {
|
82
|
fields[i++] = f.getValue();
|
83
|
}
|
84
|
return fields;
|
85
|
} else {
|
86
|
return new String[0];
|
87
|
}
|
88
|
}
|
89
|
|
90
|
// convenience method for use with DataONE SolrDoc
|
91
|
public List<SolrElementField> solrFields(final Document doc)
|
92
|
throws XPathExpressionException, IOException, ParserConfigurationException, SAXException {
|
93
|
return this.d1SolrField.processField(doc);
|
94
|
}
|
95
|
}
|