Project

General

Profile

1
package edu.ucsb.nceas.metacat.index;
2
import org.dataone.cn.indexer.convert.*;
3
import org.dataone.cn.indexer.parser.SolrField;
4
import org.dataone.cn.indexer.solrhttp.SolrElementField;
5
import org.w3c.dom.Document;
6
import org.xml.sax.InputSource;
7
import org.xml.sax.SAXException;
8

    
9
import javax.xml.parsers.DocumentBuilder;
10
import javax.xml.parsers.DocumentBuilderFactory;
11
import javax.xml.parsers.ParserConfigurationException;
12
import javax.xml.xpath.XPath;
13
import javax.xml.xpath.XPathExpressionException;
14
import javax.xml.xpath.XPathFactory;
15
import java.io.IOException;
16
import java.io.Reader;
17
import java.util.List;
18

    
19
/*
 * Wrapper that uses the DataONE indexer's field-processing code to extract
 * field values from XML documents.
 */
23

    
24
public class D1IndexField extends FieldSpec {
25
	// man Java enums are awful
26
	public enum DataFormat { SINGLE, SET, MULTISET }
27
	public enum Conversion { NONE, DATE, FGDCDATE, LATITUDE, LONGITUDE, FORMAT }
28

    
29
	private static DocumentBuilder docBuilder = null;
30
	private static final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
31
	private static final XPath xpath = XPathFactory.newInstance().newXPath();
32
	static {
33
		dbf.setNamespaceAware(true);
34

    
35
		try {
36
			docBuilder = dbf.newDocumentBuilder();
37
		} catch (ParserConfigurationException e) {
38
			e.printStackTrace();
39
		}
40
		xpath.setNamespaceContext(new MCXmlNamespace());
41
	}
42

    
43
	private SolrField d1SolrField;
44

    
45
	public D1IndexField(String name, String xp, DataFormat d, Conversion c) {
46
		super(name);
47

    
48
		IConverter conv = null;
49
		switch (c) {
50
			case NONE: conv = null; break;
51
			case DATE: conv = new SolrDateConverter(); break;
52
			case FGDCDATE: conv = new FgdcDateConverter(); break;
53
			case LATITUDE: conv = new SolrLatitudeConverter(); break;
54
			case LONGITUDE: conv = new SolrLongitudeConverter(); break;
55
			case FORMAT: conv = new FormatIdToFormatTypeConverter(); break;
56
		}
57
		this.d1SolrField = new SolrField(name, xp, d != DataFormat.SINGLE, conv);
58
		if (d == DataFormat.SINGLE) {
59
			this.d1SolrField.setCombineNodes(true);
60
		}
61
		else if (d == DataFormat.SET) {
62
			this.d1SolrField.setDedupe(true);
63
		}
64

    
65
		this.d1SolrField.initExpression(D1IndexField.xpath);
66
	}
67

    
68
	@Override
69
	public String[] extract(final Reader in) {
70
		List<SolrElementField> fieldList = null;
71
		try {
72
			// TODO: encodings
73
			Document doc = docBuilder.parse(new InputSource(in));
74
			fieldList = this.solrFields(doc);
75
		} catch (Exception e) {
76
			e.printStackTrace();
77
		}
78
		if (fieldList != null) {
79
			String[] fields = new String[fieldList.size()];
80
			int i = 0;
81
			for (SolrElementField f : fieldList) {
82
				fields[i++] = f.getValue();
83
			}
84
			return fields;
85
		} else {
86
			return new String[0];
87
		}
88
	}
89

    
90
	// convenience method for use with DataONE SolrDoc
91
	public List<SolrElementField> solrFields(final Document doc)
92
			throws XPathExpressionException, IOException, ParserConfigurationException, SAXException {
93
		return this.d1SolrField.processField(doc);
94
	}
95
}
(2-2/13)