Project

General

Profile

« Previous | Next » 

Revision 7491

Search and indexing with Lucene/SOLR
Requires a manually configured SOLR installation
Not currently used by the rest of metacat

View differences:

build.xml
101 101
	<target name="init" depends="config">
102 102
		<path id="compile.classpath">
103 103
			<pathelement location="${jsdk}" />
104
			<pathelement location="lib" />
104
            <fileset dir='lib/lucene'>
105
                <include name='*.jar'/>
106
            </fileset>
107
            <fileset dir='lib/solr'>
108
                <include name='*.jar'/>
109
            </fileset>
110

  
111
            <pathelement location="lib" />
105 112
			<fileset dir="lib">
106 113
				<include name="*.jar" />
107 114
			</fileset>
lib/style/common/eml_xsl.css
1192 1192
    font-style:                 normal;
1193 1193
    font-weight:                700;
1194 1194
}
1195

  
1196
/**
1197
* For EML spec styling
1198
**/
1199
div.book {
1200
	margin: 20px;	
1201
}
1202

  
1203
div.book a:link {
1204
	text-decoration: underline;		
1205
}
1206

  
1207
div.title {
1208
    color:                      #003366;
1209
    background-color:           #ffffff;
1210
    padding:                    0;
1211
    font-size:                  16pt;
1212
    font-style:                 normal;
1213
    font-weight:                bold;
1214
    text-decoration:            none;
1215
}
1216

  
1217
div.sectiontitle {
1218
    color:                      #003366;
1219
    background-color:           #ffffff;
1220
    padding:                    0;
1221
    font-size:                  14pt;
1222
    font-style:                 normal;
1223
    font-weight:                700;
1224
    text-decoration:            none;
1225
}
1226

  
src/edu/ucsb/nceas/metacat/index/D1IndexField.java
1
package edu.ucsb.nceas.metacat.index;
2
import org.dataone.cn.indexer.convert.*;
3
import org.dataone.cn.indexer.parser.SolrField;
4
import org.dataone.cn.indexer.solrhttp.SolrElementField;
5
import org.w3c.dom.Document;
6
import org.xml.sax.InputSource;
7
import org.xml.sax.SAXException;
8

  
9
import javax.xml.parsers.DocumentBuilder;
10
import javax.xml.parsers.DocumentBuilderFactory;
11
import javax.xml.parsers.ParserConfigurationException;
12
import javax.xml.xpath.XPath;
13
import javax.xml.xpath.XPathExpressionException;
14
import javax.xml.xpath.XPathFactory;
15
import java.io.IOException;
16
import java.io.Reader;
17
import java.util.List;
18

  
19
/*
20
 * Wrapper to use DataONE indexer's field-processing code for extraction.
21
 *
22
*/
23

  
24
public class D1IndexField extends FieldSpec {
25
	// man Java enums are awful
26
	public enum DataFormat { SINGLE, SET, MULTISET }
27
	public enum Conversion { NONE, DATE, FGDCDATE, LATITUDE, LONGITUDE, FORMAT }
28

  
29
	private static DocumentBuilder docBuilder = null;
30
	private static final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
31
	private static final XPath xpath = XPathFactory.newInstance().newXPath();
32
	static {
33
		dbf.setNamespaceAware(true);
34

  
35
		try {
36
			docBuilder = dbf.newDocumentBuilder();
37
		} catch (ParserConfigurationException e) {
38
			e.printStackTrace();
39
		}
40
		xpath.setNamespaceContext(new MCXmlNamespace());
41
	}
42

  
43
	private SolrField d1SolrField;
44

  
45
	public D1IndexField(String name, String xp, DataFormat d, Conversion c) {
46
		super(name);
47

  
48
		IConverter conv = null;
49
		switch (c) {
50
			case NONE: conv = null; break;
51
			case DATE: conv = new SolrDateConverter(); break;
52
			case FGDCDATE: conv = new FgdcDateConverter(); break;
53
			case LATITUDE: conv = new SolrLatitudeConverter(); break;
54
			case LONGITUDE: conv = new SolrLongitudeConverter(); break;
55
			case FORMAT: conv = new FormatIdToFormatTypeConverter(); break;
56
		}
57
		this.d1SolrField = new SolrField(name, xp, d != DataFormat.SINGLE, conv);
58
		if (d == DataFormat.SINGLE) {
59
			this.d1SolrField.setCombineNodes(true);
60
		}
61
		else if (d == DataFormat.SET) {
62
			this.d1SolrField.setDedupe(true);
63
		}
64

  
65
		this.d1SolrField.initExpression(D1IndexField.xpath);
66
	}
67

  
68
	@Override
69
	public String[] extract(final Reader in) {
70
		List<SolrElementField> fieldList = null;
71
		try {
72
			// TODO: encodings
73
			Document doc = docBuilder.parse(new InputSource(in));
74
			fieldList = this.solrFields(doc);
75
		} catch (Exception e) {
76
			e.printStackTrace();
77
		}
78
		if (fieldList != null) {
79
			String[] fields = new String[fieldList.size()];
80
			int i = 0;
81
			for (SolrElementField f : fieldList) {
82
				fields[i++] = f.getValue();
83
			}
84
			return fields;
85
		} else {
86
			return new String[0];
87
		}
88
	}
89

  
90
	// convenience method for use with DataONE SolrDoc
91
	public List<SolrElementField> solrFields(final Document doc)
92
			throws XPathExpressionException, IOException, ParserConfigurationException, SAXException {
93
		return this.d1SolrField.processField(doc);
94
	}
95
}
src/edu/ucsb/nceas/metacat/index/XpathIndexField.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import java.io.Reader;
4

  
5
import edu.ucsb.nceas.metacat.index.D1IndexField.DataFormat;
6
import edu.ucsb.nceas.metacat.index.D1IndexField.Conversion;
7

  
8
/*
9
 * Simple xpath-based indexing. Accepts DOM-parseable data, extracts text data from all
10
 * nodes selected by an XPath expression concatenated into a single Lucene/SOLR field
11
 * body. Equivalent to the DataONE indexer's MergeSolrField bean.
12
 *
13
 * Currently just wraps DataONE SolrField code.
14
 */
15

  
16
public class XpathIndexField extends FieldSpec {
17
	private static final String separator = " ";
18
	private static final String textSelector = "text()";
19
	public final String xpath;
20

  
21
	public XpathIndexField(String name, String xp) {
22
		super(name);
23
		this.xpath = xp;
24
	}
25

  
26
	@Override
27
	public String[] extract(Reader in) {
28
		String s = this.xpath;
29
		if (!this.xpath.endsWith(textSelector)) {
30
			if (this.xpath.charAt(this.xpath.length()-1) != '/') {
31
				s = s + "/";
32
			}
33
			s = s + textSelector;
34
		}
35

  
36
		D1IndexField field = new D1IndexField(this.name, s, DataFormat.SINGLE, Conversion.NONE);
37
		return field.extract(in);
38
	}
39
}
src/edu/ucsb/nceas/metacat/index/SolrjIndex.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import org.apache.solr.client.solrj.SolrQuery;
4
import org.apache.solr.client.solrj.SolrServer;
5
import org.apache.solr.client.solrj.SolrServerException;
6
import org.apache.solr.client.solrj.impl.HttpSolrServer;
7
import org.apache.solr.client.solrj.response.QueryResponse;
8
import org.apache.solr.common.SolrDocument;
9
import org.apache.solr.common.SolrDocumentList;
10
import org.apache.solr.common.SolrInputDocument;
11

  
12
import java.io.IOException;
13
import java.util.Map;
14

  
15
public class SolrjIndex implements GenericIndex {
16
	protected SolrServer solr = null;
17

  
18
	SolrjIndex(String uri) {
19
		this.solr = new HttpSolrServer(uri);
20
	}
21

  
22
	public void insert(String docID, Map<String, String[]> fields) throws IOException {
23
		this.update(docID, fields);
24
	}
25

  
26
	public void update(String docID, Map<String, String[]> fields) throws IOException {
27
		SolrInputDocument doc = new SolrInputDocument();
28
		doc.addField(MetacatIndex.MCIDFIELD, docID);
29
		for (String k : fields.keySet()) {
30
			for (String v : fields.get(k)) {
31
				doc.addField(k, v);
32
			}
33
		}
34
		try {
35
			this.solr.add(doc);
36
		} catch (SolrServerException e) {
37
			e.printStackTrace();
38
		}
39
	}
40

  
41
	public void remove(String docID) throws IOException {
42
		try {
43
			solr.deleteByQuery(MetacatIndex.MCIDFIELD + ":" + docID);
44
		} catch (SolrServerException e) {
45
			// TODO: handling
46
			e.printStackTrace();
47
		}
48
	}
49

  
50
	public String[] query(String q) {
51
		SolrQuery sq = new SolrQuery(q);
52
		QueryResponse rsp = null;
53
		try {
54
			rsp = solr.query(sq);
55
		} catch (SolrServerException e) {
56
			// TODO: handling
57
			e.printStackTrace();
58
		}
59
		SolrDocumentList docs = rsp.getResults();
60
		String[] docIDs = new String[docs.size()];
61
		int i = 0;
62
		for (SolrDocument d : docs) {
63
			docIDs[i++] = d.getFieldValue(MetacatIndex.MCIDFIELD).toString();
64
		}
65
		return docIDs;
66
	}
67
}
src/edu/ucsb/nceas/metacat/index/LuceneIndex.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import org.apache.lucene.analysis.standard.StandardAnalyzer;
4
import org.apache.lucene.document.Document;
5
import org.apache.lucene.document.Field;
6
import org.apache.lucene.document.StringField;
7
import org.apache.lucene.document.TextField;
8
import org.apache.lucene.index.*;
9
import org.apache.lucene.queryparser.classic.ParseException;
10
import org.apache.lucene.queryparser.classic.QueryParser;
11
import org.apache.lucene.search.IndexSearcher;
12
import org.apache.lucene.search.ScoreDoc;
13
import org.apache.lucene.search.TopDocs;
14
import org.apache.lucene.store.Directory;
15
import org.apache.lucene.store.FSDirectory;
16
import org.apache.lucene.util.Version;
17

  
18
import java.io.File;
19
import java.io.IOException;
20
import java.util.Map;
21

  
22
public class LuceneIndex implements GenericIndex {
23
	protected final StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
24
	protected final IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_41, analyzer);
25
	protected final QueryParser parser = new QueryParser(Version.LUCENE_41, MetacatIndex.MCIDFIELD, analyzer);
26
	protected IndexWriter writer = null;
27
	protected DirectoryReader reader = null;
28
	protected Directory index = null;
29

  
30
	public LuceneIndex(File fsIndex) {
31
		try {
32
			this.index = FSDirectory.open(fsIndex);
33
			this.cfg.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
34
			this.writer = new IndexWriter(this.index, this.cfg);
35
			this.reader = DirectoryReader.open(this.index);
36
		} catch (Exception e) {
37
			e.printStackTrace();
38
		}
39
	}
40

  
41
	public void insert(String docID, Map<String, String[]> fields) throws IOException {
42
		this.update(docID, fields);
43
	}
44

  
45
	public void update(String docID, Map<String, String[]> fields) throws IOException {
46
		Document d = new Document();
47
		d.add(new StringField(MetacatIndex.MCIDFIELD, docID, Field.Store.YES));
48
		for (String k : fields.keySet()) {
49
			for (String v : fields.get(k)) {
50
				d.add(new TextField(k, v, Field.Store.NO));
51
			}
52
		}
53
		this.writer.addDocument(d);
54
		this.writer.commit();
55
	}
56

  
57
	public void remove(String docID) throws IOException {
58
		this.writer.deleteDocuments(new Term(MetacatIndex.MCIDFIELD, docID));
59
		this.writer.commit();
60
	}
61

  
62
	public String[] query(String q) {
63
		try {
64
			DirectoryReader newReader = DirectoryReader.openIfChanged(this.reader);
65
			if (newReader != null) { // not sure if this is right...
66
				this.reader.close();
67
				this.reader = newReader;
68
			}
69
			IndexSearcher searcher = new IndexSearcher(this.reader);
70
			TopDocs docs = searcher.search(this.parser.parse(q), 100);
71
			String docIDs[] = new String[docs.scoreDocs.length];
72
			int i = 0;
73
			for (ScoreDoc sd : docs.scoreDocs) {
74
				Document d = searcher.doc(sd.doc);
75
				docIDs[i++] = d.getField(MetacatIndex.MCIDFIELD).toString();
76
			}
77
			return docIDs;
78
		} catch (IOException e) {
79
			e.printStackTrace();
80
		} catch (ParseException e) {
81
			e.printStackTrace();
82
		}
83
		return new String[0];
84
	}
85
}
src/edu/ucsb/nceas/metacat/index/GenericIndex.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import java.io.IOException;
4
import java.util.Map;
5

  
6
/**
 * Minimal index abstraction: associates a document ID with named groups of
 * text values, removes such associations, and answers queries written in
 * whatever query language the implementation understands.
 */
public interface GenericIndex {
	/** Adds the fields for the document identified by docID. */
	void insert(String docID, Map<String, String[]> fields) throws IOException;

	/** Stores the fields for the document identified by docID. */
	void update(String docID, Map<String, String[]> fields) throws IOException;

	/** Drops the document identified by docID from the index. */
	void remove(String docID) throws IOException;

	/** Evaluates q and returns the IDs of the matching documents. */
	String[] query(String q);
}
src/edu/ucsb/nceas/metacat/index/FieldDefReader.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileReader;
6
import java.io.IOException;
7
import java.util.ArrayList;
8
import java.util.List;
9

  
10
/**
11
 * Read field definitions from a file. Replacement for Spring-based config used in DataONE.
12
 * Presently uses csv-style text which is primitive and fragile; should probably be some
13
 * sort of xml to be properly javaee-esque. It's possible to use Spring configuration info
14
 * directly, but the parsing and evaluation involved is non-trivial, even though the data
15
 * involved is really quite simple. Thus at present our config info has been manually
16
 * generated in this format, based on the DataONE indexer's definitions.
17
 *
18
 * Oh Java, when will you get a decent object syntax?
19
 */
20
public class FieldDefReader {
21
	public static List<FieldSpec> read(File f) {
22
		ArrayList<FieldSpec> specs = new ArrayList<FieldSpec>(20);
23
		try {
24
			BufferedReader r = new BufferedReader(new FileReader(f));
25
			String s;
26
			while ((s = r.readLine()) != null) {
27
				String[] def = s.split(" ");
28
				D1IndexField.DataFormat df = D1IndexField.DataFormat.SINGLE;
29
				if (def[2].equals("set")) df = D1IndexField.DataFormat.SET;
30
				else if (def[2].equals("multi")) df = D1IndexField.DataFormat.MULTISET;
31
				D1IndexField.Conversion dc = D1IndexField.Conversion.NONE;
32
				if (def[3].equals("date")) dc = D1IndexField.Conversion.DATE;
33
				else if (def[3].equals("fgdc")) dc = D1IndexField.Conversion.FGDCDATE;
34
				else if (def[3].equals("lat")) dc = D1IndexField.Conversion.LATITUDE;
35
				else if (def[3].equals("lon")) dc = D1IndexField.Conversion.LONGITUDE;
36
				specs.add(new D1IndexField(def[0], def[1], df, dc));
37
			}
38
			r.close();
39
		} catch (IOException e) {
40
			// TODO: logs
41
			e.printStackTrace();
42
		}
43
		return specs;
44
	}
45
}
src/edu/ucsb/nceas/metacat/index/HttpComponentsClientHttpRequestFactory.java
1
//package edu.ucsb.nceas.metacat.index;
2
package org.springframework.http.client;
3

  
4
import org.apache.http.client.HttpClient;
5

  
6
/**
7
 * A fake implementation of a Spring framework object for use by the DataONE HttpService class.
8
 *
9
 */
10

  
11
public class HttpComponentsClientHttpRequestFactory {
12
	protected HttpClient client;
13
	public HttpComponentsClientHttpRequestFactory(HttpClient c) {
14
		this.client = c;
15
	}
16
	public HttpClient getHttpClient() {
17
		return client;
18
	}
19
}
src/edu/ucsb/nceas/metacat/index/MetacatIndex.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
//import edu.ucsb.nceas.metacat.util.SystemUtil;
4

  
5
import java.io.*;
6
import java.util.*;
7

  
8
/*
9
*
10
*/
11

  
12
public class MetacatIndex {
13
	// singleton, though it doesn't really need to be -- simplifies configuration a bit
14
	private static MetacatIndex instance = null;
15

  
16
	public MetacatIndex getInstance() {
17
		if (instance != null) {
18
			instance = new MetacatIndex();
19
		}
20
		return instance;
21
		// return new MetacatIndex();
22
	}
23

  
24
	public static final String MCIDFIELD = "metacat-id";
25
	public static final String MCIndexName = "index";
26
	protected GenericIndex index = null;
27
	protected String dataPath = null;
28
	protected List<FieldSpec> fields;
29
	// These are separated to more easily replicate the exact behavior of the DataONE indexer
30
	// but probably this is unnecessary and they can simply be agglomerated
31
	protected List<FieldSpec> d1SysFields, d1EmlFields, d1DryadFields, d1FgdcFields;
32

  
33
	private MetacatIndex() {
34
		this.dataPath = "/Users/brendan/metacat/"; //PropertyService.getProperty("application.datafilepath");
35
		this.index = new SolrjIndex("http://localhost:8983"); //PropertyService.getProperty("");
36
		this.d1SysFields = FieldDefReader.read(new File(this.dataPath + "d1sys"));
37
		this.d1EmlFields = FieldDefReader.read(new File(this.dataPath + "d1eml"));
38
		this.d1DryadFields = FieldDefReader.read(new File(this.dataPath + "d1dryad"));
39
		this.d1FgdcFields = FieldDefReader.read(new File(this.dataPath + "d1fgdc"));
40
		readMCIndexPaths();
41
	}
42

  
43
	protected void readMCIndexPaths() {
44
		List<String> paths = new ArrayList<String>();//SystemUtil.getPathsforIndexing(); //nullable?
45
		for (String p : paths) {
46
			this.fields.add(new XpathIndexField("mcidx_" + p, p));
47
		}
48
	}
49

  
50
	public void update(String docID, Reader doc) {
51
		Map<String, String[]> idx = new HashMap<String, String[]>();
52
		// this stuff is pretty gross, but it's done to match exactly the behavior of the D1
53
		// index processor. Probably isn't necessary and should be replaced by a generalized
54
		// dispatch by document/data type.
55
		if (DocType.isSysmeta(doc)) {
56
			idx.putAll(getFields(this.d1SysFields, doc));
57
			if (DocType.isSyseml(doc)) {
58
				idx.putAll(getFields(this.d1EmlFields, doc));
59
			} else if (DocType.isSysdryad(doc)) {
60
				idx.putAll(getFields(this.d1DryadFields, doc));
61
			} else if (DocType.isSysfgdc(doc)) {
62
				idx.putAll(getFields(this.d1FgdcFields, doc));
63
			}
64
		} else {
65
			if (DocType.isEml(doc)) {
66
				idx.putAll(getFields(this.d1EmlFields, doc));
67
			}
68
			idx.putAll(getFields(this.fields, doc));
69
		}
70
		try {
71
			index.update(docID, idx);
72
		} catch (IOException e) {
73
			// TODO: logs etc
74
			e.printStackTrace();
75
		}
76
	}
77

  
78
	public void remove(String docID) {
79
		try {
80
			index.remove(docID);
81
		} catch (IOException e) {
82
			// TODO: logs etc
83
			e.printStackTrace();
84
		}
85
	}
86

  
87
	public List<String> retrieve (String query) {
88
		String result[] = index.query(query);
89
		return new ArrayList<String>(Arrays.asList(result));
90
	}
91

  
92
	protected Map<String, String[]> getFields(List<FieldSpec> fields, Reader doc) {
93
		Map<String, String[]> idx = new HashMap<String, String[]>();
94
		for (FieldSpec fs : fields) {
95
			idx.put(fs.name, fs.extract(doc));
96
		}
97
		return idx;
98
	}
99
}
src/edu/ucsb/nceas/metacat/index/DocType.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import org.xml.sax.InputSource;
4

  
5
import javax.xml.xpath.*;
6
import java.io.Reader;
7
import java.util.ArrayList;
8
import java.util.List;
9

  
10
/**
11
 * Identify document classes for indexing. Replaces equivalent Spring-based configuration
12
 * used by DataONE indexer. It's not clear that we actually need to perfectly mimic this
13
 * stuff, but for the now we shall. Note that the replicated DataONE classifications
14
 * apply only to DataONE System Metadata documents. Additional classifiers have been
15
 * added for plain EML etc.
16
 *
17
 * There are better ways to configure this stuff, but this is in effect a direct transcript
18
 * of the DataONE material. Automatic extraction from Spring configuration is doable, but
19
 * not trivial.
20
 */
21
public class DocType {
22
	//
23
	private static final XPath xpath = XPathFactory.newInstance().newXPath();
24
	private static final List<XPathExpression> d1sys = new ArrayList<XPathExpression>(1);
25
	private static final List<XPathExpression> d1eml = new ArrayList<XPathExpression>(4);
26
	private static final List<XPathExpression> d1dryad = new ArrayList<XPathExpression>(1);
27
	private static final List<XPathExpression> d1fgdc = new ArrayList<XPathExpression>(3);
28
	private static final List<XPathExpression> eml = new ArrayList<XPathExpression>(4);
29
	static {
30
		xpath.setNamespaceContext(new MCXmlNamespace());
31

  
32
		try {
33
			d1sys.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata"));
34

  
35
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.0']"));
36
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.0.1']"));
37
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
38
			d1eml.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'eml://ecoinformatics.org/eml-2.1.1']"));
39

  
40
			d1dryad.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'http://purl.org/dryad/terms/']"));
41

  
42
			d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001-1998']"));
43
			d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.1-1999']"));
44
			d1fgdc.add(xpath.compile("/" + MCXmlNamespace.D1 + ":systemMetadata/formatId[text() = 'FGDC-STD-001.2-1999']"));
45

  
46
			eml.add(xpath.compile("/" + MCXmlNamespace.E200 + ":eml"));
47
			eml.add(xpath.compile("/" + MCXmlNamespace.E201 + ":eml"));
48
			eml.add(xpath.compile("/" + MCXmlNamespace.E210 + ":eml"));
49
			eml.add(xpath.compile("/" + MCXmlNamespace.E211 + ":eml"));
50
		} catch (XPathExpressionException e) {
51
			// TODO: logs
52
			e.printStackTrace();
53
		}
54
	}
55
	public static boolean isSysmeta(Reader in) {
56
		return check(d1sys, in);
57
	}
58
	public static boolean isSyseml(Reader in) {
59
		return check(d1eml, in);
60
	}
61
	public static boolean isSysdryad(Reader in) {
62
		return check(d1dryad, in);
63
	}
64
	public static boolean isSysfgdc(Reader in) {
65
		return check(d1fgdc, in);
66
	}
67
	public static boolean isEml(Reader in) {
68
		return check(eml, in);
69
	}
70

  
71
	public static boolean check(List<XPathExpression> exprs, Reader in) {
72
		InputSource src = new InputSource(in);
73
		try {
74
			for (XPathExpression x : exprs) {
75
				Boolean match = (Boolean) x.evaluate(src, XPathConstants.BOOLEAN);
76
				if (match != null && match.booleanValue()) {
77
					return true;
78
				}
79
			}
80
		} catch (XPathExpressionException e) {
81
		}
82
		return false;
83
	}
84
}
src/edu/ucsb/nceas/metacat/index/Embedded.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
4
import org.apache.solr.core.CoreContainer;
5

  
6
public class Embedded extends SolrjIndex {
7
	Embedded() {
8
		super("");
9
		// this is just for experiment; proper configuration will be different
10
		System.setProperty("solr.solr.home", "/Users/brendan/solrhome/");
11
		CoreContainer.Initializer init = new CoreContainer.Initializer();
12
		CoreContainer c = init.initialize();
13
		solr = new EmbeddedSolrServer(c, "mc-core");
14
	}
15
}
src/edu/ucsb/nceas/metacat/index/D1Index.java
1
package edu.ucsb.nceas.metacat.index;
2
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
3

  
4
import org.apache.http.impl.client.DefaultHttpClient;
5
import org.dataone.cn.indexer.solrhttp.HTTPService;
6
import org.dataone.cn.indexer.solrhttp.SolrDoc;
7
import org.dataone.cn.indexer.solrhttp.SolrElementAdd;
8
import org.dataone.cn.indexer.solrhttp.SolrElementField;
9

  
10
import java.io.IOException;
11
import java.util.ArrayList;
12
import java.util.List;
13
import java.util.Map;
14

  
15
/* * * * disabled for classpath complications * * * */
16
public class D1Index implements GenericIndex {
17
	protected final DefaultHttpClient client = new DefaultHttpClient();
18
	protected final HTTPService solrSvc =
19
			new HTTPService(new HttpComponentsClientHttpRequestFactory(this.client));
20
	protected String uri = "";
21

  
22
	D1Index(String uri) {
23
		this.uri = uri;
24
	}
25

  
26
	public void insert(String docID, Map<String, String[]> fields) throws IOException {
27
		this.update(docID, fields);
28
	}
29

  
30
	public void update(String docID, Map<String, String[]> fields) throws IOException {
31
		SolrDoc doc = new SolrDoc();
32
		// this works for our purposes, but violates DataONE expectations
33
		doc.addField(new SolrElementField(SolrElementField.FIELD_ID, docID));
34
		for (String k : fields.keySet()) {
35
			for (String v : fields.get(k)) {
36
				doc.addField(new SolrElementField(k, v));
37
			}
38
		}
39
		List<SolrDoc> docList = new ArrayList<SolrDoc>(1);
40
		docList.add(doc);
41
		solrSvc.sendUpdate(this.uri, new SolrElementAdd(docList));
42
	}
43

  
44
	public void remove(String docID) {
45
		solrSvc.sendSolrDelete(docID);
46
	}
47

  
48
	public String[] query(String q) throws IndexOutOfBoundsException {
49
		// the indexer's HttpService class doesn't provide general querying -- it's
50
		// an indexer only. Query handling is done by separate code in a different
51
		// part of the D1 architecture. That code is rather tightly bound to other
52
		// D1 elements and consequently challenging to integrate with Metacat.
53
		// For this and other reasons I feel that the other index interfaces are
54
		// a better choice for Metacat, so as yet this remains incomplete. It is
55
		// however possible to continue down this path.
56
		return new String[0];
57
	}
58
}
59
*/
src/edu/ucsb/nceas/metacat/index/MCIndexDocDef.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import java.util.*;
4

  
5
/*
6
 * Schema for document indexing. The Lucene/SOLR model of a 'document' is a bag of named
7
 * text objects.
8
 */
9
public abstract class MCIndexDocDef {
10
	protected Set<FieldSpec> fields;
11

  
12
	// do we really want this here? I guess this is the whole question..starting to think not.
13
	public abstract boolean applies();
14

  
15

  
16
	public MCIndexDocDef(Collection<FieldSpec> fields) {
17
		this.fields = new HashSet<FieldSpec>(fields);
18
	}
19

  
20
	public void add(FieldSpec f) {
21
		this.fields.add(f);
22
	}
23

  
24
	public void add(Collection<FieldSpec> f) {
25
		this.fields.addAll(f);
26
	}
27

  
28
	public void remove(FieldSpec f) {
29
		this.fields.remove(f);
30
	}
31

  
32
	public void remove(Collection<FieldSpec> f) {
33
		this.fields.removeAll(f);
34
	}
35
}
src/edu/ucsb/nceas/metacat/index/MCXmlNamespace.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import javax.xml.XMLConstants;
4
import javax.xml.namespace.NamespaceContext;
5
import java.util.HashMap;
6
import java.util.Iterator;
7
import java.util.Map;
8

  
9
/**
 * NamespaceContext binding the prefixes used in this package's XPath expressions
 * to the EML, DataONE, ORE, Dublin Core and FOAF namespace URIs. The reserved
 * "xml" and "xmlns" prefixes are bound as the NamespaceContext contract requires.
 */
public class MCXmlNamespace implements NamespaceContext {
	// Namespace prefixes for use in XPath expressions
	// The literal values assigned here should be distinct but are not significant -- they
	// may equally be "a" "b" "c" etc as long as the symbolic names are used consistently
	public static final String E200 = "e200";
	public static final String E211 = "e211";
	public static final String E210 = "e210";
	public static final String E201 = "e201";
	public static final String D1 = "d1";
	public static final String ORE = "ore";
	public static final String DC = "dc";
	public static final String DCT = "dct";
	public static final String FOAF = "foaf";

	private static final Map<String, String> prefixes;
	static {
		prefixes = new HashMap<String, String>();
		prefixes.put(E200, "eml://ecoinformatics.org/eml-2.0.0");
		prefixes.put(E201, "eml://ecoinformatics.org/eml-2.0.1");
		prefixes.put(E210, "eml://ecoinformatics.org/eml-2.1.0");
		prefixes.put(E211, "eml://ecoinformatics.org/eml-2.1.1");
		prefixes.put(D1, "http://ns.dataone.org/service/types/v1");
		prefixes.put(ORE, "http://www.openarchives.org/ore/terms/");
		prefixes.put(DC, "http://purl.org/dc/elements/1.1/");
		prefixes.put(DCT, "http://purl.org/dc/terms/");
		prefixes.put(FOAF, "http://xmlns.com/foaf/0.1/");
	}

	/**
	 * @param prefix prefix to resolve
	 * @return the bound namespace URI, or XMLConstants.NULL_NS_URI when unbound
	 * @throws IllegalArgumentException when prefix is null
	 */
	@Override
	public String getNamespaceURI(String prefix) {
		if (prefix == null) {
			throw new IllegalArgumentException("prefix must not be null");
		}
		if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
			return XMLConstants.XML_NS_URI;
		}
		if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
			return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
		}
		String ns = prefixes.get(prefix);
		return (ns != null) ? ns : XMLConstants.NULL_NS_URI;
	}

	/**
	 * @param namespaceURI namespace URI to look up
	 * @return a prefix bound to the URI, or null when none is bound
	 * @throws IllegalArgumentException when namespaceURI is null
	 */
	@Override
	public String getPrefix(String namespaceURI) {
		Iterator<String> bound = getPrefixes(namespaceURI);
		return bound.hasNext() ? bound.next() : null;
	}

	/**
	 * @param namespaceURI namespace URI to look up
	 * @return a read-only iterator over every prefix bound to the URI (possibly empty)
	 * @throws IllegalArgumentException when namespaceURI is null
	 */
	@Override
	public Iterator<String> getPrefixes(String namespaceURI) {
		if (namespaceURI == null) {
			throw new IllegalArgumentException("namespaceURI must not be null");
		}
		java.util.List<String> bound = new java.util.ArrayList<String>();
		if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
			bound.add(XMLConstants.XML_NS_PREFIX);
		} else if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
			bound.add(XMLConstants.XMLNS_ATTRIBUTE);
		} else {
			for (Map.Entry<String, String> binding : prefixes.entrySet()) {
				if (binding.getValue().equals(namespaceURI)) {
					bound.add(binding.getKey());
				}
			}
		}
		// the contract requires an iterator that does not support remove()
		return java.util.Collections.unmodifiableList(bound).iterator();
	}
}
52

  
src/edu/ucsb/nceas/metacat/index/FieldSpec.java
1
package edu.ucsb.nceas.metacat.index;
2

  
3
import org.w3c.dom.Document;
4
import sun.beans.editors.ByteEditor;
5

  
6
import java.io.ByteArrayInputStream;
7
import java.io.InputStream;
8
import java.io.Reader;
9

  
10
/**
 * A named index field together with the rule for pulling its values out of a
 * document. Subclasses supply the extraction rule.
 */
public abstract class FieldSpec {
	/** Name of the field in the index. */
	public final String name;

	/**
	 * @param name name of the field in the index
	 */
	public FieldSpec(String name) {
		this.name = name;
	}

	/**
	 * Reads a document from the given reader and returns this field's values.
	 *
	 * @param s source of the document text
	 * @return the extracted values
	 */
	public abstract String[] extract(final Reader s);
}

Also available in: Unified diff