1
|
package edu.ucsb.nceas.metacat.index;
|
2
|
|
3
|
//import edu.ucsb.nceas.metacat.util.SystemUtil;
|
4
|
|
5
|
import java.io.*;
|
6
|
import java.util.*;
|
7
|
|
8
|
/*
|
9
|
*
|
10
|
*/
|
11
|
|
12
|
public class MetacatIndex {
|
13
|
// singleton, though it doesn't really need to be -- simplifies configuration a bit
|
14
|
private static MetacatIndex instance = null;
|
15
|
|
16
|
public MetacatIndex getInstance() {
|
17
|
if (instance != null) {
|
18
|
instance = new MetacatIndex();
|
19
|
}
|
20
|
return instance;
|
21
|
// return new MetacatIndex();
|
22
|
}
|
23
|
|
24
|
public static final String MCIDFIELD = "metacat-id";
|
25
|
public static final String MCIndexName = "index";
|
26
|
protected GenericIndex index = null;
|
27
|
protected String dataPath = null;
|
28
|
protected List<FieldSpec> fields;
|
29
|
// These are separated to more easily replicate the exact behavior of the DataONE indexer
|
30
|
// but probably this is unnecessary and they can simply be agglomerated
|
31
|
protected List<FieldSpec> d1SysFields, d1EmlFields, d1DryadFields, d1FgdcFields;
|
32
|
|
33
|
private MetacatIndex() {
|
34
|
this.dataPath = "/Users/brendan/metacat/"; //PropertyService.getProperty("application.datafilepath");
|
35
|
this.index = new SolrjIndex("http://localhost:8983"); //PropertyService.getProperty("");
|
36
|
this.d1SysFields = FieldDefReader.read(new File(this.dataPath + "d1sys"));
|
37
|
this.d1EmlFields = FieldDefReader.read(new File(this.dataPath + "d1eml"));
|
38
|
this.d1DryadFields = FieldDefReader.read(new File(this.dataPath + "d1dryad"));
|
39
|
this.d1FgdcFields = FieldDefReader.read(new File(this.dataPath + "d1fgdc"));
|
40
|
readMCIndexPaths();
|
41
|
}
|
42
|
|
43
|
protected void readMCIndexPaths() {
|
44
|
List<String> paths = new ArrayList<String>();//SystemUtil.getPathsforIndexing(); //nullable?
|
45
|
for (String p : paths) {
|
46
|
this.fields.add(new XpathIndexField("mcidx_" + p, p));
|
47
|
}
|
48
|
}
|
49
|
|
50
|
public void update(String docID, Reader doc) {
|
51
|
Map<String, String[]> idx = new HashMap<String, String[]>();
|
52
|
// this stuff is pretty gross, but it's done to match exactly the behavior of the D1
|
53
|
// index processor. Probably isn't necessary and should be replaced by a generalized
|
54
|
// dispatch by document/data type.
|
55
|
if (DocType.isSysmeta(doc)) {
|
56
|
idx.putAll(getFields(this.d1SysFields, doc));
|
57
|
if (DocType.isSyseml(doc)) {
|
58
|
idx.putAll(getFields(this.d1EmlFields, doc));
|
59
|
} else if (DocType.isSysdryad(doc)) {
|
60
|
idx.putAll(getFields(this.d1DryadFields, doc));
|
61
|
} else if (DocType.isSysfgdc(doc)) {
|
62
|
idx.putAll(getFields(this.d1FgdcFields, doc));
|
63
|
}
|
64
|
} else {
|
65
|
if (DocType.isEml(doc)) {
|
66
|
idx.putAll(getFields(this.d1EmlFields, doc));
|
67
|
}
|
68
|
idx.putAll(getFields(this.fields, doc));
|
69
|
}
|
70
|
try {
|
71
|
index.update(docID, idx);
|
72
|
} catch (IOException e) {
|
73
|
// TODO: logs etc
|
74
|
e.printStackTrace();
|
75
|
}
|
76
|
}
|
77
|
|
78
|
public void remove(String docID) {
|
79
|
try {
|
80
|
index.remove(docID);
|
81
|
} catch (IOException e) {
|
82
|
// TODO: logs etc
|
83
|
e.printStackTrace();
|
84
|
}
|
85
|
}
|
86
|
|
87
|
public List<String> retrieve (String query) {
|
88
|
String result[] = index.query(query);
|
89
|
return new ArrayList<String>(Arrays.asList(result));
|
90
|
}
|
91
|
|
92
|
protected Map<String, String[]> getFields(List<FieldSpec> fields, Reader doc) {
|
93
|
Map<String, String[]> idx = new HashMap<String, String[]>();
|
94
|
for (FieldSpec fs : fields) {
|
95
|
idx.put(fs.name, fs.extract(doc));
|
96
|
}
|
97
|
return idx;
|
98
|
}
|
99
|
}
|