6 |
6 |
import java.sql.SQLException;
|
7 |
7 |
import java.util.ArrayList;
|
8 |
8 |
import java.util.Arrays;
|
|
9 |
import java.util.HashMap;
|
9 |
10 |
import java.util.Iterator;
|
10 |
11 |
import java.util.List;
|
11 |
12 |
import java.util.Map;
|
12 |
13 |
import java.util.Vector;
|
13 |
14 |
|
|
15 |
import org.apache.commons.io.IOUtils;
|
14 |
16 |
import org.apache.log4j.Logger;
|
15 |
17 |
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
|
16 |
18 |
import org.dataone.service.types.v1.Identifier;
|
... | ... | |
28 |
30 |
import com.hp.hpl.jena.ontology.OntClass;
|
29 |
31 |
import com.hp.hpl.jena.ontology.OntModel;
|
30 |
32 |
import com.hp.hpl.jena.ontology.Ontology;
|
|
33 |
import com.hp.hpl.jena.query.Dataset;
|
|
34 |
import com.hp.hpl.jena.query.Query;
|
|
35 |
import com.hp.hpl.jena.query.QueryExecution;
|
|
36 |
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
|
37 |
import com.hp.hpl.jena.query.QueryFactory;
|
|
38 |
import com.hp.hpl.jena.query.QuerySolution;
|
|
39 |
import com.hp.hpl.jena.query.ResultSet;
|
31 |
40 |
import com.hp.hpl.jena.rdf.model.ModelFactory;
|
32 |
41 |
import com.hp.hpl.jena.rdf.model.Property;
|
33 |
42 |
import com.hp.hpl.jena.rdf.model.Resource;
|
|
43 |
import com.hp.hpl.jena.tdb.TDBFactory;
|
34 |
44 |
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
|
35 |
45 |
|
36 |
46 |
import edu.ucsb.nceas.metacat.DBUtil;
|
... | ... | |
40 |
50 |
import edu.ucsb.nceas.metacat.database.DBConnection;
|
41 |
51 |
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
|
42 |
52 |
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
|
53 |
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
|
43 |
54 |
import edu.ucsb.nceas.metacat.properties.PropertyService;
|
44 |
55 |
import edu.ucsb.nceas.metacat.replication.ReplicationService;
|
45 |
56 |
import edu.ucsb.nceas.metacat.util.DocumentUtil;
|
... | ... | |
70 |
81 |
|
71 |
82 |
// package visibility for testing only
// NOTE(review): the semantics of this flag are not visible in this chunk
// (no reads of it appear here); it is only initialized to false — confirm
// its use in the rest of the class before documenting further.
boolean randomize = false;
|
|
84 |
|
|
85 |
public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
|
73 |
86 |
|
|
87 |
// generate an annotation for the metadata given
|
|
88 |
String rdfContent = this.generateAnnotation(metadataPid);
|
|
89 |
|
|
90 |
// load to triple store
|
|
91 |
Dataset dataset = TDBFactory.createDataset("./tbd");
|
|
92 |
|
|
93 |
// read the annotation into the triplestore
|
|
94 |
InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
|
|
95 |
String name = "http://annotation";
|
|
96 |
boolean loaded = dataset.containsNamedModel(name);
|
|
97 |
if (loaded) {
|
|
98 |
dataset.removeNamedModel(name);
|
|
99 |
loaded = false;
|
|
100 |
}
|
|
101 |
if (!loaded) {
|
|
102 |
OntModel ontModel = ModelFactory.createOntologyModel();
|
|
103 |
ontModel.read(source, name);
|
|
104 |
dataset.addNamedModel(name, ontModel);
|
|
105 |
}
|
|
106 |
|
|
107 |
// query for fields to add to index
|
|
108 |
Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
|
|
109 |
|
|
110 |
// TODO: look up the query to use (support multiple like in the indexing project)
|
|
111 |
String q = null;
|
|
112 |
|
|
113 |
q = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
|
114 |
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
|
115 |
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
|
116 |
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
|
117 |
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
|
118 |
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
|
119 |
+ "SELECT ?standard_sm ?id "
|
|
120 |
+ "FROM <$GRAPH_NAME> "
|
|
121 |
+ "WHERE { "
|
|
122 |
+ " ?measurement rdf:type oboe-core:Measurement . "
|
|
123 |
+ " ?measurement rdf:type ?restriction . "
|
|
124 |
+ " ?restriction owl:onProperty oboe-core:usesStandard . "
|
|
125 |
+ " ?restriction owl:allValuesFrom ?standard . "
|
|
126 |
+ " ?standard rdfs:subClassOf+ ?standard_sm . "
|
|
127 |
+ " ?standard_sm rdfs:subClassOf oboe-core:Standard . "
|
|
128 |
+ " ?annotation oa:hasBody ?measurement . "
|
|
129 |
+ " ?annotation oa:hasTarget ?target . "
|
|
130 |
+ " ?target oa:hasSource ?metadata . "
|
|
131 |
+ " ?metadata dcterms:identifier ?id . "
|
|
132 |
+ "}";
|
|
133 |
|
|
134 |
q = q.replaceAll("\\$GRAPH_NAME", name);
|
|
135 |
Query query = QueryFactory.create(q);
|
|
136 |
QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
|
|
137 |
ResultSet results = qexec.execSelect();
|
|
138 |
|
|
139 |
while (results.hasNext()) {
|
|
140 |
QuerySolution solution = results.next();
|
|
141 |
System.out.println(solution.toString());
|
|
142 |
|
|
143 |
// find the index document we are trying to augment with the annotation
|
|
144 |
if (solution.contains("id")) {
|
|
145 |
String id = solution.getLiteral("id").getString();
|
|
146 |
if (!id.equals(metadataPid.getValue())) {
|
|
147 |
// skip any solution that does not annotate the given pid
|
|
148 |
continue;
|
|
149 |
}
|
|
150 |
|
|
151 |
}
|
|
152 |
// loop through the solution variables, add an index value for each
|
|
153 |
Iterator<String> varNameIter = solution.varNames();
|
|
154 |
while (varNameIter.hasNext()) {
|
|
155 |
String key = varNameIter.next();
|
|
156 |
if (key.equals("id")) {
|
|
157 |
// don't include the id
|
|
158 |
continue;
|
|
159 |
}
|
|
160 |
String value = solution.get(key).toString();
|
|
161 |
List<Object> values = fields.get(key);
|
|
162 |
if (values == null) {
|
|
163 |
values = new ArrayList<Object>();
|
|
164 |
}
|
|
165 |
values.add(value);
|
|
166 |
fields.put(key, values);
|
|
167 |
}
|
|
168 |
}
|
|
169 |
|
|
170 |
// clean up the triple store
|
|
171 |
TDBFactory.release(dataset);
|
|
172 |
|
|
173 |
// add to index
|
|
174 |
MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true);
|
|
175 |
|
|
176 |
|
|
177 |
}
|
|
178 |
|
74 |
179 |
/**
|
75 |
180 |
* Generate annotation for given metadata identifier
|
76 |
181 |
* @param metadataPid
|
First pass at a direct EML-to-semantic-index method. It still produces an RDF model, but does not persist it in Metacat — only in the triplestore — which allows us to re-run indexing without adding stale RDF to the MN store.