41 |
41 |
import com.hp.hpl.jena.rdf.model.Property;
|
42 |
42 |
import com.hp.hpl.jena.rdf.model.Resource;
|
43 |
43 |
import com.hp.hpl.jena.tdb.TDBFactory;
|
44 |
|
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
|
45 |
44 |
|
46 |
45 |
import edu.ucsb.nceas.metacat.DBUtil;
|
47 |
46 |
import edu.ucsb.nceas.metacat.DocumentImpl;
|
... | ... | |
79 |
78 |
|
80 |
79 |
public static String OBOE_SBC = "OBOE-SBC";
|
81 |
80 |
|
82 |
|
// package visibility for testing only
|
83 |
|
boolean randomize = false;
|
84 |
|
|
85 |
81 |
public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
|
86 |
82 |
|
87 |
83 |
// generate an annotation for the metadata given
|
... | ... | |
92 |
88 |
|
93 |
89 |
// read the annotation into the triplestore
|
94 |
90 |
InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
|
95 |
|
String name = "http://annotation";
|
|
91 |
String name = "http://annotation/" + metadataPid.getValue();
|
96 |
92 |
boolean loaded = dataset.containsNamedModel(name);
|
97 |
93 |
if (loaded) {
|
98 |
94 |
dataset.removeNamedModel(name);
|
... | ... | |
108 |
104 |
Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
|
109 |
105 |
|
110 |
106 |
// TODO: look up the query to use (support multiple like in the indexing project)
|
111 |
|
String q = null;
|
112 |
|
|
113 |
|
q = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
|
107 |
List<String> queries = new ArrayList<String>();
|
|
108 |
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
114 |
109 |
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
115 |
110 |
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
116 |
111 |
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
117 |
112 |
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
118 |
113 |
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
119 |
|
+ "SELECT ?standard_sm ?id "
|
|
114 |
+ "SELECT ?standard_sm ?pid "
|
120 |
115 |
+ "FROM <$GRAPH_NAME> "
|
121 |
116 |
+ "WHERE { "
|
122 |
117 |
+ " ?measurement rdf:type oboe-core:Measurement . "
|
... | ... | |
128 |
123 |
+ " ?annotation oa:hasBody ?measurement . "
|
129 |
124 |
+ " ?annotation oa:hasTarget ?target . "
|
130 |
125 |
+ " ?target oa:hasSource ?metadata . "
|
131 |
|
+ " ?metadata dcterms:identifier ?id . "
|
132 |
|
+ "}";
|
133 |
|
|
134 |
|
q = q.replaceAll("\\$GRAPH_NAME", name);
|
135 |
|
Query query = QueryFactory.create(q);
|
136 |
|
QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
|
137 |
|
ResultSet results = qexec.execSelect();
|
138 |
|
|
139 |
|
while (results.hasNext()) {
|
140 |
|
QuerySolution solution = results.next();
|
141 |
|
System.out.println(solution.toString());
|
|
126 |
+ " ?metadata dcterms:identifier ?pid . "
|
|
127 |
+ "}");
|
|
128 |
|
|
129 |
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
|
130 |
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
|
131 |
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
|
132 |
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
|
133 |
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
|
134 |
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
|
135 |
+ "SELECT ?characteristic_sm ?pid "
|
|
136 |
+ "FROM <$GRAPH_NAME>"
|
|
137 |
+ "WHERE { "
|
|
138 |
+ " ?measurement rdf:type oboe-core:Measurement . "
|
|
139 |
+ " ?measurement rdf:type ?restriction . "
|
|
140 |
+ " ?restriction owl:onProperty oboe-core:ofCharacteristic . "
|
|
141 |
+ " ?restriction owl:allValuesFrom ?characteristic . "
|
|
142 |
+ " ?characteristic rdfs:subClassOf+ ?characteristic_sm . "
|
|
143 |
+ " ?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . "
|
|
144 |
+ " ?annotation oa:hasBody ?measurement . "
|
|
145 |
+ " ?annotation oa:hasTarget ?target . "
|
|
146 |
+ " ?target oa:hasSource ?metadata . "
|
|
147 |
+ " ?metadata dcterms:identifier ?pid . "
|
|
148 |
+ "}");
|
|
149 |
|
|
150 |
for (String q: queries) {
|
|
151 |
q = q.replaceAll("\\$GRAPH_NAME", name);
|
|
152 |
Query query = QueryFactory.create(q);
|
|
153 |
QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
|
|
154 |
ResultSet results = qexec.execSelect();
|
142 |
155 |
|
143 |
|
// find the index document we are trying to augment with the annotation
|
144 |
|
if (solution.contains("id")) {
|
145 |
|
String id = solution.getLiteral("id").getString();
|
146 |
|
if (!id.equals(metadataPid.getValue())) {
|
147 |
|
// skip any solution that does not annotate the given pid
|
148 |
|
continue;
|
149 |
|
}
|
|
156 |
while (results.hasNext()) {
|
|
157 |
QuerySolution solution = results.next();
|
|
158 |
System.out.println(solution.toString());
|
150 |
159 |
|
151 |
|
}
|
152 |
|
// loop through the solution variables, add an index value for each
|
153 |
|
Iterator<String> varNameIter = solution.varNames();
|
154 |
|
while (varNameIter.hasNext()) {
|
155 |
|
String key = varNameIter.next();
|
156 |
|
if (key.equals("id")) {
|
157 |
|
// don't include the id
|
158 |
|
continue;
|
|
160 |
// find the index document we are trying to augment with the annotation
|
|
161 |
if (solution.contains("pid")) {
|
|
162 |
String id = solution.getLiteral("pid").getString();
|
|
163 |
if (!id.equals(metadataPid.getValue())) {
|
|
164 |
// skip any solution that does not annotate the given pid
|
|
165 |
continue;
|
|
166 |
}
|
|
167 |
|
159 |
168 |
}
|
160 |
|
String value = solution.get(key).toString();
|
161 |
|
List<Object> values = fields.get(key);
|
162 |
|
if (values == null) {
|
163 |
|
values = new ArrayList<Object>();
|
|
169 |
// loop through the solution variables, add an index value for each
|
|
170 |
Iterator<String> varNameIter = solution.varNames();
|
|
171 |
while (varNameIter.hasNext()) {
|
|
172 |
String key = varNameIter.next();
|
|
173 |
if (key.equals("pid")) {
|
|
174 |
// don't include the id
|
|
175 |
continue;
|
|
176 |
}
|
|
177 |
String value = solution.get(key).toString();
|
|
178 |
List<Object> values = fields.get(key);
|
|
179 |
if (values == null) {
|
|
180 |
values = new ArrayList<Object>();
|
|
181 |
}
|
|
182 |
values.add(value);
|
|
183 |
fields.put(key, values);
|
164 |
184 |
}
|
165 |
|
values.add(value);
|
166 |
|
fields.put(key, values);
|
167 |
185 |
}
|
168 |
|
}
|
|
186 |
}
|
169 |
187 |
|
170 |
188 |
// clean up the triple store
|
171 |
189 |
TDBFactory.release(dataset);
|
... | ... | |
363 |
381 |
private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
|
364 |
382 |
// what's our unit?
|
365 |
383 |
String unit = attribute.getUnit().toLowerCase();
|
|
384 |
|
|
385 |
/*
|
|
386 |
boolean found = false;
|
366 |
387 |
List<String> tokens = Arrays.asList(unit.split(" "));
|
367 |
|
|
368 |
|
boolean found = false;
|
369 |
388 |
ExtendedIterator iter = standardClass.listSubClasses(false);
|
370 |
|
if (randomize) {
|
371 |
|
List subclasses = iter.toList();
|
372 |
|
int size = subclasses.size();
|
373 |
|
Long index = new Long(Math.round(Math.floor((Math.random() * (size-1)))));
|
374 |
|
OntClass subclass = (OntClass) subclasses.get( index.intValue() );
|
375 |
|
return subclass;
|
376 |
|
}
|
377 |
389 |
while (iter.hasNext()) {
|
378 |
390 |
OntClass subclass = (OntClass) iter.next();
|
379 |
391 |
String subclassName = subclass.getLocalName().toLowerCase();
|
... | ... | |
388 |
400 |
return subclass;
|
389 |
401 |
}
|
390 |
402 |
}
|
|
403 |
*/
|
|
404 |
|
391 |
405 |
// try to look it up if we got this far
|
392 |
406 |
return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
|
393 |
407 |
}
|
394 |
408 |
|
395 |
409 |
private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
|
396 |
|
// what's our label?
|
|
410 |
// what are we looking for?
|
397 |
411 |
String label = attribute.getLabel().toLowerCase();
|
398 |
|
List<String> tokens = Arrays.asList(label.split(" "));
|
|
412 |
String definition = attribute.getDefinition();
|
|
413 |
String text = label + " " + definition;
|
399 |
414 |
|
|
415 |
/*
|
|
416 |
// find something that matches
|
400 |
417 |
boolean found = false;
|
401 |
|
// find something that matches
|
|
418 |
List<String> tokens = Arrays.asList(label.split(" "));
|
402 |
419 |
ExtendedIterator iter = characteristicClass.listSubClasses();
|
403 |
|
if (randomize) {
|
404 |
|
List subclasses = iter.toList();
|
405 |
|
int size = subclasses.size();
|
406 |
|
Long index = new Long(Math.round(Math.floor((Math.random() * (size-1)))));
|
407 |
|
OntClass subclass = (OntClass) subclasses.get( index.intValue() );
|
408 |
|
return subclass;
|
409 |
|
}
|
410 |
420 |
while (iter.hasNext()) {
|
411 |
421 |
OntClass subclass = (OntClass) iter.next();
|
412 |
422 |
String subclassName = subclass.getLocalName().toLowerCase();
|
... | ... | |
421 |
431 |
return subclass;
|
422 |
432 |
}
|
423 |
433 |
}
|
|
434 |
*/
|
424 |
435 |
|
425 |
|
// try to look it up if we got this far
|
426 |
|
return BioPortalService.lookupAnnotationClass(characteristicClass, attribute.getDefinition(), OBOE_SBC);
|
|
436 |
// try to look it up from the service
|
|
437 |
return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
|
427 |
438 |
|
428 |
439 |
}
|
429 |
440 |
|
Simplify lookup for classes and ORCID. Remove the "random" annotation code branches -- those bogus classes were just too confusing to look at, especially now that we have "real" generated annotations.