Revision 8763
Added by ben leinfelder about 10 years ago
DatapackageSummarizer.java | ||
---|---|---|
41 | 41 |
import com.hp.hpl.jena.rdf.model.Property; |
42 | 42 |
import com.hp.hpl.jena.rdf.model.Resource; |
43 | 43 |
import com.hp.hpl.jena.tdb.TDBFactory; |
44 |
import com.hp.hpl.jena.util.iterator.ExtendedIterator; |
|
45 | 44 |
|
46 | 45 |
import edu.ucsb.nceas.metacat.DBUtil; |
47 | 46 |
import edu.ucsb.nceas.metacat.DocumentImpl; |
... | ... | |
79 | 78 |
|
80 | 79 |
public static String OBOE_SBC = "OBOE-SBC"; |
81 | 80 |
|
82 |
// package visibility for testing only |
|
83 |
boolean randomize = false; |
|
84 |
|
|
85 | 81 |
public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception { |
86 | 82 |
|
87 | 83 |
// generate an annotation for the metadata given |
... | ... | |
92 | 88 |
|
93 | 89 |
// read the annotation into the triplestore |
94 | 90 |
InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8"); |
95 |
String name = "http://annotation";
|
|
91 |
String name = "http://annotation/" + metadataPid.getValue();
|
|
96 | 92 |
boolean loaded = dataset.containsNamedModel(name); |
97 | 93 |
if (loaded) { |
98 | 94 |
dataset.removeNamedModel(name); |
... | ... | |
108 | 104 |
Map<String, List<Object>> fields = new HashMap<String, List<Object>>(); |
109 | 105 |
|
110 | 106 |
// TODO: look up the query to use (support multiple like in the indexing project) |
111 |
String q = null; |
|
112 |
|
|
113 |
q = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " |
|
107 |
List<String> queries = new ArrayList<String>(); |
|
108 |
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " |
|
114 | 109 |
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " |
115 | 110 |
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> " |
116 | 111 |
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> " |
117 | 112 |
+ "PREFIX oa: <http://www.w3.org/ns/oa#> " |
118 | 113 |
+ "PREFIX dcterms: <http://purl.org/dc/terms/> " |
119 |
+ "SELECT ?standard_sm ?id " |
|
114 |
+ "SELECT ?standard_sm ?pid "
|
|
120 | 115 |
+ "FROM <$GRAPH_NAME> " |
121 | 116 |
+ "WHERE { " |
122 | 117 |
+ " ?measurement rdf:type oboe-core:Measurement . " |
... | ... | |
128 | 123 |
+ " ?annotation oa:hasBody ?measurement . " |
129 | 124 |
+ " ?annotation oa:hasTarget ?target . " |
130 | 125 |
+ " ?target oa:hasSource ?metadata . " |
131 |
+ " ?metadata dcterms:identifier ?id . " |
|
132 |
+ "}"; |
|
133 |
|
|
134 |
q = q.replaceAll("\\$GRAPH_NAME", name); |
|
135 |
Query query = QueryFactory.create(q); |
|
136 |
QueryExecution qexec = QueryExecutionFactory.create(query, dataset); |
|
137 |
ResultSet results = qexec.execSelect(); |
|
138 |
|
|
139 |
while (results.hasNext()) { |
|
140 |
QuerySolution solution = results.next(); |
|
141 |
System.out.println(solution.toString()); |
|
126 |
+ " ?metadata dcterms:identifier ?pid . " |
|
127 |
+ "}"); |
|
128 |
|
|
129 |
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " |
|
130 |
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " |
|
131 |
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> " |
|
132 |
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> " |
|
133 |
+ "PREFIX oa: <http://www.w3.org/ns/oa#> " |
|
134 |
+ "PREFIX dcterms: <http://purl.org/dc/terms/> " |
|
135 |
+ "SELECT ?characteristic_sm ?pid " |
|
136 |
+ "FROM <$GRAPH_NAME>" |
|
137 |
+ "WHERE { " |
|
138 |
+ " ?measurement rdf:type oboe-core:Measurement . " |
|
139 |
+ " ?measurement rdf:type ?restriction . " |
|
140 |
+ " ?restriction owl:onProperty oboe-core:ofCharacteristic . " |
|
141 |
+ " ?restriction owl:allValuesFrom ?characteristic . " |
|
142 |
+ " ?characteristic rdfs:subClassOf+ ?characteristic_sm . " |
|
143 |
+ " ?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . " |
|
144 |
+ " ?annotation oa:hasBody ?measurement . " |
|
145 |
+ " ?annotation oa:hasTarget ?target . " |
|
146 |
+ " ?target oa:hasSource ?metadata . " |
|
147 |
+ " ?metadata dcterms:identifier ?pid . " |
|
148 |
+ "}"); |
|
149 |
|
|
150 |
for (String q: queries) { |
|
151 |
q = q.replaceAll("\\$GRAPH_NAME", name); |
|
152 |
Query query = QueryFactory.create(q); |
|
153 |
QueryExecution qexec = QueryExecutionFactory.create(query, dataset); |
|
154 |
ResultSet results = qexec.execSelect(); |
|
142 | 155 |
|
143 |
// find the index document we are trying to augment with the annotation |
|
144 |
if (solution.contains("id")) { |
|
145 |
String id = solution.getLiteral("id").getString(); |
|
146 |
if (!id.equals(metadataPid.getValue())) { |
|
147 |
// skip any solution that does not annotate the given pid |
|
148 |
continue; |
|
149 |
} |
|
156 |
while (results.hasNext()) { |
|
157 |
QuerySolution solution = results.next(); |
|
158 |
System.out.println(solution.toString()); |
|
150 | 159 |
|
151 |
}
|
|
152 |
// loop through the solution variables, add an index value for each
|
|
153 |
Iterator<String> varNameIter = solution.varNames();
|
|
154 |
while (varNameIter.hasNext()) {
|
|
155 |
String key = varNameIter.next();
|
|
156 |
if (key.equals("id")) {
|
|
157 |
// don't include the id
|
|
158 |
continue; |
|
160 |
// find the index document we are trying to augment with the annotation
|
|
161 |
if (solution.contains("pid")) {
|
|
162 |
String id = solution.getLiteral("pid").getString();
|
|
163 |
if (!id.equals(metadataPid.getValue())) {
|
|
164 |
// skip any solution that does not annotate the given pid
|
|
165 |
continue;
|
|
166 |
}
|
|
167 |
|
|
159 | 168 |
} |
160 |
String value = solution.get(key).toString(); |
|
161 |
List<Object> values = fields.get(key); |
|
162 |
if (values == null) { |
|
163 |
values = new ArrayList<Object>(); |
|
169 |
// loop through the solution variables, add an index value for each |
|
170 |
Iterator<String> varNameIter = solution.varNames(); |
|
171 |
while (varNameIter.hasNext()) { |
|
172 |
String key = varNameIter.next(); |
|
173 |
if (key.equals("pid")) { |
|
174 |
// don't include the id |
|
175 |
continue; |
|
176 |
} |
|
177 |
String value = solution.get(key).toString(); |
|
178 |
List<Object> values = fields.get(key); |
|
179 |
if (values == null) { |
|
180 |
values = new ArrayList<Object>(); |
|
181 |
} |
|
182 |
values.add(value); |
|
183 |
fields.put(key, values); |
|
164 | 184 |
} |
165 |
values.add(value); |
|
166 |
fields.put(key, values); |
|
167 | 185 |
} |
168 |
}
|
|
186 |
}
|
|
169 | 187 |
|
170 | 188 |
// clean up the triple store |
171 | 189 |
TDBFactory.release(dataset); |
... | ... | |
363 | 381 |
private Resource lookupStandard(OntClass standardClass, Attribute attribute) { |
364 | 382 |
// what's our unit? |
365 | 383 |
String unit = attribute.getUnit().toLowerCase(); |
384 |
|
|
385 |
/* |
|
386 |
boolean found = false; |
|
366 | 387 |
List<String> tokens = Arrays.asList(unit.split(" ")); |
367 |
|
|
368 |
boolean found = false; |
|
369 | 388 |
ExtendedIterator iter = standardClass.listSubClasses(false); |
370 |
if (randomize) { |
|
371 |
List subclasses = iter.toList(); |
|
372 |
int size = subclasses.size(); |
|
373 |
Long index = new Long(Math.round(Math.floor((Math.random() * (size-1))))); |
|
374 |
OntClass subclass = (OntClass) subclasses.get( index.intValue() ); |
|
375 |
return subclass; |
|
376 |
} |
|
377 | 389 |
while (iter.hasNext()) { |
378 | 390 |
OntClass subclass = (OntClass) iter.next(); |
379 | 391 |
String subclassName = subclass.getLocalName().toLowerCase(); |
... | ... | |
388 | 400 |
return subclass; |
389 | 401 |
} |
390 | 402 |
} |
403 |
*/ |
|
404 |
|
|
391 | 405 |
// try to look it up if we got this far |
392 | 406 |
return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC); |
393 | 407 |
} |
394 | 408 |
|
395 | 409 |
private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) { |
396 |
// what's our label?
|
|
410 |
// what are we looking for?
|
|
397 | 411 |
String label = attribute.getLabel().toLowerCase(); |
398 |
List<String> tokens = Arrays.asList(label.split(" ")); |
|
412 |
String definition = attribute.getDefinition(); |
|
413 |
String text = label + " " + definition; |
|
399 | 414 |
|
415 |
/* |
|
416 |
// find something that matches |
|
400 | 417 |
boolean found = false; |
401 |
// find something that matches
|
|
418 |
List<String> tokens = Arrays.asList(label.split(" "));
|
|
402 | 419 |
ExtendedIterator iter = characteristicClass.listSubClasses(); |
403 |
if (randomize) { |
|
404 |
List subclasses = iter.toList(); |
|
405 |
int size = subclasses.size(); |
|
406 |
Long index = new Long(Math.round(Math.floor((Math.random() * (size-1))))); |
|
407 |
OntClass subclass = (OntClass) subclasses.get( index.intValue() ); |
|
408 |
return subclass; |
|
409 |
} |
|
410 | 420 |
while (iter.hasNext()) { |
411 | 421 |
OntClass subclass = (OntClass) iter.next(); |
412 | 422 |
String subclassName = subclass.getLocalName().toLowerCase(); |
... | ... | |
421 | 431 |
return subclass; |
422 | 432 |
} |
423 | 433 |
} |
434 |
*/ |
|
424 | 435 |
|
425 |
// try to look it up if we got this far
|
|
426 |
return BioPortalService.lookupAnnotationClass(characteristicClass, attribute.getDefinition(), OBOE_SBC);
|
|
436 |
// try to look it up from the service
|
|
437 |
return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
|
|
427 | 438 |
|
428 | 439 |
} |
429 | 440 |
|
Also available in: Unified diff
Simplify lookup for classes and ORCID. Remove the "random" annotation code branches -- it was just too confusing to look at those bogus classes, especially now that we have "real" generated annotations.