Revision 8757
Added by ben leinfelder about 10 years ago
DatapackageSummarizer.java | ||
---|---|---|
6 | 6 |
import java.sql.SQLException; |
7 | 7 |
import java.util.ArrayList; |
8 | 8 |
import java.util.Arrays; |
9 |
import java.util.HashMap; |
|
9 | 10 |
import java.util.Iterator; |
10 | 11 |
import java.util.List; |
11 | 12 |
import java.util.Map; |
12 | 13 |
import java.util.Vector; |
13 | 14 |
|
15 |
import org.apache.commons.io.IOUtils; |
|
14 | 16 |
import org.apache.log4j.Logger; |
15 | 17 |
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest; |
16 | 18 |
import org.dataone.service.types.v1.Identifier; |
... | ... | |
28 | 30 |
import com.hp.hpl.jena.ontology.OntClass; |
29 | 31 |
import com.hp.hpl.jena.ontology.OntModel; |
30 | 32 |
import com.hp.hpl.jena.ontology.Ontology; |
33 |
import com.hp.hpl.jena.query.Dataset; |
|
34 |
import com.hp.hpl.jena.query.Query; |
|
35 |
import com.hp.hpl.jena.query.QueryExecution; |
|
36 |
import com.hp.hpl.jena.query.QueryExecutionFactory; |
|
37 |
import com.hp.hpl.jena.query.QueryFactory; |
|
38 |
import com.hp.hpl.jena.query.QuerySolution; |
|
39 |
import com.hp.hpl.jena.query.ResultSet; |
|
31 | 40 |
import com.hp.hpl.jena.rdf.model.ModelFactory; |
32 | 41 |
import com.hp.hpl.jena.rdf.model.Property; |
33 | 42 |
import com.hp.hpl.jena.rdf.model.Resource; |
43 |
import com.hp.hpl.jena.tdb.TDBFactory; |
|
34 | 44 |
import com.hp.hpl.jena.util.iterator.ExtendedIterator; |
35 | 45 |
|
36 | 46 |
import edu.ucsb.nceas.metacat.DBUtil; |
... | ... | |
40 | 50 |
import edu.ucsb.nceas.metacat.database.DBConnection; |
41 | 51 |
import edu.ucsb.nceas.metacat.database.DBConnectionPool; |
42 | 52 |
import edu.ucsb.nceas.metacat.dataone.MNodeService; |
53 |
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex; |
|
43 | 54 |
import edu.ucsb.nceas.metacat.properties.PropertyService; |
44 | 55 |
import edu.ucsb.nceas.metacat.replication.ReplicationService; |
45 | 56 |
import edu.ucsb.nceas.metacat.util.DocumentUtil; |
... | ... | |
70 | 81 |
|
71 | 82 |
// package visibility for testing only |
72 | 83 |
boolean randomize = false; |
84 |
|
|
85 |
public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception { |
|
73 | 86 |
|
87 |
// generate an annotation for the metadata given |
|
88 |
String rdfContent = this.generateAnnotation(metadataPid); |
|
89 |
|
|
90 |
// load to triple store |
|
91 |
Dataset dataset = TDBFactory.createDataset("./tbd"); |
|
92 |
|
|
93 |
// read the annotation into the triplestore |
|
94 |
InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8"); |
|
95 |
String name = "http://annotation"; |
|
96 |
boolean loaded = dataset.containsNamedModel(name); |
|
97 |
if (loaded) { |
|
98 |
dataset.removeNamedModel(name); |
|
99 |
loaded = false; |
|
100 |
} |
|
101 |
if (!loaded) { |
|
102 |
OntModel ontModel = ModelFactory.createOntologyModel(); |
|
103 |
ontModel.read(source, name); |
|
104 |
dataset.addNamedModel(name, ontModel); |
|
105 |
} |
|
106 |
|
|
107 |
// query for fields to add to index |
|
108 |
Map<String, List<Object>> fields = new HashMap<String, List<Object>>(); |
|
109 |
|
|
110 |
// TODO: look up the query to use (support multiple like in the indexing project) |
|
111 |
String q = null; |
|
112 |
|
|
113 |
q = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " |
|
114 |
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " |
|
115 |
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> " |
|
116 |
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> " |
|
117 |
+ "PREFIX oa: <http://www.w3.org/ns/oa#> " |
|
118 |
+ "PREFIX dcterms: <http://purl.org/dc/terms/> " |
|
119 |
+ "SELECT ?standard_sm ?id " |
|
120 |
+ "FROM <$GRAPH_NAME> " |
|
121 |
+ "WHERE { " |
|
122 |
+ " ?measurement rdf:type oboe-core:Measurement . " |
|
123 |
+ " ?measurement rdf:type ?restriction . " |
|
124 |
+ " ?restriction owl:onProperty oboe-core:usesStandard . " |
|
125 |
+ " ?restriction owl:allValuesFrom ?standard . " |
|
126 |
+ " ?standard rdfs:subClassOf+ ?standard_sm . " |
|
127 |
+ " ?standard_sm rdfs:subClassOf oboe-core:Standard . " |
|
128 |
+ " ?annotation oa:hasBody ?measurement . " |
|
129 |
+ " ?annotation oa:hasTarget ?target . " |
|
130 |
+ " ?target oa:hasSource ?metadata . " |
|
131 |
+ " ?metadata dcterms:identifier ?id . " |
|
132 |
+ "}"; |
|
133 |
|
|
134 |
q = q.replaceAll("\\$GRAPH_NAME", name); |
|
135 |
Query query = QueryFactory.create(q); |
|
136 |
QueryExecution qexec = QueryExecutionFactory.create(query, dataset); |
|
137 |
ResultSet results = qexec.execSelect(); |
|
138 |
|
|
139 |
while (results.hasNext()) { |
|
140 |
QuerySolution solution = results.next(); |
|
141 |
System.out.println(solution.toString()); |
|
142 |
|
|
143 |
// find the index document we are trying to augment with the annotation |
|
144 |
if (solution.contains("id")) { |
|
145 |
String id = solution.getLiteral("id").getString(); |
|
146 |
if (!id.equals(metadataPid.getValue())) { |
|
147 |
// skip any solution that does not annotate the given pid |
|
148 |
continue; |
|
149 |
} |
|
150 |
|
|
151 |
} |
|
152 |
// loop through the solution variables, add an index value for each |
|
153 |
Iterator<String> varNameIter = solution.varNames(); |
|
154 |
while (varNameIter.hasNext()) { |
|
155 |
String key = varNameIter.next(); |
|
156 |
if (key.equals("id")) { |
|
157 |
// don't include the id |
|
158 |
continue; |
|
159 |
} |
|
160 |
String value = solution.get(key).toString(); |
|
161 |
List<Object> values = fields.get(key); |
|
162 |
if (values == null) { |
|
163 |
values = new ArrayList<Object>(); |
|
164 |
} |
|
165 |
values.add(value); |
|
166 |
fields.put(key, values); |
|
167 |
} |
|
168 |
} |
|
169 |
|
|
170 |
// clean up the triple store |
|
171 |
TDBFactory.release(dataset); |
|
172 |
|
|
173 |
// add to index |
|
174 |
MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true); |
|
175 |
|
|
176 |
|
|
177 |
} |
|
178 |
|
|
74 | 179 |
/** |
75 | 180 |
* Generate annotation for given metadata identifier |
76 | 181 |
* @param metadataPid |
Also available in: Unified diff
First pass at a direct EML->semantic index method. It still produces an RDF model, but persists it only in the triplestore, not in Metacat. This allows us to re-run the method without adding stale RDF to the MN store.