Revision 10227
Added by Jing Tao over 7 years ago
Call the method TripleStoreService.destroy to delete the dataset.

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/annotation/MetacatRdfXmlSubprocessor.java

@@ -175,103 +175,112 @@
 
         // get the triplestore dataset
         long start = System.currentTimeMillis();
+        Map<String, SolrDoc> mergedDocuments;
         Dataset dataset = TripleStoreService.getInstance().getDataset();
-        perfLog.log("RdfXmlSubprocess.process gets a dataset from tripe store service ", System.currentTimeMillis() - start);
-
-        // read the annotation
-        String indexDocId = indexDocument.getIdentifier();
-        String name = indexDocId;
-
-        //Check if the identifier is a valid URI and if not, make it one by prepending "http://"
-        URI nameURI;
-        String scheme = null;
         try {
-            nameURI = new URI(indexDocId);
-            scheme = nameURI.getScheme();
+            perfLog.log("RdfXmlSubprocess.process gets a dataset from tripe store service ", System.currentTimeMillis() - start);
 
-        } catch (URISyntaxException use) {
-            // The identifier can't be parsed due to offending characters. It's not a URL
+            // read the annotation
+            String indexDocId = indexDocument.getIdentifier();
+            String name = indexDocId;
+
+            //Check if the identifier is a valid URI and if not, make it one by prepending "http://"
+            URI nameURI;
+            String scheme = null;
+            try {
+                nameURI = new URI(indexDocId);
+                scheme = nameURI.getScheme();
+
+            } catch (URISyntaxException use) {
+                // The identifier can't be parsed due to offending characters. It's not a URL
+
+                name = "https://cn.dataone.org/cn/v1/resolve/"+indexDocId;
+            }
 
-            name = "https://cn.dataone.org/cn/v1/resolve/"+indexDocId;
-        }
-
-        // The had no scheme prefix. It's not a URL
-        if ((scheme == null) || (scheme.isEmpty())) {
-            name = "https://cn.dataone.org/cn/v1/resolve/"+indexDocId;
+            // The had no scheme prefix. It's not a URL
+            if ((scheme == null) || (scheme.isEmpty())) {
+                name = "https://cn.dataone.org/cn/v1/resolve/"+indexDocId;
+
+            }
 
-        }
-
-        long startOntModel = System.currentTimeMillis();
-        boolean loaded = dataset.containsNamedModel(name);
-        if (!loaded) {
-            OntModel ontModel = ModelFactory.createOntologyModel();
-            ontModel.read(is, name);
-            dataset.addNamedModel(name, ontModel);
-        }
-        perfLog.log("RdfXmlSubprocess.process adds ont-model ", System.currentTimeMillis() - startOntModel);
-        //dataset.getDefaultModel().add(ontModel);
-
-        // process each field query
-        Map<String, SolrDoc> documentsToIndex = new HashMap<String, SolrDoc>();
-        long startField = System.currentTimeMillis();
-        for (ISolrDataField field : this.fieldList) {
-            long filed = System.currentTimeMillis();
-            String q = null;
-            if (field instanceof SparqlField) {
-                q = ((SparqlField) field).getQuery();
-                q = q.replaceAll("\\$GRAPH_NAME", name);
-                Query query = QueryFactory.create(q);
-                log.trace("Executing SPARQL query:\n" + query.toString());
-                QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
-                ResultSet results = qexec.execSelect();
-                while (results.hasNext()) {
-                    SolrDoc solrDoc = null;
-                    QuerySolution solution = results.next();
-                    log.trace(solution.toString());
-
-                    // find the index document we are trying to augment with the annotation
-                    if (solution.contains("pid")) {
-                        String id = solution.getLiteral("pid").getString();
-
-                        // TODO: check if anyone with permissions on the annotation document has write permission on the document we are annotating
-                        boolean statementAuthorized = true;
-                        if (!statementAuthorized) {
-                            continue;
+            long startOntModel = System.currentTimeMillis();
+            boolean loaded = dataset.containsNamedModel(name);
+            if (!loaded) {
+                OntModel ontModel = ModelFactory.createOntologyModel();
+                ontModel.read(is, name);
+                dataset.addNamedModel(name, ontModel);
+            }
+            perfLog.log("RdfXmlSubprocess.process adds ont-model ", System.currentTimeMillis() - startOntModel);
+            //dataset.getDefaultModel().add(ontModel);
+
+            // process each field query
+            Map<String, SolrDoc> documentsToIndex = new HashMap<String, SolrDoc>();
+            long startField = System.currentTimeMillis();
+            for (ISolrDataField field : this.fieldList) {
+                long filed = System.currentTimeMillis();
+                String q = null;
+                if (field instanceof SparqlField) {
+                    q = ((SparqlField) field).getQuery();
+                    q = q.replaceAll("\\$GRAPH_NAME", name);
+                    Query query = QueryFactory.create(q);
+                    log.trace("Executing SPARQL query:\n" + query.toString());
+                    QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
+                    ResultSet results = qexec.execSelect();
+                    while (results.hasNext()) {
+                        SolrDoc solrDoc = null;
+                        QuerySolution solution = results.next();
+                        log.trace(solution.toString());
+
+                        // find the index document we are trying to augment with the annotation
+                        if (solution.contains("pid")) {
+                            String id = solution.getLiteral("pid").getString();
+
+                            // TODO: check if anyone with permissions on the annotation document has write permission on the document we are annotating
+                            boolean statementAuthorized = true;
+                            if (!statementAuthorized) {
+                                continue;
+                            }
+
+                            // otherwise carry on with the indexing
+                            solrDoc = documentsToIndex.get(id);
+                            if (solrDoc == null) {
+                                solrDoc = new SolrDoc();
+                                solrDoc.addField(new SolrElementField(SolrElementField.FIELD_ID, id));
+                                documentsToIndex.put(id, solrDoc);
+                            }
                         }
-
-                        // otherwise carry on with the indexing
-                        solrDoc = documentsToIndex.get(id);
-                        if (solrDoc == null) {
-                            solrDoc = new SolrDoc();
-                            solrDoc.addField(new SolrElementField(SolrElementField.FIELD_ID, id));
-                            documentsToIndex.put(id, solrDoc);
+
+                        // add the field to the index document
+                        if (solution.contains(field.getName())) {
+                            String value = solution.get(field.getName()).toString();
+                            SolrElementField f = new SolrElementField(field.getName(), value);
+                            if (!solrDoc.hasFieldWithValue(f.getName(), f.getValue())) {
+                                solrDoc.addField(f);
+                            }
                         }
                     }
-
-                    // add the field to the index document
-                    if (solution.contains(field.getName())) {
-                        String value = solution.get(field.getName()).toString();
-                        SolrElementField f = new SolrElementField(field.getName(), value);
-                        if (!solrDoc.hasFieldWithValue(f.getName(), f.getValue())) {
-                            solrDoc.addField(f);
-                        }
-                    }
                 }
+                perfLog.log("RdfXmlSubprocess.process process the field "+field.getName(), System.currentTimeMillis() - filed);
             }
-            perfLog.log("RdfXmlSubprocess.process process the field "+field.getName(), System.currentTimeMillis() - filed);
+            perfLog.log("RdfXmlSubprocess.process process the fields total ", System.currentTimeMillis() - startField);
+            // clean up the triple store
+            //TDBFactory.release(dataset);
+
+            // merge the existing index with the new[er] values
+            long getStart = System.currentTimeMillis();
+            Map<String, SolrDoc> existingDocuments = getSolrDocs(documentsToIndex.keySet());
+            perfLog.log("RdfXmlSubprocess.process get existing solr docs ", System.currentTimeMillis() - getStart);
+            mergedDocuments = mergeDocs(documentsToIndex, existingDocuments);
+            mergedDocuments.put(indexDocument.getIdentifier(), indexDocument);
+
+            perfLog.log("RdfXmlSubprocess.process() total take ", System.currentTimeMillis() - start);
+        } finally {
+            try {
+                TripleStoreService.getInstance().destoryDataset(dataset);
+            } catch (Exception e) {
+                log.warn("A tdb directory can't be removed since "+e.getMessage(), e);
+            }
         }
-        perfLog.log("RdfXmlSubprocess.process process the fields total ", System.currentTimeMillis() - startField);
-        // clean up the triple store
-        TDBFactory.release(dataset);
-
-        // merge the existing index with the new[er] values
-        long getStart = System.currentTimeMillis();
-        Map<String, SolrDoc> existingDocuments = getSolrDocs(documentsToIndex.keySet());
-        perfLog.log("RdfXmlSubprocess.process get existing solr docs ", System.currentTimeMillis() - getStart);
-        Map<String, SolrDoc> mergedDocuments = mergeDocs(documentsToIndex, existingDocuments);
-        mergedDocuments.put(indexDocument.getIdentifier(), indexDocument);
-
-        perfLog.log("RdfXmlSubprocess.process() total take ", System.currentTimeMillis() - start);
         return new ArrayList<SolrDoc>(mergedDocuments.values());
     }
 
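The gist of the revision: the triple-store cleanup used to run inline (TDBFactory.release(dataset)) and was skipped whenever indexing threw, so a failed run could leave its TDB directory behind; the revision wraps the body of process() in try/finally, declares mergedDocuments before the try, and deletes the dataset through TripleStoreService in the finally block. Below is a minimal sketch of that pattern, not the Metacat code itself: Dataset, TripleStoreService, and the indexing work are reduced to hypothetical stand-ins, and only the control flow mirrors the change above.

import java.util.ArrayList;
import java.util.List;

public class CleanupSketch {

    // Hypothetical stand-ins for Jena's Dataset and Metacat's TripleStoreService.
    interface Dataset {}

    static class TripleStoreService {
        private static final TripleStoreService INSTANCE = new TripleStoreService();
        static TripleStoreService getInstance() { return INSTANCE; }
        Dataset getDataset() { return new Dataset() {}; }
        // Same spelling as the method the revision calls (the codebase's own);
        // in Metacat it removes the on-disk TDB directory backing the dataset.
        void destoryDataset(Dataset dataset) { /* delete the backing TDB directory */ }
    }

    List<String> process() {
        // Declared before the try so the return statement after the finally
        // block can still see the merged result.
        List<String> mergedDocuments;
        Dataset dataset = TripleStoreService.getInstance().getDataset();
        try {
            // Placeholder for the real work: SPARQL queries against the
            // dataset, merged into Solr documents.
            mergedDocuments = new ArrayList<String>();
        } finally {
            try {
                // Runs whether or not the work above threw, so a failed
                // indexing run no longer leaks its TDB directory.
                TripleStoreService.getInstance().destoryDataset(dataset);
            } catch (Exception e) {
                System.err.println("A tdb directory can't be removed since " + e.getMessage());
            }
        }
        return mergedDocuments;
    }
}

The nested try/catch inside the finally matters: if deleting the dataset itself fails, the exception is logged rather than allowed to mask whatever the indexing code was in the middle of returning or throwing.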