1
|
package edu.ucsb.nceas.metacat.annotation;
|
2
|
|
3
|
import java.io.InputStream;
|
4
|
import java.io.StringWriter;
|
5
|
import java.sql.PreparedStatement;
|
6
|
import java.sql.SQLException;
|
7
|
import java.util.ArrayList;
|
8
|
import java.util.Arrays;
|
9
|
import java.util.HashMap;
|
10
|
import java.util.Iterator;
|
11
|
import java.util.List;
|
12
|
import java.util.Map;
|
13
|
import java.util.Vector;
|
14
|
|
15
|
import org.apache.commons.io.IOUtils;
|
16
|
import org.apache.log4j.Logger;
|
17
|
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
|
18
|
import org.dataone.service.types.v1.Identifier;
|
19
|
import org.dataone.service.types.v1.Session;
|
20
|
import org.dataone.service.types.v1.Subject;
|
21
|
import org.ecoinformatics.datamanager.parser.Attribute;
|
22
|
import org.ecoinformatics.datamanager.parser.DataPackage;
|
23
|
import org.ecoinformatics.datamanager.parser.Entity;
|
24
|
import org.ecoinformatics.datamanager.parser.Party;
|
25
|
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
|
26
|
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
|
27
|
|
28
|
import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
|
29
|
import com.hp.hpl.jena.ontology.Individual;
|
30
|
import com.hp.hpl.jena.ontology.ObjectProperty;
|
31
|
import com.hp.hpl.jena.ontology.OntClass;
|
32
|
import com.hp.hpl.jena.ontology.OntDocumentManager;
|
33
|
import com.hp.hpl.jena.ontology.OntModel;
|
34
|
import com.hp.hpl.jena.ontology.Ontology;
|
35
|
import com.hp.hpl.jena.query.Dataset;
|
36
|
import com.hp.hpl.jena.query.Query;
|
37
|
import com.hp.hpl.jena.query.QueryExecution;
|
38
|
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
39
|
import com.hp.hpl.jena.query.QueryFactory;
|
40
|
import com.hp.hpl.jena.query.QuerySolution;
|
41
|
import com.hp.hpl.jena.query.ResultSet;
|
42
|
import com.hp.hpl.jena.rdf.model.ModelFactory;
|
43
|
import com.hp.hpl.jena.rdf.model.Property;
|
44
|
import com.hp.hpl.jena.rdf.model.Resource;
|
45
|
import com.hp.hpl.jena.tdb.TDBFactory;
|
46
|
|
47
|
import edu.ucsb.nceas.metacat.DBUtil;
|
48
|
import edu.ucsb.nceas.metacat.DocumentImpl;
|
49
|
import edu.ucsb.nceas.metacat.IdentifierManager;
|
50
|
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
|
51
|
import edu.ucsb.nceas.metacat.database.DBConnection;
|
52
|
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
|
53
|
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
54
|
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
|
55
|
import edu.ucsb.nceas.metacat.properties.PropertyService;
|
56
|
import edu.ucsb.nceas.metacat.replication.ReplicationService;
|
57
|
import edu.ucsb.nceas.metacat.util.DocumentUtil;
|
58
|
import edu.ucsb.nceas.utilities.SortedProperties;
|
59
|
|
60
|
public class DatapackageSummarizer {
|
61
|
|
62
|
// Class-wide logger (log4j).
private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);

// Namespace URIs for the vocabularies referenced by generated annotations.
public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
public static String owl = "http://www.w3.org/2002/07/owl#";
// OBOE (Extensible Observation Ontology) namespaces, including the SBC-LTER extension.
public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
public static String oboe_characteristics = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-characteristics.owl#";
public static String oboe_sbc = "http://ecoinformatics.org/oboe-ext/sbclter.1.0/oboe-sbclter.owl#";
// Open Annotation namespace, plus the URL of the RDF document that defines it.
public static String oa = "http://www.w3.org/ns/oa#";
public static String oa_source = "http://www.w3.org/ns/oa.rdf";
// Dublin Core terms namespace and its RDF document location.
public static String dcterms = "http://purl.org/dc/terms/";
public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
// FOAF namespace and its RDF document location.
public static String foaf = "http://xmlns.com/foaf/0.1/";
public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
// W3C PROV namespace and the OWL document that defines it.
public static String prov = "http://www.w3.org/ns/prov#";
public static String prov_source = "http://www.w3.org/ns/prov.owl";
// CiTO (Citation Typing Ontology) namespace.
public static String cito = "http://purl.org/spar/cito/";

// Name of the BioPortal ontology queried when looking up annotation classes.
public static String OBOE_SBC = "OBOE-SBC";

// Whether the ontology models above have been loaded into the OntDocumentManager cache.
private static boolean cacheInitialized;
|
84
|
|
85
|
private static void initializeCache() {
|
86
|
if (!cacheInitialized) {
|
87
|
// cache the ontologies we use
|
88
|
OntDocumentManager.getInstance().addModel(oboe, ModelFactory.createOntologyModel().read(oboe));
|
89
|
OntDocumentManager.getInstance().addModel(oboe_sbc, ModelFactory.createOntologyModel().read(oboe_sbc));
|
90
|
OntDocumentManager.getInstance().addModel(oa, ModelFactory.createOntologyModel().read(oa_source));
|
91
|
OntDocumentManager.getInstance().addModel(dcterms, ModelFactory.createOntologyModel().read(dcterms_source));
|
92
|
OntDocumentManager.getInstance().addModel(foaf, ModelFactory.createOntologyModel().read(foaf_source));
|
93
|
OntDocumentManager.getInstance().addModel(prov, ModelFactory.createOntologyModel().read(prov));
|
94
|
OntDocumentManager.getInstance().addModel(cito, ModelFactory.createOntologyModel().read(cito));
|
95
|
cacheInitialized = true;
|
96
|
}
|
97
|
}
|
98
|
|
99
|
public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
|
100
|
|
101
|
// generate an annotation for the metadata given
|
102
|
String rdfContent = this.generateAnnotation(metadataPid);
|
103
|
|
104
|
// load to triple store
|
105
|
//Dataset dataset = TDBFactory.createDataset("./tbd");
|
106
|
Dataset dataset = TDBFactory.createDataset();
|
107
|
//Dataset dataset = DatasetFactory.createMem();
|
108
|
|
109
|
// read the annotation into the triplestore
|
110
|
InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
|
111
|
String name = "http://annotation/" + metadataPid.getValue();
|
112
|
boolean loaded = dataset.containsNamedModel(name);
|
113
|
if (loaded) {
|
114
|
dataset.removeNamedModel(name);
|
115
|
loaded = false;
|
116
|
}
|
117
|
OntModel ontModel = null;
|
118
|
if (!loaded) {
|
119
|
ontModel = ModelFactory.createOntologyModel();
|
120
|
ontModel.read(source, name);
|
121
|
dataset.addNamedModel(name, ontModel);
|
122
|
}
|
123
|
|
124
|
// query for fields to add to index
|
125
|
Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
|
126
|
|
127
|
// TODO: look up the query to use (support multiple like in the indexing project)
|
128
|
List<String> queries = new ArrayList<String>();
|
129
|
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
130
|
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
131
|
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
132
|
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
133
|
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
134
|
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
135
|
+ "SELECT ?standard_sm ?pid "
|
136
|
+ "FROM <$GRAPH_NAME> "
|
137
|
+ "WHERE { "
|
138
|
+ " ?measurement rdf:type oboe-core:Measurement . "
|
139
|
+ " ?measurement rdf:type ?restriction . "
|
140
|
+ " ?restriction owl:onProperty oboe-core:usesStandard . "
|
141
|
+ " ?restriction owl:allValuesFrom ?standard . "
|
142
|
+ " ?standard rdfs:subClassOf+ ?standard_sm . "
|
143
|
+ " ?standard_sm rdfs:subClassOf oboe-core:Standard . "
|
144
|
+ " ?annotation oa:hasBody ?measurement . "
|
145
|
+ " ?annotation oa:hasTarget ?target . "
|
146
|
+ " ?target oa:hasSource ?metadata . "
|
147
|
+ " ?metadata dcterms:identifier ?pid . "
|
148
|
+ "}");
|
149
|
|
150
|
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
151
|
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
152
|
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
153
|
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
154
|
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
155
|
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
156
|
+ "SELECT ?characteristic_sm ?pid "
|
157
|
+ "FROM <$GRAPH_NAME>"
|
158
|
+ "WHERE { "
|
159
|
+ " ?measurement rdf:type oboe-core:Measurement . "
|
160
|
+ " ?measurement rdf:type ?restriction . "
|
161
|
+ " ?restriction owl:onProperty oboe-core:ofCharacteristic . "
|
162
|
+ " ?restriction owl:allValuesFrom ?characteristic . "
|
163
|
+ " ?characteristic rdfs:subClassOf+ ?characteristic_sm . "
|
164
|
+ " ?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . "
|
165
|
+ " ?annotation oa:hasBody ?measurement . "
|
166
|
+ " ?annotation oa:hasTarget ?target . "
|
167
|
+ " ?target oa:hasSource ?metadata . "
|
168
|
+ " ?metadata dcterms:identifier ?pid . "
|
169
|
+ "}");
|
170
|
|
171
|
for (String q: queries) {
|
172
|
q = q.replaceAll("\\$GRAPH_NAME", name);
|
173
|
Query query = QueryFactory.create(q);
|
174
|
QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
|
175
|
ResultSet results = qexec.execSelect();
|
176
|
|
177
|
while (results.hasNext()) {
|
178
|
QuerySolution solution = results.next();
|
179
|
System.out.println(solution.toString());
|
180
|
|
181
|
// find the index document we are trying to augment with the annotation
|
182
|
if (solution.contains("pid")) {
|
183
|
String id = solution.getLiteral("pid").getString();
|
184
|
if (!id.equals(metadataPid.getValue())) {
|
185
|
// skip any solution that does not annotate the given pid
|
186
|
continue;
|
187
|
}
|
188
|
|
189
|
}
|
190
|
// loop through the solution variables, add an index value for each
|
191
|
Iterator<String> varNameIter = solution.varNames();
|
192
|
while (varNameIter.hasNext()) {
|
193
|
String key = varNameIter.next();
|
194
|
if (key.equals("pid")) {
|
195
|
// don't include the id
|
196
|
continue;
|
197
|
}
|
198
|
String value = solution.get(key).toString();
|
199
|
List<Object> values = fields.get(key);
|
200
|
if (values == null) {
|
201
|
values = new ArrayList<Object>();
|
202
|
}
|
203
|
values.add(value);
|
204
|
fields.put(key, values);
|
205
|
}
|
206
|
}
|
207
|
}
|
208
|
|
209
|
// remove the graph to save storage
|
210
|
// ontModel.removeAll();
|
211
|
// ontModel.commit();
|
212
|
// ontModel.close();
|
213
|
dataset.removeNamedModel(name);
|
214
|
|
215
|
// clean up the triple store
|
216
|
TDBFactory.release(dataset);
|
217
|
|
218
|
// add to index
|
219
|
MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true);
|
220
|
|
221
|
|
222
|
}
|
223
|
|
224
|
/**
 * Generate an OA/OBOE RDF annotation for the given metadata identifier.
 * Parses the EML document, then builds an ontology model containing: a
 * prov:Entity for the metadata (attributed to the first creator, by ORCID
 * when one can be found), and, for each attribute whose unit or
 * label/definition matches an OBOE-SBC concept, an oa:Annotation whose body
 * is an oboe:Measurement restricted to the matched Standard/Characteristic
 * and whose target selects the attribute via an XPointer fragment selector.
 * @param metadataPid identifier of the EML metadata document to annotate
 * @return the serialized RDF/XML annotation for the package
 * @throws Exception if the metadata cannot be retrieved or parsed
 */
public String generateAnnotation(Identifier metadataPid) throws Exception {

    DataPackage dataPackage = this.getDataPackage(metadataPid);

    // one ontology per annotated package, named by the metadata pid
    OntModel m = ModelFactory.createOntologyModel();
    Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());

    // TODO: import the ontologies we use
    initializeCache();

    ont.addImport(m.createResource(oboe));
    m.addSubModel(OntDocumentManager.getInstance().getModel(oboe));

    ont.addImport(m.createResource(oboe_sbc));
    m.addSubModel(OntDocumentManager.getInstance().getModel(oboe_sbc));

    ont.addImport(m.createResource(oa));
    m.addSubModel(OntDocumentManager.getInstance().getModel(oa));

    ont.addImport(m.createResource(dcterms));
    m.addSubModel(OntDocumentManager.getInstance().getModel(dcterms));

    ont.addImport(m.createResource(foaf));
    m.addSubModel(OntDocumentManager.getInstance().getModel(foaf));

    // prov and cito are imported but not added as submodels
    ont.addImport(m.createResource(prov));
    //m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));

    ont.addImport(m.createResource(cito));

    // properties (resolved from the imported vocabularies)
    ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
    ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
    ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
    ObjectProperty hasSelectorProperty = m.getObjectProperty(oa + "hasSelector");
    ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
    Property identifierProperty = m.getProperty(dcterms + "identifier");
    Property conformsToProperty = m.getProperty(dcterms + "conformsTo");
    Property wasAttributedTo = m.getProperty(prov + "wasAttributedTo");
    Property nameProperty = m.getProperty(foaf + "name");
    Property rdfValue = m.getProperty(rdf + "value");

    ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
    ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
    ObjectProperty ofEntity = m.getObjectProperty(oboe_core + "ofEntity");
    ObjectProperty hasMeasurement = m.getObjectProperty(oboe_core + "hasMeasurement");

    // classes
    OntClass entityClass = m.getOntClass(oboe_core + "Entity");
    OntClass observationClass = m.getOntClass(oboe_core + "Observation");
    OntClass measurementClass = m.getOntClass(oboe_core + "Measurement");
    OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
    OntClass standardClass = m.getOntClass(oboe_core + "Standard");

    Resource annotationClass = m.getOntClass(oa + "Annotation");
    Resource specificResourceClass = m.getOntClass(oa + "SpecificResource");
    Resource fragmentSelectorClass = m.getOntClass(oa + "FragmentSelector");
    Resource provEntityClass = m.getResource(prov + "Entity");
    Resource personClass = m.getResource(prov + "Person");

    // these apply to every attribute annotation
    Individual meta1 = m.createIndividual(ont.getURI() + "#meta", provEntityClass);
    meta1.addProperty(identifierProperty, metadataPid.getValue());

    // decide who should be credited with the package
    Individual p1 = null;

    // look up creators from the EML metadata
    List<Party> creators = dataPackage.getCreators();
    //creators = Arrays.asList("Matthew Jones");
    if (creators != null && creators.size() > 0) {
        // use an orcid if we can find one from their system
        String orcidUri = OrcidService.lookupOrcid(creators.get(0).getOrganization(), creators.get(0).getSurName(), creators.get(0).getGivenNames(), null);
        if (orcidUri != null) {
            p1 = m.createIndividual(orcidUri, personClass);
            p1.addProperty(identifierProperty, orcidUri);
        } else {
            // no ORCID found: mint a local person URI within this annotation
            p1 = m.createIndividual(ont.getURI() + "#person", personClass);
        }
        // include the name we have in the metadata
        if (creators.get(0).getSurName() != null) {
            p1.addProperty(nameProperty, creators.get(0).getSurName());
        } else if (creators.get(0).getOrganization() != null) {
            p1.addProperty(nameProperty, creators.get(0).getOrganization());
        }
    }

    // attribute the package to this creator if we have one
    if (p1 != null) {
        meta1.addProperty(wasAttributedTo, p1);
    }

    // loop through the tables and attributes; counters are 1-based because they
    // feed the XPointer expressions below
    int entityCount = 1;
    Entity[] entities = dataPackage.getEntityList();
    if (entities != null) {
        for (Entity entity: entities) {
            String entityName = entity.getName();

            // one Observation per data table/entity
            Individual o1 = m.createIndividual(ont.getURI() + "#observation" + entityCount, observationClass);
            Resource entityConcept = lookupEntity(entityClass, entity);
            if (entityConcept != null) {
                AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofEntity, entityConcept);
                o1.addOntClass(avfr);
            }

            logMetacat.debug("Entity name: " + entityName);
            Attribute[] attributes = entity.getAttributeList().getAttributes();
            int attributeCount = 1;
            if (attributes != null) {
                for (Attribute attribute: attributes) {

                    // for naming the individuals uniquely
                    String cnt = entityCount + "_" + attributeCount;

                    String attributeName = attribute.getName();
                    String attributeLabel = attribute.getLabel();
                    String attributeDefinition = attribute.getDefinition();
                    String attributeType = attribute.getAttributeType();
                    String attributeScale = attribute.getMeasurementScale();
                    String attributeUnitType = attribute.getUnitType();
                    String attributeUnit = attribute.getUnit();
                    String attributeDomain = attribute.getDomain().getClass().getSimpleName();

                    logMetacat.debug("Attribute name: " + attributeName);
                    logMetacat.debug("Attribute label: " + attributeLabel);
                    logMetacat.debug("Attribute definition: " + attributeDefinition);
                    logMetacat.debug("Attribute type: " + attributeType);
                    logMetacat.debug("Attribute scale: " + attributeScale);
                    logMetacat.debug("Attribute unit type: " + attributeUnitType);
                    logMetacat.debug("Attribute unit: " + attributeUnit);
                    logMetacat.debug("Attribute domain: " + attributeDomain);

                    // look up the characteristic or standard subclasses
                    Resource standard = this.lookupStandard(standardClass, attribute);
                    Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);

                    // only emit an annotation when at least one concept matched
                    if (standard != null || characteristic != null) {

                        // instances
                        Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
                        Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
                        Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
                        // XPointer locating this attribute within the EML document
                        String xpointer = "xpointer(/eml/dataSet/dataTable[" + entityCount + "]/attributeList/attribute[" + attributeCount + "])";
                        Individual s1 = m.createIndividual(ont.getURI() + "#" + xpointer, fragmentSelectorClass);
                        s1.addLiteral(rdfValue, xpointer);
                        s1.addProperty(conformsToProperty, "http://tools.ietf.org/rfc/rfc3023");
                        //s1.addProperty(conformsToProperty, "http://www.w3.org/TR/xptr/");

                        // statements about the annotation
                        a1.addProperty(hasBodyProperty, m1);
                        a1.addProperty(hasTargetProperty, t1);
                        t1.addProperty(hasSourceProperty, meta1);
                        t1.addProperty(hasSelectorProperty, s1);
                        //a1.addProperty(annotatedByProperty, p1);

                        // describe the measurement in terms of restrictions
                        if (standard != null) {
                            AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
                            m1.addOntClass(avfr);
                        }
                        if (characteristic != null) {
                            AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
                            m1.addOntClass(avfr);
                        }

                        // attach to the observation
                        // TODO: evaluate whether the measurement can apply to the given observed entity
                        o1.addProperty(hasMeasurement, m1);
                    }
                    attributeCount++;

                }
            }
            entityCount++;
        }
    }

    StringWriter sw = new StringWriter();
    // only write the base model
    //m.write(sw, "RDF/XML-ABBREV");
    m.write(sw, null);

    return sw.toString();

}
|
415
|
|
416
|
private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
|
417
|
// what's our unit?
|
418
|
String unit = attribute.getUnit().toLowerCase();
|
419
|
|
420
|
/*
|
421
|
boolean found = false;
|
422
|
List<String> tokens = Arrays.asList(unit.split(" "));
|
423
|
ExtendedIterator iter = standardClass.listSubClasses(false);
|
424
|
while (iter.hasNext()) {
|
425
|
OntClass subclass = (OntClass) iter.next();
|
426
|
String subclassName = subclass.getLocalName().toLowerCase();
|
427
|
logMetacat.debug("subclass: " + subclassName);
|
428
|
if (tokens.contains(subclassName)) {
|
429
|
found = true;
|
430
|
}
|
431
|
if (subclass.hasLabel(unit, null)) {
|
432
|
found = true;
|
433
|
}
|
434
|
if (found) {
|
435
|
return subclass;
|
436
|
}
|
437
|
}
|
438
|
*/
|
439
|
|
440
|
// try to look it up if we got this far
|
441
|
return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
|
442
|
}
|
443
|
|
444
|
private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
|
445
|
// what are we looking for?
|
446
|
String label = attribute.getLabel().toLowerCase();
|
447
|
String definition = attribute.getDefinition();
|
448
|
String text = label + " " + definition;
|
449
|
|
450
|
/*
|
451
|
// find something that matches
|
452
|
boolean found = false;
|
453
|
List<String> tokens = Arrays.asList(label.split(" "));
|
454
|
ExtendedIterator iter = characteristicClass.listSubClasses();
|
455
|
while (iter.hasNext()) {
|
456
|
OntClass subclass = (OntClass) iter.next();
|
457
|
String subclassName = subclass.getLocalName().toLowerCase();
|
458
|
logMetacat.debug("subclass: " + subclassName);
|
459
|
if (tokens.contains(subclassName)) {
|
460
|
found = true;
|
461
|
}
|
462
|
if (subclass.hasLabel(label, null)) {
|
463
|
found = true;
|
464
|
}
|
465
|
if (found) {
|
466
|
return subclass;
|
467
|
}
|
468
|
}
|
469
|
*/
|
470
|
|
471
|
// try to look it up from the service
|
472
|
return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
|
473
|
|
474
|
}
|
475
|
|
476
|
private Resource lookupEntity(OntClass entityClass, Entity entity) {
|
477
|
// what's our description like?
|
478
|
String name = entity.getName();
|
479
|
String definition = entity.getDefinition();
|
480
|
|
481
|
// try to look it up if we got this far
|
482
|
return BioPortalService.lookupAnnotationClass(entityClass, definition, OBOE_SBC);
|
483
|
|
484
|
}
|
485
|
|
486
|
private DataPackage getDataPackage(Identifier pid) throws Exception {
|
487
|
// for using the MN API as the MN itself
|
488
|
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
|
489
|
Session session = new Session();
|
490
|
Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
|
491
|
session.setSubject(subject);
|
492
|
InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
|
493
|
|
494
|
// parse the metadata
|
495
|
DataPackageParserInterface parser = new Eml200DataPackageParser();
|
496
|
parser.parse(emlStream);
|
497
|
DataPackage dataPackage = parser.getDataPackage();
|
498
|
return dataPackage;
|
499
|
}
|
500
|
|
501
|
/**
 * Rebuild the entity_summary table: drop and recreate it, then insert one row
 * per attribute of every entity in each of the given packages. Packages that
 * fail to parse are logged and skipped; only SQL failures propagate.
 * @param identifiers pids of the metadata documents to summarize
 * @throws SQLException if any database operation fails
 */
private void summarize(List<Identifier> identifiers) throws SQLException {

    DBConnection dbconn = null;
    // serial number needed to return the connection to the pool in the finally block
    int serialNumber = -1;
    PreparedStatement dropStatement = null;
    PreparedStatement createStatement = null;
    PreparedStatement insertStatement = null;
    try {
        dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize");
        serialNumber = dbconn.getCheckOutSerialNumber();
        // start from a clean table each run
        dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary");
        dropStatement.execute();

        createStatement = dbconn.prepareStatement(
                "CREATE TABLE entity_summary (" +
                "guid text, " +
                "title text, " +
                "entity text," +
                "attributeName text," +
                "attributeLabel text," +
                "attributeDefinition text," +
                "attributeType text," +
                "attributeScale text," +
                "attributeUnitType text," +
                "attributeUnit text," +
                "attributeDomain text" +
                ")");
        createStatement.execute();

        // one reusable parameterized insert for all rows
        insertStatement = dbconn.prepareStatement(
                "INSERT INTO entity_summary " +
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");

        for (Identifier pid: identifiers) {

            logMetacat.debug("Parsing pid: " + pid.getValue());

            try {

                // get the package
                DataPackage dataPackage = this.getDataPackage(pid);
                String title = dataPackage.getTitle();
                logMetacat.debug("Title: " + title);

                Entity[] entities = dataPackage.getEntityList();
                if (entities != null) {
                    for (Entity entity: entities) {
                        String entityName = entity.getName();
                        logMetacat.debug("Entity name: " + entityName);
                        Attribute[] attributes = entity.getAttributeList().getAttributes();
                        for (Attribute attribute: attributes) {
                            String attributeName = attribute.getName();
                            String attributeLabel = attribute.getLabel();
                            String attributeDefinition = attribute.getDefinition();
                            String attributeType = attribute.getAttributeType();
                            String attributeScale = attribute.getMeasurementScale();
                            String attributeUnitType = attribute.getUnitType();
                            String attributeUnit = attribute.getUnit();
                            String attributeDomain = attribute.getDomain().getClass().getSimpleName();

                            logMetacat.debug("Attribute name: " + attributeName);
                            logMetacat.debug("Attribute label: " + attributeLabel);
                            logMetacat.debug("Attribute definition: " + attributeDefinition);
                            logMetacat.debug("Attribute type: " + attributeType);
                            logMetacat.debug("Attribute scale: " + attributeScale);
                            logMetacat.debug("Attribute unit type: " + attributeUnitType);
                            logMetacat.debug("Attribute unit: " + attributeUnit);
                            logMetacat.debug("Attribute domain: " + attributeDomain);

                            // set the values for this attribute
                            insertStatement.setString(1, pid.getValue());
                            insertStatement.setString(2, title);
                            insertStatement.setString(3, entityName);
                            insertStatement.setString(4, attributeName);
                            insertStatement.setString(5, attributeLabel);
                            insertStatement.setString(6, attributeDefinition);
                            insertStatement.setString(7, attributeType);
                            insertStatement.setString(8, attributeScale);
                            insertStatement.setString(9, attributeUnitType);
                            insertStatement.setString(10, attributeUnit);
                            insertStatement.setString(11, attributeDomain);
                            insertStatement.execute();

                        }
                    }
                }

            } catch (Exception e) {
                // a bad package should not abort the whole batch
                logMetacat.warn("error parsing metadata for: " + pid.getValue(), e);
            }
        }
    } catch (SQLException sqle) {
        // just throw it
        throw sqle;
    } finally {
        // close statements first, then always return the connection to the pool
        try {
            if(dropStatement != null) {
                dropStatement.close();
            }
            if(createStatement != null) {
                createStatement.close();
            }
            if(insertStatement != null) {
                insertStatement.close();
            }
        } catch (Exception e) {
            logMetacat.warn("couldn't close the prepared statement "+e.getMessage());
        } finally {
            if (dbconn != null) {
                DBConnectionPool.returnDBConnection(dbconn, serialNumber);
                //dbconn.close();
            }
        }

    }
}
|
617
|
|
618
|
public static void main(String[] args) throws Exception {
|
619
|
// set up the properties based on the test/deployed configuration of the workspace
|
620
|
SortedProperties testProperties = new SortedProperties("test/test.properties");
|
621
|
testProperties.load();
|
622
|
String metacatContextDir = testProperties.getProperty("metacat.contextDir");
|
623
|
PropertyService.getInstance(metacatContextDir + "/WEB-INF");
|
624
|
|
625
|
testGenerate();
|
626
|
// testSummary();
|
627
|
System.exit(0);
|
628
|
}
|
629
|
|
630
|
public static void testGenerate() throws Exception {
|
631
|
Identifier metadataPid = new Identifier();
|
632
|
metadataPid.setValue("tao.1.4");
|
633
|
DatapackageSummarizer ds = new DatapackageSummarizer();
|
634
|
String rdfString = ds.generateAnnotation(metadataPid);
|
635
|
logMetacat.info("RDF annotation: \n" + rdfString);
|
636
|
|
637
|
}
|
638
|
|
639
|
/**
 * Collect every EML 2.0.0–2.1.1 document id (optionally across all replica
 * servers), map each to its DataONE guid, and run summarize() over the lot.
 * Exits the JVM when done.
 * @throws Exception if id collection or summarization fails
 */
public static void testSummary() throws Exception {

    // summarize the packages
    DatapackageSummarizer ds = new DatapackageSummarizer();
    List<Identifier> identifiers = new ArrayList<Identifier>();
    Map<Integer, String> serverCodes = ReplicationService.getServerCodes();

    // select the metadata ids we want to summarize; default to server 1 only
    boolean includeReplicas = true;
    Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
    if (includeReplicas ) {
        // widen to every known server code
        codeIter = serverCodes.keySet().iterator();
    }

    Vector<String> idList = new Vector<String>();
    while (codeIter.hasNext()) {
        int serverLocation = codeIter.next();
        // gather docids for each supported EML namespace on this server
        Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
        Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
        Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
        Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);

        idList.addAll(idList0);
        idList.addAll(idList1);
        idList.addAll(idList2);
        idList.addAll(idList3);

    }

    // go through all the identifiers now, resolving local docids to guids
    for (String localId : idList) {
        try {
            String guid = IdentifierManager.getInstance().getGUID(
                    DocumentUtil.getDocIdFromAccessionNumber(localId),
                    DocumentUtil.getRevisionFromAccessionNumber(localId));
            Identifier pid = new Identifier();
            pid.setValue(guid);
            identifiers.add(pid);
        } catch (McdbDocNotFoundException nfe) {
            // just skip it
            continue;
        }
    }
    ds.summarize(identifiers);
    System.exit(0);
}
|
685
|
|
686
|
}
|