1 |
8646
|
leinfelder
|
package edu.ucsb.nceas.metacat.annotation;
|
2 |
|
|
|
3 |
|
|
import java.io.InputStream;
|
4 |
8702
|
leinfelder
|
import java.io.StringWriter;
|
5 |
8646
|
leinfelder
|
import java.sql.PreparedStatement;
|
6 |
|
|
import java.sql.SQLException;
|
7 |
|
|
import java.util.ArrayList;
|
8 |
8689
|
leinfelder
|
import java.util.Arrays;
|
9 |
8757
|
leinfelder
|
import java.util.HashMap;
|
10 |
8689
|
leinfelder
|
import java.util.Iterator;
|
11 |
8646
|
leinfelder
|
import java.util.List;
|
12 |
8689
|
leinfelder
|
import java.util.Map;
|
13 |
8646
|
leinfelder
|
import java.util.Vector;
|
14 |
|
|
|
15 |
8757
|
leinfelder
|
import org.apache.commons.io.IOUtils;
|
16 |
8646
|
leinfelder
|
import org.apache.log4j.Logger;
|
17 |
|
|
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
|
18 |
|
|
import org.dataone.service.types.v1.Identifier;
|
19 |
|
|
import org.dataone.service.types.v1.Session;
|
20 |
|
|
import org.dataone.service.types.v1.Subject;
|
21 |
|
|
import org.ecoinformatics.datamanager.parser.Attribute;
|
22 |
|
|
import org.ecoinformatics.datamanager.parser.DataPackage;
|
23 |
|
|
import org.ecoinformatics.datamanager.parser.Entity;
|
24 |
8788
|
leinfelder
|
import org.ecoinformatics.datamanager.parser.Party;
|
25 |
8646
|
leinfelder
|
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
|
26 |
|
|
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
|
27 |
|
|
|
28 |
8702
|
leinfelder
|
import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
|
29 |
|
|
import com.hp.hpl.jena.ontology.Individual;
|
30 |
|
|
import com.hp.hpl.jena.ontology.ObjectProperty;
|
31 |
|
|
import com.hp.hpl.jena.ontology.OntClass;
|
32 |
8769
|
leinfelder
|
import com.hp.hpl.jena.ontology.OntDocumentManager;
|
33 |
8702
|
leinfelder
|
import com.hp.hpl.jena.ontology.OntModel;
|
34 |
|
|
import com.hp.hpl.jena.ontology.Ontology;
|
35 |
8757
|
leinfelder
|
import com.hp.hpl.jena.query.Dataset;
|
36 |
|
|
import com.hp.hpl.jena.query.Query;
|
37 |
|
|
import com.hp.hpl.jena.query.QueryExecution;
|
38 |
|
|
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
39 |
|
|
import com.hp.hpl.jena.query.QueryFactory;
|
40 |
|
|
import com.hp.hpl.jena.query.QuerySolution;
|
41 |
|
|
import com.hp.hpl.jena.query.ResultSet;
|
42 |
8702
|
leinfelder
|
import com.hp.hpl.jena.rdf.model.ModelFactory;
|
43 |
|
|
import com.hp.hpl.jena.rdf.model.Property;
|
44 |
|
|
import com.hp.hpl.jena.rdf.model.Resource;
|
45 |
8757
|
leinfelder
|
import com.hp.hpl.jena.tdb.TDBFactory;
|
46 |
8702
|
leinfelder
|
|
47 |
8646
|
leinfelder
|
import edu.ucsb.nceas.metacat.DBUtil;
|
48 |
|
|
import edu.ucsb.nceas.metacat.DocumentImpl;
|
49 |
|
|
import edu.ucsb.nceas.metacat.IdentifierManager;
|
50 |
|
|
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
|
51 |
|
|
import edu.ucsb.nceas.metacat.database.DBConnection;
|
52 |
|
|
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
|
53 |
|
|
import edu.ucsb.nceas.metacat.dataone.MNodeService;
|
54 |
8757
|
leinfelder
|
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
|
55 |
8646
|
leinfelder
|
import edu.ucsb.nceas.metacat.properties.PropertyService;
|
56 |
8689
|
leinfelder
|
import edu.ucsb.nceas.metacat.replication.ReplicationService;
|
57 |
8646
|
leinfelder
|
import edu.ucsb.nceas.metacat.util.DocumentUtil;
|
58 |
|
|
import edu.ucsb.nceas.utilities.SortedProperties;
|
59 |
|
|
|
60 |
|
|
public class DatapackageSummarizer {
|
61 |
|
|
|
62 |
|
|
private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
|
63 |
|
|
|
64 |
8702
|
leinfelder
|
public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
65 |
|
|
public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
|
66 |
|
|
public static String owl = "http://www.w3.org/2002/07/owl#";
|
67 |
|
|
public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
|
68 |
|
|
public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
|
69 |
8717
|
leinfelder
|
public static String oboe_characteristics = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-characteristics.owl#";
|
70 |
8724
|
leinfelder
|
public static String oboe_sbc = "http://ecoinformatics.org/oboe-ext/sbclter.1.0/oboe-sbclter.owl#";
|
71 |
8702
|
leinfelder
|
public static String oa = "http://www.w3.org/ns/oa#";
|
72 |
|
|
public static String oa_source = "http://www.w3.org/ns/oa.rdf";
|
73 |
|
|
public static String dcterms = "http://purl.org/dc/terms/";
|
74 |
|
|
public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
|
75 |
|
|
public static String foaf = "http://xmlns.com/foaf/0.1/";
|
76 |
|
|
public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
|
77 |
|
|
public static String prov = "http://www.w3.org/ns/prov#";
|
78 |
|
|
public static String prov_source = "http://www.w3.org/ns/prov.owl";
|
79 |
|
|
public static String cito = "http://purl.org/spar/cito/";
|
80 |
8709
|
leinfelder
|
|
81 |
8717
|
leinfelder
|
public static String OBOE_SBC = "OBOE-SBC";
|
82 |
8769
|
leinfelder
|
|
83 |
|
|
private static boolean cacheInitialized;
|
84 |
|
|
|
85 |
|
|
private static void initializeCache() {
|
86 |
|
|
if (!cacheInitialized) {
|
87 |
|
|
// cache the ontologies we use
|
88 |
|
|
OntDocumentManager.getInstance().addModel(oboe, ModelFactory.createOntologyModel().read(oboe));
|
89 |
|
|
OntDocumentManager.getInstance().addModel(oboe_sbc, ModelFactory.createOntologyModel().read(oboe_sbc));
|
90 |
|
|
OntDocumentManager.getInstance().addModel(oa, ModelFactory.createOntologyModel().read(oa_source));
|
91 |
|
|
OntDocumentManager.getInstance().addModel(dcterms, ModelFactory.createOntologyModel().read(dcterms_source));
|
92 |
|
|
OntDocumentManager.getInstance().addModel(foaf, ModelFactory.createOntologyModel().read(foaf_source));
|
93 |
|
|
OntDocumentManager.getInstance().addModel(prov, ModelFactory.createOntologyModel().read(prov));
|
94 |
|
|
OntDocumentManager.getInstance().addModel(cito, ModelFactory.createOntologyModel().read(cito));
|
95 |
|
|
cacheInitialized = true;
|
96 |
|
|
}
|
97 |
|
|
}
|
98 |
8717
|
leinfelder
|
|
99 |
8757
|
leinfelder
|
public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
|
100 |
8702
|
leinfelder
|
|
101 |
8757
|
leinfelder
|
// generate an annotation for the metadata given
|
102 |
|
|
String rdfContent = this.generateAnnotation(metadataPid);
|
103 |
|
|
|
104 |
|
|
// load to triple store
|
105 |
8765
|
leinfelder
|
//Dataset dataset = TDBFactory.createDataset("./tbd");
|
106 |
8767
|
leinfelder
|
Dataset dataset = TDBFactory.createDataset();
|
107 |
|
|
//Dataset dataset = DatasetFactory.createMem();
|
108 |
8757
|
leinfelder
|
|
109 |
|
|
// read the annotation into the triplestore
|
110 |
|
|
InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
|
111 |
8763
|
leinfelder
|
String name = "http://annotation/" + metadataPid.getValue();
|
112 |
8757
|
leinfelder
|
boolean loaded = dataset.containsNamedModel(name);
|
113 |
|
|
if (loaded) {
|
114 |
|
|
dataset.removeNamedModel(name);
|
115 |
|
|
loaded = false;
|
116 |
|
|
}
|
117 |
8765
|
leinfelder
|
OntModel ontModel = null;
|
118 |
8757
|
leinfelder
|
if (!loaded) {
|
119 |
8765
|
leinfelder
|
ontModel = ModelFactory.createOntologyModel();
|
120 |
8757
|
leinfelder
|
ontModel.read(source, name);
|
121 |
|
|
dataset.addNamedModel(name, ontModel);
|
122 |
|
|
}
|
123 |
|
|
|
124 |
|
|
// query for fields to add to index
|
125 |
|
|
Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
|
126 |
|
|
|
127 |
|
|
// TODO: look up the query to use (support multiple like in the indexing project)
|
128 |
8763
|
leinfelder
|
List<String> queries = new ArrayList<String>();
|
129 |
|
|
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
130 |
8757
|
leinfelder
|
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
131 |
|
|
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
132 |
|
|
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
133 |
|
|
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
134 |
|
|
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
135 |
8763
|
leinfelder
|
+ "SELECT ?standard_sm ?pid "
|
136 |
8757
|
leinfelder
|
+ "FROM <$GRAPH_NAME> "
|
137 |
|
|
+ "WHERE { "
|
138 |
|
|
+ " ?measurement rdf:type oboe-core:Measurement . "
|
139 |
|
|
+ " ?measurement rdf:type ?restriction . "
|
140 |
|
|
+ " ?restriction owl:onProperty oboe-core:usesStandard . "
|
141 |
|
|
+ " ?restriction owl:allValuesFrom ?standard . "
|
142 |
|
|
+ " ?standard rdfs:subClassOf+ ?standard_sm . "
|
143 |
|
|
+ " ?standard_sm rdfs:subClassOf oboe-core:Standard . "
|
144 |
|
|
+ " ?annotation oa:hasBody ?measurement . "
|
145 |
|
|
+ " ?annotation oa:hasTarget ?target . "
|
146 |
|
|
+ " ?target oa:hasSource ?metadata . "
|
147 |
8763
|
leinfelder
|
+ " ?metadata dcterms:identifier ?pid . "
|
148 |
|
|
+ "}");
|
149 |
|
|
|
150 |
|
|
queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
151 |
|
|
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
152 |
|
|
+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
|
153 |
|
|
+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
|
154 |
|
|
+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
|
155 |
|
|
+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
|
156 |
|
|
+ "SELECT ?characteristic_sm ?pid "
|
157 |
|
|
+ "FROM <$GRAPH_NAME>"
|
158 |
|
|
+ "WHERE { "
|
159 |
|
|
+ " ?measurement rdf:type oboe-core:Measurement . "
|
160 |
|
|
+ " ?measurement rdf:type ?restriction . "
|
161 |
|
|
+ " ?restriction owl:onProperty oboe-core:ofCharacteristic . "
|
162 |
|
|
+ " ?restriction owl:allValuesFrom ?characteristic . "
|
163 |
|
|
+ " ?characteristic rdfs:subClassOf+ ?characteristic_sm . "
|
164 |
|
|
+ " ?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . "
|
165 |
|
|
+ " ?annotation oa:hasBody ?measurement . "
|
166 |
|
|
+ " ?annotation oa:hasTarget ?target . "
|
167 |
|
|
+ " ?target oa:hasSource ?metadata . "
|
168 |
|
|
+ " ?metadata dcterms:identifier ?pid . "
|
169 |
|
|
+ "}");
|
170 |
|
|
|
171 |
|
|
for (String q: queries) {
|
172 |
|
|
q = q.replaceAll("\\$GRAPH_NAME", name);
|
173 |
|
|
Query query = QueryFactory.create(q);
|
174 |
|
|
QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
|
175 |
|
|
ResultSet results = qexec.execSelect();
|
176 |
8757
|
leinfelder
|
|
177 |
8763
|
leinfelder
|
while (results.hasNext()) {
|
178 |
|
|
QuerySolution solution = results.next();
|
179 |
|
|
System.out.println(solution.toString());
|
180 |
8757
|
leinfelder
|
|
181 |
8763
|
leinfelder
|
// find the index document we are trying to augment with the annotation
|
182 |
|
|
if (solution.contains("pid")) {
|
183 |
|
|
String id = solution.getLiteral("pid").getString();
|
184 |
|
|
if (!id.equals(metadataPid.getValue())) {
|
185 |
|
|
// skip any solution that does not annotate the given pid
|
186 |
|
|
continue;
|
187 |
|
|
}
|
188 |
|
|
|
189 |
8757
|
leinfelder
|
}
|
190 |
8763
|
leinfelder
|
// loop through the solution variables, add an index value for each
|
191 |
|
|
Iterator<String> varNameIter = solution.varNames();
|
192 |
|
|
while (varNameIter.hasNext()) {
|
193 |
|
|
String key = varNameIter.next();
|
194 |
|
|
if (key.equals("pid")) {
|
195 |
|
|
// don't include the id
|
196 |
|
|
continue;
|
197 |
|
|
}
|
198 |
|
|
String value = solution.get(key).toString();
|
199 |
|
|
List<Object> values = fields.get(key);
|
200 |
|
|
if (values == null) {
|
201 |
|
|
values = new ArrayList<Object>();
|
202 |
|
|
}
|
203 |
|
|
values.add(value);
|
204 |
|
|
fields.put(key, values);
|
205 |
8757
|
leinfelder
|
}
|
206 |
|
|
}
|
207 |
8763
|
leinfelder
|
}
|
208 |
8757
|
leinfelder
|
|
209 |
8765
|
leinfelder
|
// remove the graph to save storage
|
210 |
|
|
// ontModel.removeAll();
|
211 |
|
|
// ontModel.commit();
|
212 |
|
|
// ontModel.close();
|
213 |
|
|
dataset.removeNamedModel(name);
|
214 |
|
|
|
215 |
8757
|
leinfelder
|
// clean up the triple store
|
216 |
|
|
TDBFactory.release(dataset);
|
217 |
|
|
|
218 |
|
|
// add to index
|
219 |
|
|
MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true);
|
220 |
|
|
|
221 |
|
|
|
222 |
|
|
}
|
223 |
|
|
|
224 |
8702
|
leinfelder
|
/**
|
225 |
|
|
* Generate annotation for given metadata identifier
|
226 |
|
|
* @param metadataPid
|
227 |
|
|
*/
|
228 |
|
|
public String generateAnnotation(Identifier metadataPid) throws Exception {
|
229 |
|
|
|
230 |
|
|
DataPackage dataPackage = this.getDataPackage(metadataPid);
|
231 |
|
|
|
232 |
|
|
OntModel m = ModelFactory.createOntologyModel();
|
233 |
|
|
Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());
|
234 |
8646
|
leinfelder
|
|
235 |
8702
|
leinfelder
|
// TODO: import the ontologies we use
|
236 |
8769
|
leinfelder
|
initializeCache();
|
237 |
|
|
|
238 |
8702
|
leinfelder
|
ont.addImport(m.createResource(oboe));
|
239 |
8769
|
leinfelder
|
m.addSubModel(OntDocumentManager.getInstance().getModel(oboe));
|
240 |
8702
|
leinfelder
|
|
241 |
8724
|
leinfelder
|
ont.addImport(m.createResource(oboe_sbc));
|
242 |
8769
|
leinfelder
|
m.addSubModel(OntDocumentManager.getInstance().getModel(oboe_sbc));
|
243 |
8724
|
leinfelder
|
|
244 |
8702
|
leinfelder
|
ont.addImport(m.createResource(oa));
|
245 |
8769
|
leinfelder
|
m.addSubModel(OntDocumentManager.getInstance().getModel(oa));
|
246 |
8702
|
leinfelder
|
|
247 |
|
|
ont.addImport(m.createResource(dcterms));
|
248 |
8769
|
leinfelder
|
m.addSubModel(OntDocumentManager.getInstance().getModel(dcterms));
|
249 |
8702
|
leinfelder
|
|
250 |
|
|
ont.addImport(m.createResource(foaf));
|
251 |
8769
|
leinfelder
|
m.addSubModel(OntDocumentManager.getInstance().getModel(foaf));
|
252 |
8702
|
leinfelder
|
|
253 |
|
|
ont.addImport(m.createResource(prov));
|
254 |
|
|
//m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));
|
255 |
|
|
|
256 |
|
|
ont.addImport(m.createResource(cito));
|
257 |
|
|
|
258 |
|
|
// properties
|
259 |
|
|
ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
|
260 |
|
|
ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
|
261 |
|
|
ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
|
262 |
8710
|
leinfelder
|
ObjectProperty hasSelectorProperty = m.getObjectProperty(oa + "hasSelector");
|
263 |
8702
|
leinfelder
|
ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
|
264 |
|
|
Property identifierProperty = m.getProperty(dcterms + "identifier");
|
265 |
8710
|
leinfelder
|
Property conformsToProperty = m.getProperty(dcterms + "conformsTo");
|
266 |
8718
|
leinfelder
|
Property wasAttributedTo = m.getProperty(prov + "wasAttributedTo");
|
267 |
8702
|
leinfelder
|
Property nameProperty = m.getProperty(foaf + "name");
|
268 |
8710
|
leinfelder
|
Property rdfValue = m.getProperty(rdf + "value");
|
269 |
8702
|
leinfelder
|
|
270 |
|
|
ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
|
271 |
|
|
ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
|
272 |
8743
|
leinfelder
|
ObjectProperty ofEntity = m.getObjectProperty(oboe_core + "ofEntity");
|
273 |
|
|
ObjectProperty hasMeasurement = m.getObjectProperty(oboe_core + "hasMeasurement");
|
274 |
8702
|
leinfelder
|
|
275 |
|
|
// classes
|
276 |
8743
|
leinfelder
|
OntClass entityClass = m.getOntClass(oboe_core + "Entity");
|
277 |
|
|
OntClass observationClass = m.getOntClass(oboe_core + "Observation");
|
278 |
8702
|
leinfelder
|
OntClass measurementClass = m.getOntClass(oboe_core + "Measurement");
|
279 |
|
|
OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
|
280 |
|
|
OntClass standardClass = m.getOntClass(oboe_core + "Standard");
|
281 |
|
|
|
282 |
|
|
Resource annotationClass = m.getOntClass(oa + "Annotation");
|
283 |
|
|
Resource specificResourceClass = m.getOntClass(oa + "SpecificResource");
|
284 |
8710
|
leinfelder
|
Resource fragmentSelectorClass = m.getOntClass(oa + "FragmentSelector");
|
285 |
8743
|
leinfelder
|
Resource provEntityClass = m.getResource(prov + "Entity");
|
286 |
8702
|
leinfelder
|
Resource personClass = m.getResource(prov + "Person");
|
287 |
8718
|
leinfelder
|
|
288 |
8702
|
leinfelder
|
// these apply to every attribute annotation
|
289 |
8743
|
leinfelder
|
Individual meta1 = m.createIndividual(ont.getURI() + "#meta", provEntityClass);
|
290 |
8702
|
leinfelder
|
meta1.addProperty(identifierProperty, metadataPid.getValue());
|
291 |
|
|
|
292 |
8718
|
leinfelder
|
// decide who should be credited with the package
|
293 |
|
|
Individual p1 = null;
|
294 |
8716
|
leinfelder
|
|
295 |
8718
|
leinfelder
|
// look up creators from the EML metadata
|
296 |
8788
|
leinfelder
|
List<Party> creators = dataPackage.getCreators();
|
297 |
8716
|
leinfelder
|
//creators = Arrays.asList("Matthew Jones");
|
298 |
8718
|
leinfelder
|
if (creators != null && creators.size() > 0) {
|
299 |
|
|
// use an orcid if we can find one from their system
|
300 |
8788
|
leinfelder
|
String orcidUri = OrcidService.lookupOrcid(creators.get(0).getOrganization(), creators.get(0).getSurName(), creators.get(0).getGivenNames(), null);
|
301 |
8718
|
leinfelder
|
if (orcidUri != null) {
|
302 |
|
|
p1 = m.createIndividual(orcidUri, personClass);
|
303 |
|
|
p1.addProperty(identifierProperty, orcidUri);
|
304 |
|
|
} else {
|
305 |
|
|
p1 = m.createIndividual(ont.getURI() + "#person", personClass);
|
306 |
|
|
}
|
307 |
|
|
// include the name we have in the metadata
|
308 |
8788
|
leinfelder
|
if (creators.get(0).getSurName() != null) {
|
309 |
|
|
p1.addProperty(nameProperty, creators.get(0).getSurName());
|
310 |
|
|
} else if (creators.get(0).getOrganization() != null) {
|
311 |
|
|
p1.addProperty(nameProperty, creators.get(0).getOrganization());
|
312 |
|
|
}
|
313 |
8716
|
leinfelder
|
}
|
314 |
|
|
|
315 |
8718
|
leinfelder
|
// attribute the package to this creator if we have one
|
316 |
|
|
if (p1 != null) {
|
317 |
|
|
meta1.addProperty(wasAttributedTo, p1);
|
318 |
|
|
}
|
319 |
|
|
|
320 |
8702
|
leinfelder
|
// loop through the tables and attributes
|
321 |
8714
|
leinfelder
|
int entityCount = 1;
|
322 |
8702
|
leinfelder
|
Entity[] entities = dataPackage.getEntityList();
|
323 |
8724
|
leinfelder
|
if (entities != null) {
|
324 |
|
|
for (Entity entity: entities) {
|
325 |
|
|
String entityName = entity.getName();
|
326 |
8743
|
leinfelder
|
|
327 |
|
|
Individual o1 = m.createIndividual(ont.getURI() + "#observation" + entityCount, observationClass);
|
328 |
|
|
Resource entityConcept = lookupEntity(entityClass, entity);
|
329 |
|
|
if (entityConcept != null) {
|
330 |
|
|
AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofEntity, entityConcept);
|
331 |
|
|
o1.addOntClass(avfr);
|
332 |
|
|
}
|
333 |
|
|
|
334 |
8724
|
leinfelder
|
logMetacat.debug("Entity name: " + entityName);
|
335 |
|
|
Attribute[] attributes = entity.getAttributeList().getAttributes();
|
336 |
|
|
int attributeCount = 1;
|
337 |
|
|
if (attributes != null) {
|
338 |
|
|
for (Attribute attribute: attributes) {
|
339 |
|
|
|
340 |
|
|
// for naming the individuals uniquely
|
341 |
|
|
String cnt = entityCount + "_" + attributeCount;
|
342 |
|
|
|
343 |
|
|
String attributeName = attribute.getName();
|
344 |
|
|
String attributeLabel = attribute.getLabel();
|
345 |
|
|
String attributeDefinition = attribute.getDefinition();
|
346 |
|
|
String attributeType = attribute.getAttributeType();
|
347 |
|
|
String attributeScale = attribute.getMeasurementScale();
|
348 |
|
|
String attributeUnitType = attribute.getUnitType();
|
349 |
|
|
String attributeUnit = attribute.getUnit();
|
350 |
|
|
String attributeDomain = attribute.getDomain().getClass().getSimpleName();
|
351 |
|
|
|
352 |
|
|
logMetacat.debug("Attribute name: " + attributeName);
|
353 |
|
|
logMetacat.debug("Attribute label: " + attributeLabel);
|
354 |
|
|
logMetacat.debug("Attribute definition: " + attributeDefinition);
|
355 |
|
|
logMetacat.debug("Attribute type: " + attributeType);
|
356 |
|
|
logMetacat.debug("Attribute scale: " + attributeScale);
|
357 |
|
|
logMetacat.debug("Attribute unit type: " + attributeUnitType);
|
358 |
|
|
logMetacat.debug("Attribute unit: " + attributeUnit);
|
359 |
|
|
logMetacat.debug("Attribute domain: " + attributeDomain);
|
360 |
8702
|
leinfelder
|
|
361 |
8724
|
leinfelder
|
// look up the characteristic or standard subclasses
|
362 |
|
|
Resource standard = this.lookupStandard(standardClass, attribute);
|
363 |
|
|
Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);
|
364 |
|
|
|
365 |
|
|
if (standard != null || characteristic != null) {
|
366 |
|
|
|
367 |
|
|
// instances
|
368 |
|
|
Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
|
369 |
|
|
Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
|
370 |
|
|
Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
|
371 |
8910
|
leinfelder
|
String xpointer = "xpointer(/eml/dataSet/dataTable[" + entityCount + "]/attributeList/attribute[" + attributeCount + "])";
|
372 |
8724
|
leinfelder
|
Individual s1 = m.createIndividual(ont.getURI() + "#" + xpointer, fragmentSelectorClass);
|
373 |
|
|
s1.addLiteral(rdfValue, xpointer);
|
374 |
8910
|
leinfelder
|
s1.addProperty(conformsToProperty, "http://tools.ietf.org/rfc/rfc3023");
|
375 |
|
|
//s1.addProperty(conformsToProperty, "http://www.w3.org/TR/xptr/");
|
376 |
|
|
|
377 |
8724
|
leinfelder
|
|
378 |
|
|
// statements about the annotation
|
379 |
|
|
a1.addProperty(hasBodyProperty, m1);
|
380 |
|
|
a1.addProperty(hasTargetProperty, t1);
|
381 |
|
|
t1.addProperty(hasSourceProperty, meta1);
|
382 |
|
|
t1.addProperty(hasSelectorProperty, s1);
|
383 |
|
|
//a1.addProperty(annotatedByProperty, p1);
|
384 |
|
|
|
385 |
|
|
// describe the measurement in terms of restrictions
|
386 |
|
|
if (standard != null) {
|
387 |
|
|
AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
|
388 |
|
|
m1.addOntClass(avfr);
|
389 |
|
|
}
|
390 |
|
|
if (characteristic != null) {
|
391 |
|
|
AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
|
392 |
|
|
m1.addOntClass(avfr);
|
393 |
|
|
}
|
394 |
8743
|
leinfelder
|
|
395 |
|
|
// attach to the observation
|
396 |
|
|
// TODO: evaluate whether the measurement can apply to the given observed entity
|
397 |
|
|
o1.addProperty(hasMeasurement, m1);
|
398 |
8724
|
leinfelder
|
}
|
399 |
|
|
attributeCount++;
|
400 |
|
|
|
401 |
8702
|
leinfelder
|
}
|
402 |
|
|
}
|
403 |
8724
|
leinfelder
|
entityCount++;
|
404 |
8714
|
leinfelder
|
}
|
405 |
8702
|
leinfelder
|
}
|
406 |
|
|
|
407 |
|
|
StringWriter sw = new StringWriter();
|
408 |
|
|
// only write the base model
|
409 |
|
|
//m.write(sw, "RDF/XML-ABBREV");
|
410 |
|
|
m.write(sw, null);
|
411 |
|
|
|
412 |
|
|
return sw.toString();
|
413 |
|
|
|
414 |
|
|
}
|
415 |
|
|
|
416 |
|
|
private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
|
417 |
|
|
// what's our unit?
|
418 |
|
|
String unit = attribute.getUnit().toLowerCase();
|
419 |
8763
|
leinfelder
|
|
420 |
|
|
/*
|
421 |
|
|
boolean found = false;
|
422 |
8710
|
leinfelder
|
List<String> tokens = Arrays.asList(unit.split(" "));
|
423 |
8702
|
leinfelder
|
ExtendedIterator iter = standardClass.listSubClasses(false);
|
424 |
|
|
while (iter.hasNext()) {
|
425 |
|
|
OntClass subclass = (OntClass) iter.next();
|
426 |
|
|
String subclassName = subclass.getLocalName().toLowerCase();
|
427 |
|
|
logMetacat.debug("subclass: " + subclassName);
|
428 |
8710
|
leinfelder
|
if (tokens.contains(subclassName)) {
|
429 |
8702
|
leinfelder
|
found = true;
|
430 |
|
|
}
|
431 |
|
|
if (subclass.hasLabel(unit, null)) {
|
432 |
|
|
found = true;
|
433 |
|
|
}
|
434 |
|
|
if (found) {
|
435 |
|
|
return subclass;
|
436 |
|
|
}
|
437 |
|
|
}
|
438 |
8763
|
leinfelder
|
*/
|
439 |
|
|
|
440 |
8711
|
leinfelder
|
// try to look it up if we got this far
|
441 |
8717
|
leinfelder
|
return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
|
442 |
8702
|
leinfelder
|
}
|
443 |
|
|
|
444 |
|
|
private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
|
445 |
8763
|
leinfelder
|
// what are we looking for?
|
446 |
8702
|
leinfelder
|
String label = attribute.getLabel().toLowerCase();
|
447 |
8763
|
leinfelder
|
String definition = attribute.getDefinition();
|
448 |
|
|
String text = label + " " + definition;
|
449 |
8710
|
leinfelder
|
|
450 |
8763
|
leinfelder
|
/*
|
451 |
|
|
// find something that matches
|
452 |
8702
|
leinfelder
|
boolean found = false;
|
453 |
8763
|
leinfelder
|
List<String> tokens = Arrays.asList(label.split(" "));
|
454 |
8702
|
leinfelder
|
ExtendedIterator iter = characteristicClass.listSubClasses();
|
455 |
|
|
while (iter.hasNext()) {
|
456 |
|
|
OntClass subclass = (OntClass) iter.next();
|
457 |
|
|
String subclassName = subclass.getLocalName().toLowerCase();
|
458 |
|
|
logMetacat.debug("subclass: " + subclassName);
|
459 |
8710
|
leinfelder
|
if (tokens.contains(subclassName)) {
|
460 |
8702
|
leinfelder
|
found = true;
|
461 |
|
|
}
|
462 |
|
|
if (subclass.hasLabel(label, null)) {
|
463 |
|
|
found = true;
|
464 |
|
|
}
|
465 |
|
|
if (found) {
|
466 |
|
|
return subclass;
|
467 |
|
|
}
|
468 |
|
|
}
|
469 |
8763
|
leinfelder
|
*/
|
470 |
8711
|
leinfelder
|
|
471 |
8763
|
leinfelder
|
// try to look it up from the service
|
472 |
|
|
return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
|
473 |
8711
|
leinfelder
|
|
474 |
|
|
}
|
475 |
|
|
|
476 |
8743
|
leinfelder
|
private Resource lookupEntity(OntClass entityClass, Entity entity) {
|
477 |
|
|
// what's our description like?
|
478 |
|
|
String name = entity.getName();
|
479 |
|
|
String definition = entity.getDefinition();
|
480 |
|
|
|
481 |
|
|
// try to look it up if we got this far
|
482 |
|
|
return BioPortalService.lookupAnnotationClass(entityClass, definition, OBOE_SBC);
|
483 |
|
|
|
484 |
|
|
}
|
485 |
|
|
|
486 |
8702
|
leinfelder
|
private DataPackage getDataPackage(Identifier pid) throws Exception {
|
487 |
|
|
// for using the MN API as the MN itself
|
488 |
|
|
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
|
489 |
|
|
Session session = new Session();
|
490 |
|
|
Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
|
491 |
|
|
session.setSubject(subject);
|
492 |
|
|
InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
|
493 |
|
|
|
494 |
|
|
// parse the metadata
|
495 |
|
|
DataPackageParserInterface parser = new Eml200DataPackageParser();
|
496 |
|
|
parser.parse(emlStream);
|
497 |
|
|
DataPackage dataPackage = parser.getDataPackage();
|
498 |
|
|
return dataPackage;
|
499 |
|
|
}
|
500 |
|
|
|
501 |
|
|
private void summarize(List<Identifier> identifiers) throws SQLException {
|
502 |
|
|
|
503 |
8646
|
leinfelder
|
DBConnection dbconn = null;
|
504 |
9463
|
tao
|
int serialNumber = -1;
|
505 |
|
|
PreparedStatement dropStatement = null;
|
506 |
|
|
PreparedStatement createStatement = null;
|
507 |
|
|
PreparedStatement insertStatement = null;
|
508 |
8646
|
leinfelder
|
try {
|
509 |
|
|
dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize");
|
510 |
9463
|
tao
|
serialNumber = dbconn.getCheckOutSerialNumber();
|
511 |
|
|
dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary");
|
512 |
8646
|
leinfelder
|
dropStatement.execute();
|
513 |
|
|
|
514 |
9463
|
tao
|
createStatement = dbconn.prepareStatement(
|
515 |
8646
|
leinfelder
|
"CREATE TABLE entity_summary (" +
|
516 |
|
|
"guid text, " +
|
517 |
|
|
"title text, " +
|
518 |
|
|
"entity text," +
|
519 |
|
|
"attributeName text," +
|
520 |
|
|
"attributeLabel text," +
|
521 |
|
|
"attributeDefinition text," +
|
522 |
|
|
"attributeType text," +
|
523 |
|
|
"attributeScale text," +
|
524 |
|
|
"attributeUnitType text," +
|
525 |
|
|
"attributeUnit text," +
|
526 |
|
|
"attributeDomain text" +
|
527 |
|
|
")");
|
528 |
|
|
createStatement.execute();
|
529 |
|
|
|
530 |
9463
|
tao
|
insertStatement = dbconn.prepareStatement(
|
531 |
8646
|
leinfelder
|
"INSERT INTO entity_summary " +
|
532 |
|
|
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
|
533 |
|
|
|
534 |
|
|
for (Identifier pid: identifiers) {
|
535 |
|
|
|
536 |
|
|
logMetacat.debug("Parsing pid: " + pid.getValue());
|
537 |
|
|
|
538 |
|
|
try {
|
539 |
|
|
|
540 |
8702
|
leinfelder
|
// get the package
|
541 |
|
|
DataPackage dataPackage = this.getDataPackage(pid);
|
542 |
8646
|
leinfelder
|
String title = dataPackage.getTitle();
|
543 |
|
|
logMetacat.debug("Title: " + title);
|
544 |
|
|
|
545 |
|
|
Entity[] entities = dataPackage.getEntityList();
|
546 |
|
|
if (entities != null) {
|
547 |
|
|
for (Entity entity: entities) {
|
548 |
|
|
String entityName = entity.getName();
|
549 |
|
|
logMetacat.debug("Entity name: " + entityName);
|
550 |
|
|
Attribute[] attributes = entity.getAttributeList().getAttributes();
|
551 |
|
|
for (Attribute attribute: attributes) {
|
552 |
|
|
String attributeName = attribute.getName();
|
553 |
|
|
String attributeLabel = attribute.getLabel();
|
554 |
|
|
String attributeDefinition = attribute.getDefinition();
|
555 |
|
|
String attributeType = attribute.getAttributeType();
|
556 |
|
|
String attributeScale = attribute.getMeasurementScale();
|
557 |
|
|
String attributeUnitType = attribute.getUnitType();
|
558 |
|
|
String attributeUnit = attribute.getUnit();
|
559 |
|
|
String attributeDomain = attribute.getDomain().getClass().getSimpleName();
|
560 |
|
|
|
561 |
|
|
logMetacat.debug("Attribute name: " + attributeName);
|
562 |
|
|
logMetacat.debug("Attribute label: " + attributeLabel);
|
563 |
|
|
logMetacat.debug("Attribute definition: " + attributeDefinition);
|
564 |
|
|
logMetacat.debug("Attribute type: " + attributeType);
|
565 |
|
|
logMetacat.debug("Attribute scale: " + attributeScale);
|
566 |
|
|
logMetacat.debug("Attribute unit type: " + attributeUnitType);
|
567 |
|
|
logMetacat.debug("Attribute unit: " + attributeUnit);
|
568 |
|
|
logMetacat.debug("Attribute domain: " + attributeDomain);
|
569 |
|
|
|
570 |
|
|
// set the values for this attribute
|
571 |
|
|
insertStatement.setString(1, pid.getValue());
|
572 |
|
|
insertStatement.setString(2, title);
|
573 |
|
|
insertStatement.setString(3, entityName);
|
574 |
|
|
insertStatement.setString(4, attributeName);
|
575 |
|
|
insertStatement.setString(5, attributeLabel);
|
576 |
|
|
insertStatement.setString(6, attributeDefinition);
|
577 |
|
|
insertStatement.setString(7, attributeType);
|
578 |
|
|
insertStatement.setString(8, attributeScale);
|
579 |
|
|
insertStatement.setString(9, attributeUnitType);
|
580 |
|
|
insertStatement.setString(10, attributeUnit);
|
581 |
|
|
insertStatement.setString(11, attributeDomain);
|
582 |
|
|
insertStatement.execute();
|
583 |
|
|
|
584 |
|
|
}
|
585 |
|
|
}
|
586 |
|
|
}
|
587 |
|
|
|
588 |
|
|
} catch (Exception e) {
|
589 |
|
|
logMetacat.warn("error parsing metadata for: " + pid.getValue(), e);
|
590 |
|
|
}
|
591 |
|
|
}
|
592 |
|
|
} catch (SQLException sqle) {
|
593 |
|
|
// just throw it
|
594 |
|
|
throw sqle;
|
595 |
|
|
} finally {
|
596 |
9463
|
tao
|
try {
|
597 |
|
|
if(dropStatement != null) {
|
598 |
|
|
dropStatement.close();
|
599 |
|
|
}
|
600 |
|
|
if(createStatement != null) {
|
601 |
|
|
createStatement.close();
|
602 |
|
|
}
|
603 |
|
|
if(insertStatement != null) {
|
604 |
|
|
insertStatement.close();
|
605 |
|
|
}
|
606 |
|
|
} catch (Exception e) {
|
607 |
|
|
logMetacat.warn("couldn't close the prepared statement "+e.getMessage());
|
608 |
|
|
} finally {
|
609 |
|
|
if (dbconn != null) {
|
610 |
|
|
DBConnectionPool.returnDBConnection(dbconn, serialNumber);
|
611 |
|
|
//dbconn.close();
|
612 |
|
|
}
|
613 |
|
|
}
|
614 |
|
|
|
615 |
8646
|
leinfelder
|
}
|
616 |
|
|
}
|
617 |
|
|
|
618 |
|
|
public static void main(String[] args) throws Exception {
|
619 |
|
|
// set up the properties based on the test/deployed configuration of the workspace
|
620 |
8702
|
leinfelder
|
SortedProperties testProperties = new SortedProperties("test/test.properties");
|
621 |
|
|
testProperties.load();
|
622 |
|
|
String metacatContextDir = testProperties.getProperty("metacat.contextDir");
|
623 |
|
|
PropertyService.getInstance(metacatContextDir + "/WEB-INF");
|
624 |
|
|
|
625 |
|
|
testGenerate();
|
626 |
|
|
// testSummary();
|
627 |
|
|
System.exit(0);
|
628 |
|
|
}
|
629 |
|
|
|
630 |
|
|
public static void testGenerate() throws Exception {
|
631 |
|
|
Identifier metadataPid = new Identifier();
|
632 |
8743
|
leinfelder
|
metadataPid.setValue("tao.1.4");
|
633 |
8702
|
leinfelder
|
DatapackageSummarizer ds = new DatapackageSummarizer();
|
634 |
|
|
String rdfString = ds.generateAnnotation(metadataPid);
|
635 |
|
|
logMetacat.info("RDF annotation: \n" + rdfString);
|
636 |
8646
|
leinfelder
|
|
637 |
8702
|
leinfelder
|
}
|
638 |
|
|
|
639 |
|
|
public static void testSummary() throws Exception {
|
640 |
|
|
|
641 |
8646
|
leinfelder
|
// summarize the packages
|
642 |
|
|
DatapackageSummarizer ds = new DatapackageSummarizer();
|
643 |
|
|
List<Identifier> identifiers = new ArrayList<Identifier>();
|
644 |
8689
|
leinfelder
|
Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
|
645 |
|
|
|
646 |
|
|
// select the metadata ids we want to summarize
|
647 |
|
|
boolean includeReplicas = true;
|
648 |
|
|
Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
|
649 |
|
|
if (includeReplicas ) {
|
650 |
|
|
codeIter = serverCodes.keySet().iterator();
|
651 |
|
|
}
|
652 |
8646
|
leinfelder
|
|
653 |
8689
|
leinfelder
|
Vector<String> idList = new Vector<String>();
|
654 |
|
|
while (codeIter.hasNext()) {
|
655 |
|
|
int serverLocation = codeIter.next();
|
656 |
|
|
Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
|
657 |
|
|
Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
|
658 |
|
|
Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
|
659 |
|
|
Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
|
660 |
|
|
|
661 |
|
|
idList.addAll(idList0);
|
662 |
|
|
idList.addAll(idList1);
|
663 |
|
|
idList.addAll(idList2);
|
664 |
|
|
idList.addAll(idList3);
|
665 |
8646
|
leinfelder
|
|
666 |
8689
|
leinfelder
|
}
|
667 |
|
|
|
668 |
|
|
// go through all the identifiers now
|
669 |
8646
|
leinfelder
|
for (String localId : idList) {
|
670 |
|
|
try {
|
671 |
|
|
String guid = IdentifierManager.getInstance().getGUID(
|
672 |
|
|
DocumentUtil.getDocIdFromAccessionNumber(localId),
|
673 |
|
|
DocumentUtil.getRevisionFromAccessionNumber(localId));
|
674 |
|
|
Identifier pid = new Identifier();
|
675 |
|
|
pid.setValue(guid);
|
676 |
|
|
identifiers.add(pid);
|
677 |
|
|
} catch (McdbDocNotFoundException nfe) {
|
678 |
|
|
// just skip it
|
679 |
|
|
continue;
|
680 |
|
|
}
|
681 |
|
|
}
|
682 |
|
|
ds.summarize(identifiers);
|
683 |
|
|
System.exit(0);
|
684 |
|
|
}
|
685 |
|
|
|
686 |
|
|
}
|