1 |
1 |
package edu.ucsb.nceas.metacat.annotation;
|
2 |
2 |
|
3 |
3 |
import java.io.InputStream;
|
|
4 |
import java.io.StringWriter;
|
4 |
5 |
import java.sql.PreparedStatement;
|
5 |
6 |
import java.sql.SQLException;
|
6 |
7 |
import java.util.ArrayList;
|
... | ... | |
21 |
22 |
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
|
22 |
23 |
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
|
23 |
24 |
|
|
25 |
import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
|
|
26 |
import com.hp.hpl.jena.ontology.Individual;
|
|
27 |
import com.hp.hpl.jena.ontology.ObjectProperty;
|
|
28 |
import com.hp.hpl.jena.ontology.OntClass;
|
|
29 |
import com.hp.hpl.jena.ontology.OntModel;
|
|
30 |
import com.hp.hpl.jena.ontology.Ontology;
|
|
31 |
import com.hp.hpl.jena.rdf.model.ModelFactory;
|
|
32 |
import com.hp.hpl.jena.rdf.model.Property;
|
|
33 |
import com.hp.hpl.jena.rdf.model.Resource;
|
|
34 |
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
|
|
35 |
|
24 |
36 |
import edu.ucsb.nceas.metacat.DBUtil;
|
25 |
37 |
import edu.ucsb.nceas.metacat.DocumentImpl;
|
26 |
38 |
import edu.ucsb.nceas.metacat.IdentifierManager;
|
... | ... | |
37 |
49 |
|
38 |
50 |
private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
|
39 |
51 |
|
40 |
|
public void summarize(List<Identifier> identifiers) throws SQLException {
|
|
52 |
public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
|
53 |
public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
|
|
54 |
public static String owl = "http://www.w3.org/2002/07/owl#";
|
|
55 |
public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
|
|
56 |
public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
|
|
57 |
public static String oa = "http://www.w3.org/ns/oa#";
|
|
58 |
public static String oa_source = "http://www.w3.org/ns/oa.rdf";
|
|
59 |
public static String dcterms = "http://purl.org/dc/terms/";
|
|
60 |
public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
|
|
61 |
public static String foaf = "http://xmlns.com/foaf/0.1/";
|
|
62 |
public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
|
|
63 |
public static String prov = "http://www.w3.org/ns/prov#";
|
|
64 |
public static String prov_source = "http://www.w3.org/ns/prov.owl";
|
|
65 |
public static String cito = "http://purl.org/spar/cito/";
|
|
66 |
|
|
67 |
/**
|
|
68 |
* Generate annotation for given metadata identifier
|
|
69 |
* @param metadataPid
|
|
70 |
*/
|
|
71 |
public String generateAnnotation(Identifier metadataPid) throws Exception {
|
|
72 |
|
|
73 |
DataPackage dataPackage = this.getDataPackage(metadataPid);
|
|
74 |
|
|
75 |
OntModel m = ModelFactory.createOntologyModel();
|
|
76 |
Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());
|
41 |
77 |
|
|
78 |
// TODO: import the ontologies we use
|
|
79 |
ont.addImport(m.createResource(oboe));
|
|
80 |
m.addSubModel(ModelFactory.createOntologyModel().read(oboe));
|
|
81 |
|
|
82 |
ont.addImport(m.createResource(oa));
|
|
83 |
m.addSubModel(ModelFactory.createOntologyModel().read(oa_source));
|
|
84 |
|
|
85 |
ont.addImport(m.createResource(dcterms));
|
|
86 |
m.addSubModel(ModelFactory.createOntologyModel().read(dcterms_source));
|
|
87 |
|
|
88 |
ont.addImport(m.createResource(foaf));
|
|
89 |
m.addSubModel(ModelFactory.createOntologyModel().read(foaf_source));
|
|
90 |
|
|
91 |
ont.addImport(m.createResource(prov));
|
|
92 |
//m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));
|
|
93 |
|
|
94 |
ont.addImport(m.createResource(cito));
|
|
95 |
|
|
96 |
// properties
|
|
97 |
ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
|
|
98 |
ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
|
|
99 |
ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
|
|
100 |
ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
|
|
101 |
Property identifierProperty = m.getProperty(dcterms + "identifier");
|
|
102 |
Property nameProperty = m.getProperty(foaf + "name");
|
|
103 |
|
|
104 |
ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
|
|
105 |
ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
|
|
106 |
|
|
107 |
// classes
|
|
108 |
OntClass measurementClass = m.getOntClass(oboe_core + "Measurement");
|
|
109 |
OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
|
|
110 |
OntClass standardClass = m.getOntClass(oboe_core + "Standard");
|
|
111 |
|
|
112 |
Resource annotationClass = m.getOntClass(oa + "Annotation");
|
|
113 |
Resource specificResourceClass = m.getOntClass(oa + "SpecificResource");
|
|
114 |
Resource entityClass = m.getResource(prov + "Entity");
|
|
115 |
Resource personClass = m.getResource(prov + "Person");
|
|
116 |
|
|
117 |
int cnt = 0;
|
|
118 |
|
|
119 |
// these apply to every attribute annotation
|
|
120 |
Individual meta1 = m.createIndividual(ont.getURI() + "#meta" + cnt, entityClass);
|
|
121 |
Individual p1 = m.createIndividual(ont.getURI() + "#person" + cnt, personClass);
|
|
122 |
p1.addProperty(nameProperty, "Ben Leinfelder");
|
|
123 |
meta1.addProperty(identifierProperty, metadataPid.getValue());
|
|
124 |
|
|
125 |
// loop through the tables and attributes
|
|
126 |
Entity[] entities = dataPackage.getEntityList();
|
|
127 |
for (Entity entity: entities) {
|
|
128 |
String entityName = entity.getName();
|
|
129 |
logMetacat.debug("Entity name: " + entityName);
|
|
130 |
Attribute[] attributes = entity.getAttributeList().getAttributes();
|
|
131 |
for (Attribute attribute: attributes) {
|
|
132 |
|
|
133 |
String attributeName = attribute.getName();
|
|
134 |
String attributeLabel = attribute.getLabel();
|
|
135 |
String attributeDefinition = attribute.getDefinition();
|
|
136 |
String attributeType = attribute.getAttributeType();
|
|
137 |
String attributeScale = attribute.getMeasurementScale();
|
|
138 |
String attributeUnitType = attribute.getUnitType();
|
|
139 |
String attributeUnit = attribute.getUnit();
|
|
140 |
String attributeDomain = attribute.getDomain().getClass().getSimpleName();
|
|
141 |
|
|
142 |
logMetacat.debug("Attribute name: " + attributeName);
|
|
143 |
logMetacat.debug("Attribute label: " + attributeLabel);
|
|
144 |
logMetacat.debug("Attribute definition: " + attributeDefinition);
|
|
145 |
logMetacat.debug("Attribute type: " + attributeType);
|
|
146 |
logMetacat.debug("Attribute scale: " + attributeScale);
|
|
147 |
logMetacat.debug("Attribute unit type: " + attributeUnitType);
|
|
148 |
logMetacat.debug("Attribute unit: " + attributeUnit);
|
|
149 |
logMetacat.debug("Attribute domain: " + attributeDomain);
|
|
150 |
|
|
151 |
// look up the characteristic or standard subclasses
|
|
152 |
Resource standard = this.lookupStandard(standardClass, attribute);
|
|
153 |
Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);
|
|
154 |
|
|
155 |
if (standard != null || characteristic != null) {
|
|
156 |
|
|
157 |
// instances
|
|
158 |
Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
|
|
159 |
Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
|
|
160 |
Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
|
|
161 |
|
|
162 |
// statements about the annotation
|
|
163 |
a1.addProperty(hasBodyProperty, m1);
|
|
164 |
a1.addProperty(hasTargetProperty, t1);
|
|
165 |
t1.addProperty(hasSourceProperty, meta1);
|
|
166 |
a1.addProperty(annotatedByProperty, p1);
|
|
167 |
|
|
168 |
// describe the measurement in terms of restrictions
|
|
169 |
if (standard != null) {
|
|
170 |
AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
|
|
171 |
m1.addOntClass(avfr);
|
|
172 |
}
|
|
173 |
if (characteristic != null) {
|
|
174 |
AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
|
|
175 |
m1.addOntClass(avfr);
|
|
176 |
}
|
|
177 |
cnt++;
|
|
178 |
}
|
|
179 |
|
|
180 |
}
|
|
181 |
}
|
|
182 |
|
|
183 |
StringWriter sw = new StringWriter();
|
|
184 |
// only write the base model
|
|
185 |
//m.write(sw, "RDF/XML-ABBREV");
|
|
186 |
m.write(sw, null);
|
|
187 |
|
|
188 |
return sw.toString();
|
|
189 |
|
|
190 |
}
|
|
191 |
|
|
192 |
private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
|
|
193 |
// what's our unit?
|
|
194 |
String unit = attribute.getUnit().toLowerCase();
|
|
195 |
boolean found = false;
|
|
196 |
ExtendedIterator iter = standardClass.listSubClasses(false);
|
|
197 |
while (iter.hasNext()) {
|
|
198 |
OntClass subclass = (OntClass) iter.next();
|
|
199 |
String subclassName = subclass.getLocalName().toLowerCase();
|
|
200 |
logMetacat.debug("subclass: " + subclassName);
|
|
201 |
if (subclassName.equals(unit)) {
|
|
202 |
found = true;
|
|
203 |
}
|
|
204 |
if (subclass.hasLabel(unit, null)) {
|
|
205 |
found = true;
|
|
206 |
}
|
|
207 |
if (found) {
|
|
208 |
return subclass;
|
|
209 |
}
|
|
210 |
}
|
|
211 |
return null;
|
|
212 |
}
|
|
213 |
|
|
214 |
private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
|
|
215 |
// what's our label?
|
|
216 |
String label = attribute.getLabel().toLowerCase();
|
|
217 |
boolean found = false;
|
|
218 |
// find something that matches
|
|
219 |
ExtendedIterator iter = characteristicClass.listSubClasses();
|
|
220 |
while (iter.hasNext()) {
|
|
221 |
OntClass subclass = (OntClass) iter.next();
|
|
222 |
String subclassName = subclass.getLocalName().toLowerCase();
|
|
223 |
logMetacat.debug("subclass: " + subclassName);
|
|
224 |
if (subclassName.equals(label)) {
|
|
225 |
found = true;
|
|
226 |
}
|
|
227 |
if (subclass.hasLabel(label, null)) {
|
|
228 |
found = true;
|
|
229 |
}
|
|
230 |
if (found) {
|
|
231 |
return subclass;
|
|
232 |
}
|
|
233 |
}
|
|
234 |
return null;
|
|
235 |
}
|
|
236 |
|
|
237 |
private DataPackage getDataPackage(Identifier pid) throws Exception {
|
|
238 |
// for using the MN API as the MN itself
|
|
239 |
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
|
|
240 |
Session session = new Session();
|
|
241 |
Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
|
|
242 |
session.setSubject(subject);
|
|
243 |
InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
|
|
244 |
|
|
245 |
// parse the metadata
|
|
246 |
DataPackageParserInterface parser = new Eml200DataPackageParser();
|
|
247 |
parser.parse(emlStream);
|
|
248 |
DataPackage dataPackage = parser.getDataPackage();
|
|
249 |
return dataPackage;
|
|
250 |
}
|
|
251 |
|
|
252 |
private void summarize(List<Identifier> identifiers) throws SQLException {
|
|
253 |
|
42 |
254 |
DBConnection dbconn = null;
|
43 |
255 |
|
44 |
256 |
try {
|
... | ... | |
73 |
285 |
|
74 |
286 |
try {
|
75 |
287 |
|
76 |
|
// for using the MN API as the MN itself
|
77 |
|
MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
|
78 |
|
Session session = new Session();
|
79 |
|
Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
|
80 |
|
session.setSubject(subject);
|
81 |
|
InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
|
82 |
|
|
83 |
|
// parse the metadata
|
84 |
|
DataPackageParserInterface parser = new Eml200DataPackageParser();
|
85 |
|
parser.parse(emlStream);
|
86 |
|
DataPackage dataPackage = parser.getDataPackage();
|
|
288 |
// get the package
|
|
289 |
DataPackage dataPackage = this.getDataPackage(pid);
|
87 |
290 |
String title = dataPackage.getTitle();
|
88 |
291 |
logMetacat.debug("Title: " + title);
|
89 |
292 |
|
... | ... | |
146 |
349 |
}
|
147 |
350 |
|
148 |
351 |
public static void main(String[] args) throws Exception {
|
149 |
|
|
150 |
352 |
// set up the properties based on the test/deployed configuration of the workspace
|
151 |
|
SortedProperties testProperties = new SortedProperties("test/test.properties");
|
152 |
|
testProperties.load();
|
153 |
|
String metacatContextDir = testProperties.getProperty("metacat.contextDir");
|
154 |
|
PropertyService.getInstance(metacatContextDir + "/WEB-INF");
|
|
353 |
SortedProperties testProperties = new SortedProperties("test/test.properties");
|
|
354 |
testProperties.load();
|
|
355 |
String metacatContextDir = testProperties.getProperty("metacat.contextDir");
|
|
356 |
PropertyService.getInstance(metacatContextDir + "/WEB-INF");
|
|
357 |
|
|
358 |
testGenerate();
|
|
359 |
// testSummary();
|
|
360 |
System.exit(0);
|
|
361 |
}
|
|
362 |
|
|
363 |
public static void testGenerate() throws Exception {
|
|
364 |
Identifier metadataPid = new Identifier();
|
|
365 |
metadataPid.setValue("doi:10.5072/FK2445ZN4");
|
|
366 |
DatapackageSummarizer ds = new DatapackageSummarizer();
|
|
367 |
String rdfString = ds.generateAnnotation(metadataPid);
|
|
368 |
logMetacat.info("RDF annotation: \n" + rdfString);
|
155 |
369 |
|
|
370 |
}
|
|
371 |
|
|
372 |
public static void testSummary() throws Exception {
|
|
373 |
|
156 |
374 |
// summarize the packages
|
157 |
375 |
DatapackageSummarizer ds = new DatapackageSummarizer();
|
158 |
376 |
List<Identifier> identifiers = new ArrayList<Identifier>();
|
First pass at generating annotations from EML attribute information. Uses the OpenAnnotation model that the metacat-index tests assume, which allows us to populate dynamic index fields for the annotation class[es]. There is still much to be done with finding appropriate concepts for each attribute. https://projects.ecoinformatics.org/ecoinfo/issues/6256