Project

General

Profile

1 8646 leinfelder
package edu.ucsb.nceas.metacat.annotation;
2
3
import java.io.InputStream;
4 8702 leinfelder
import java.io.StringWriter;
5 8646 leinfelder
import java.sql.PreparedStatement;
6
import java.sql.SQLException;
7
import java.util.ArrayList;
8 8689 leinfelder
import java.util.Arrays;
9 8757 leinfelder
import java.util.HashMap;
10 8689 leinfelder
import java.util.Iterator;
11 8646 leinfelder
import java.util.List;
12 8689 leinfelder
import java.util.Map;
13 8646 leinfelder
import java.util.Vector;
14
15 8757 leinfelder
import org.apache.commons.io.IOUtils;
16 8646 leinfelder
import org.apache.log4j.Logger;
17
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
18
import org.dataone.service.types.v1.Identifier;
19
import org.dataone.service.types.v1.Session;
20
import org.dataone.service.types.v1.Subject;
21
import org.ecoinformatics.datamanager.parser.Attribute;
22
import org.ecoinformatics.datamanager.parser.DataPackage;
23
import org.ecoinformatics.datamanager.parser.Entity;
24 8788 leinfelder
import org.ecoinformatics.datamanager.parser.Party;
25 8646 leinfelder
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
26
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
27
28 8702 leinfelder
import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
29
import com.hp.hpl.jena.ontology.Individual;
30
import com.hp.hpl.jena.ontology.ObjectProperty;
31
import com.hp.hpl.jena.ontology.OntClass;
32 8769 leinfelder
import com.hp.hpl.jena.ontology.OntDocumentManager;
33 8702 leinfelder
import com.hp.hpl.jena.ontology.OntModel;
34
import com.hp.hpl.jena.ontology.Ontology;
35 8757 leinfelder
import com.hp.hpl.jena.query.Dataset;
36
import com.hp.hpl.jena.query.Query;
37
import com.hp.hpl.jena.query.QueryExecution;
38
import com.hp.hpl.jena.query.QueryExecutionFactory;
39
import com.hp.hpl.jena.query.QueryFactory;
40
import com.hp.hpl.jena.query.QuerySolution;
41
import com.hp.hpl.jena.query.ResultSet;
42 8702 leinfelder
import com.hp.hpl.jena.rdf.model.ModelFactory;
43
import com.hp.hpl.jena.rdf.model.Property;
44
import com.hp.hpl.jena.rdf.model.Resource;
45 8757 leinfelder
import com.hp.hpl.jena.tdb.TDBFactory;
46 8702 leinfelder
47 8646 leinfelder
import edu.ucsb.nceas.metacat.DBUtil;
48
import edu.ucsb.nceas.metacat.DocumentImpl;
49
import edu.ucsb.nceas.metacat.IdentifierManager;
50
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
51
import edu.ucsb.nceas.metacat.database.DBConnection;
52
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
53
import edu.ucsb.nceas.metacat.dataone.MNodeService;
54 8757 leinfelder
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
55 8646 leinfelder
import edu.ucsb.nceas.metacat.properties.PropertyService;
56 8689 leinfelder
import edu.ucsb.nceas.metacat.replication.ReplicationService;
57 8646 leinfelder
import edu.ucsb.nceas.metacat.util.DocumentUtil;
58
import edu.ucsb.nceas.utilities.SortedProperties;
59
60
public class DatapackageSummarizer {
61
62
	/** Log4j logger used by the methods of this class. */
	private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
63
64 8702 leinfelder
	// Namespace URIs (and, for some vocabularies, the separate document location
	// they are read from) used when building and querying annotations.
	/** RDF syntax namespace. */
	public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
65
	/** RDF Schema namespace. */
	public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
66
	/** OWL namespace. */
	public static String owl = "http://www.w3.org/2002/07/owl#";
67
	/** OBOE ontology namespace; also the location it is read from in initializeCache(). */
	public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
68
	/** OBOE core namespace; prefix for the Entity/Observation/Measurement terms looked up in generateAnnotation(). */
	public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
69 8717 leinfelder
	/** OBOE characteristics namespace. */
	public static String oboe_characteristics = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-characteristics.owl#";
70 8724 leinfelder
	/** OBOE SBC-LTER extension namespace; also the location it is read from in initializeCache(). */
	public static String oboe_sbc = "http://ecoinformatics.org/oboe-ext/sbclter.1.0/oboe-sbclter.owl#";
71 8702 leinfelder
	/** Open Annotation namespace. */
	public static String oa = "http://www.w3.org/ns/oa#";
72
	/** Document location the Open Annotation model is read from in initializeCache(). */
	public static String oa_source = "http://www.w3.org/ns/oa.rdf";
73
	/** Dublin Core terms namespace. */
	public static String dcterms = "http://purl.org/dc/terms/";
74
	/** Document location the Dublin Core terms model is read from in initializeCache(). */
	public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
75
	/** FOAF namespace. */
	public static String foaf = "http://xmlns.com/foaf/0.1/";
76
	/** Document location the FOAF model is read from in initializeCache(). */
	public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
77
    /** PROV namespace; initializeCache() reads the model from this same URI. */
    public static String prov = "http://www.w3.org/ns/prov#";
78
    /** PROV ontology document location; only referenced from commented-out code in this class. */
    public static String prov_source = "http://www.w3.org/ns/prov.owl";
79
    /** CiTO namespace; initializeCache() reads the model from this same URI. */
    public static String cito =  "http://purl.org/spar/cito/";
80 8709 leinfelder
81 8717 leinfelder
	/** Ontology name passed as the last argument to BioPortalService.lookupAnnotationClass(). */
	public static String OBOE_SBC = "OBOE-SBC";
82 8769 leinfelder
83
	/** Set to true by initializeCache() once the ontology models have been registered. */
	private static boolean cacheInitialized;
84
85
	private static void initializeCache() {
86
		if (!cacheInitialized) {
87
			// cache the ontologies we use
88
			OntDocumentManager.getInstance().addModel(oboe, ModelFactory.createOntologyModel().read(oboe));
89
			OntDocumentManager.getInstance().addModel(oboe_sbc, ModelFactory.createOntologyModel().read(oboe_sbc));
90
			OntDocumentManager.getInstance().addModel(oa, ModelFactory.createOntologyModel().read(oa_source));
91
			OntDocumentManager.getInstance().addModel(dcterms, ModelFactory.createOntologyModel().read(dcterms_source));
92
			OntDocumentManager.getInstance().addModel(foaf, ModelFactory.createOntologyModel().read(foaf_source));
93
			OntDocumentManager.getInstance().addModel(prov, ModelFactory.createOntologyModel().read(prov));
94
			OntDocumentManager.getInstance().addModel(cito, ModelFactory.createOntologyModel().read(cito));
95
			cacheInitialized = true;
96
		}
97
	}
98 8717 leinfelder
99 8757 leinfelder
    public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
100 8702 leinfelder
101 8757 leinfelder
    	// generate an annotation for the metadata given
102
		String rdfContent = this.generateAnnotation(metadataPid);
103
104
		// load to triple store
105 8765 leinfelder
		//Dataset dataset = TDBFactory.createDataset("./tbd");
106 8767 leinfelder
		Dataset dataset = TDBFactory.createDataset();
107
		//Dataset dataset = DatasetFactory.createMem();
108 8757 leinfelder
109
    	// read the annotation into the triplestore
110
		InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
111 8763 leinfelder
    	String name = "http://annotation/" + metadataPid.getValue();
112 8757 leinfelder
    	boolean loaded = dataset.containsNamedModel(name);
113
    	if (loaded) {
114
    		dataset.removeNamedModel(name);
115
    		loaded = false;
116
    	}
117 8765 leinfelder
		OntModel ontModel = null;
118 8757 leinfelder
		if (!loaded) {
119 8765 leinfelder
			ontModel = ModelFactory.createOntologyModel();
120 8757 leinfelder
			ontModel.read(source, name);
121
			dataset.addNamedModel(name, ontModel);
122
		}
123
124
		// query for fields to add to index
125
        Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
126
127
        // TODO: look up the query to use (support multiple like in the indexing project)
128 8763 leinfelder
        List<String> queries = new ArrayList<String>();
129
        queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
130 8757 leinfelder
        	+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
131
        	+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
132
			+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
133
			+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
134
			+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
135 8763 leinfelder
			+ "SELECT ?standard_sm ?pid "
136 8757 leinfelder
			+ "FROM <$GRAPH_NAME> "
137
			+ "WHERE { "
138
			+ "		?measurement rdf:type oboe-core:Measurement . "
139
			+ "		?measurement rdf:type ?restriction . "
140
			+ "		?restriction owl:onProperty oboe-core:usesStandard . "
141
			+ "		?restriction owl:allValuesFrom ?standard . "
142
			+ "		?standard rdfs:subClassOf+ ?standard_sm . "
143
			+ "		?standard_sm rdfs:subClassOf oboe-core:Standard . "
144
			+ "		?annotation oa:hasBody ?measurement . "
145
			+ "		?annotation oa:hasTarget ?target . "
146
			+ "		?target oa:hasSource ?metadata . "
147 8763 leinfelder
			+ "		?metadata dcterms:identifier ?pid . "
148
			+ "}");
149
150
        queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
151
    		+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
152
    		+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
153
    		+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
154
    		+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
155
    		+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
156
    		+ "SELECT ?characteristic_sm ?pid "
157
    		+ "FROM <$GRAPH_NAME>"
158
    		+ "WHERE { "
159
    		+ "		?measurement rdf:type oboe-core:Measurement . "
160
    		+ "		?measurement rdf:type ?restriction . "
161
			+ "		?restriction owl:onProperty oboe-core:ofCharacteristic . "
162
			+ "		?restriction owl:allValuesFrom ?characteristic . "
163
			+ "		?characteristic rdfs:subClassOf+ ?characteristic_sm . "
164
			+ "		?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . "
165
			+ "		?annotation oa:hasBody ?measurement .	"
166
			+ "		?annotation oa:hasTarget ?target . "
167
			+ "		?target oa:hasSource ?metadata . "
168
			+ "		?metadata dcterms:identifier ?pid . "
169
			+ "}");
170
171
        for (String q: queries) {
172
	        q = q.replaceAll("\\$GRAPH_NAME", name);
173
			Query query = QueryFactory.create(q);
174
			QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
175
			ResultSet results = qexec.execSelect();
176 8757 leinfelder
177 8763 leinfelder
			while (results.hasNext()) {
178
				QuerySolution solution = results.next();
179
				System.out.println(solution.toString());
180 8757 leinfelder
181 8763 leinfelder
				// find the index document we are trying to augment with the annotation
182
				if (solution.contains("pid")) {
183
					String id = solution.getLiteral("pid").getString();
184
					if (!id.equals(metadataPid.getValue())) {
185
						// skip any solution that does not annotate the given pid
186
						continue;
187
					}
188
189 8757 leinfelder
				}
190 8763 leinfelder
				// loop through the solution variables, add an index value for each
191
				Iterator<String> varNameIter = solution.varNames();
192
				while (varNameIter.hasNext()) {
193
					String key = varNameIter.next();
194
					if (key.equals("pid")) {
195
						// don't include the id
196
						continue;
197
					}
198
					String value = solution.get(key).toString();
199
					List<Object> values = fields.get(key);
200
					if (values  == null) {
201
						values = new ArrayList<Object>();
202
					}
203
					values.add(value);
204
					fields.put(key, values);
205 8757 leinfelder
				}
206
			}
207 8763 leinfelder
        }
208 8757 leinfelder
209 8765 leinfelder
        // remove the graph to save storage
210
//        ontModel.removeAll();
211
//        ontModel.commit();
212
//        ontModel.close();
213
		dataset.removeNamedModel(name);
214
215 8757 leinfelder
		// clean up the triple store
216
		TDBFactory.release(dataset);
217
218
		// add to index
219
		MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true);
220
221
222
	}
223
224 8702 leinfelder
    /**
225
     * Generate annotation for given metadata identifier
226
     * @param metadataPid
227
     */
228
    public String generateAnnotation(Identifier metadataPid) throws Exception {
229
230
    	DataPackage dataPackage = this.getDataPackage(metadataPid);
231
232
		OntModel m = ModelFactory.createOntologyModel();
233
		Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());
234 8646 leinfelder
235 8702 leinfelder
		// TODO: import the ontologies we use
236 8769 leinfelder
		initializeCache();
237
238 8702 leinfelder
		ont.addImport(m.createResource(oboe));
239 8769 leinfelder
		m.addSubModel(OntDocumentManager.getInstance().getModel(oboe));
240 8702 leinfelder
241 8724 leinfelder
		ont.addImport(m.createResource(oboe_sbc));
242 8769 leinfelder
		m.addSubModel(OntDocumentManager.getInstance().getModel(oboe_sbc));
243 8724 leinfelder
244 8702 leinfelder
		ont.addImport(m.createResource(oa));
245 8769 leinfelder
		m.addSubModel(OntDocumentManager.getInstance().getModel(oa));
246 8702 leinfelder
247
		ont.addImport(m.createResource(dcterms));
248 8769 leinfelder
		m.addSubModel(OntDocumentManager.getInstance().getModel(dcterms));
249 8702 leinfelder
250
		ont.addImport(m.createResource(foaf));
251 8769 leinfelder
		m.addSubModel(OntDocumentManager.getInstance().getModel(foaf));
252 8702 leinfelder
253
		ont.addImport(m.createResource(prov));
254
		//m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));
255
256
		ont.addImport(m.createResource(cito));
257
258
		// properties
259
		ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
260
		ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
261
		ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
262 8710 leinfelder
		ObjectProperty hasSelectorProperty = m.getObjectProperty(oa + "hasSelector");
263 8702 leinfelder
		ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
264
		Property identifierProperty = m.getProperty(dcterms + "identifier");
265 8710 leinfelder
		Property conformsToProperty = m.getProperty(dcterms + "conformsTo");
266 8718 leinfelder
		Property wasAttributedTo = m.getProperty(prov + "wasAttributedTo");
267 8702 leinfelder
		Property nameProperty = m.getProperty(foaf + "name");
268 8710 leinfelder
		Property rdfValue = m.getProperty(rdf + "value");
269 8702 leinfelder
270
		ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
271
		ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
272 8743 leinfelder
		ObjectProperty ofEntity = m.getObjectProperty(oboe_core + "ofEntity");
273
		ObjectProperty hasMeasurement = m.getObjectProperty(oboe_core + "hasMeasurement");
274 8702 leinfelder
275
		// classes
276 8743 leinfelder
		OntClass entityClass =  m.getOntClass(oboe_core + "Entity");
277
		OntClass observationClass =  m.getOntClass(oboe_core + "Observation");
278 8702 leinfelder
		OntClass measurementClass =  m.getOntClass(oboe_core + "Measurement");
279
		OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
280
		OntClass standardClass =  m.getOntClass(oboe_core + "Standard");
281
282
		Resource annotationClass =  m.getOntClass(oa + "Annotation");
283
		Resource specificResourceClass =  m.getOntClass(oa + "SpecificResource");
284 8710 leinfelder
		Resource fragmentSelectorClass =  m.getOntClass(oa + "FragmentSelector");
285 8743 leinfelder
		Resource provEntityClass =  m.getResource(prov + "Entity");
286 8702 leinfelder
		Resource personClass =  m.getResource(prov + "Person");
287 8718 leinfelder
288 8702 leinfelder
		// these apply to every attribute annotation
289 8743 leinfelder
		Individual meta1 = m.createIndividual(ont.getURI() + "#meta", provEntityClass);
290 8702 leinfelder
		meta1.addProperty(identifierProperty, metadataPid.getValue());
291
292 8718 leinfelder
		// decide who should be credited with the package
293
		Individual p1 = null;
294 8716 leinfelder
295 8718 leinfelder
		// look up creators from the EML metadata
296 8788 leinfelder
		List<Party> creators = dataPackage.getCreators();
297 8716 leinfelder
		//creators = Arrays.asList("Matthew Jones");
298 8718 leinfelder
		if (creators != null && creators.size() > 0) {
299
			// use an orcid if we can find one from their system
300 8788 leinfelder
			String orcidUri = OrcidService.lookupOrcid(creators.get(0).getOrganization(), creators.get(0).getSurName(), creators.get(0).getGivenNames(), null);
301 8718 leinfelder
			if (orcidUri != null) {
302
				p1 = m.createIndividual(orcidUri, personClass);
303
				p1.addProperty(identifierProperty, orcidUri);
304
			} else {
305
				p1 = m.createIndividual(ont.getURI() + "#person", personClass);
306
			}
307
			// include the name we have in the metadata
308 8788 leinfelder
			if (creators.get(0).getSurName() != null) {
309
				p1.addProperty(nameProperty, creators.get(0).getSurName());
310
			} else if (creators.get(0).getOrganization() != null) {
311
				p1.addProperty(nameProperty, creators.get(0).getOrganization());
312
			}
313 8716 leinfelder
		}
314
315 8718 leinfelder
		// attribute the package to this creator if we have one
316
		if (p1 != null) {
317
			meta1.addProperty(wasAttributedTo, p1);
318
		}
319
320 8702 leinfelder
		// loop through the tables and attributes
321 8714 leinfelder
		int entityCount = 1;
322 8702 leinfelder
		Entity[] entities = dataPackage.getEntityList();
323 8724 leinfelder
		if (entities != null) {
324
			for (Entity entity: entities) {
325
				String entityName = entity.getName();
326 8743 leinfelder
327
				Individual o1 = m.createIndividual(ont.getURI() + "#observation" + entityCount, observationClass);
328
				Resource entityConcept = lookupEntity(entityClass, entity);
329
				if (entityConcept != null) {
330
					AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofEntity, entityConcept);
331
					o1.addOntClass(avfr);
332
				}
333
334 8724 leinfelder
				logMetacat.debug("Entity name: " + entityName);
335
				Attribute[] attributes = entity.getAttributeList().getAttributes();
336
				int attributeCount = 1;
337
				if (attributes != null) {
338
					for (Attribute attribute: attributes) {
339
340
						// for naming the individuals uniquely
341
						String cnt = entityCount + "_" + attributeCount;
342
343
						String attributeName = attribute.getName();
344
						String attributeLabel = attribute.getLabel();
345
						String attributeDefinition = attribute.getDefinition();
346
						String attributeType = attribute.getAttributeType();
347
						String attributeScale = attribute.getMeasurementScale();
348
						String attributeUnitType = attribute.getUnitType();
349
						String attributeUnit = attribute.getUnit();
350
						String attributeDomain = attribute.getDomain().getClass().getSimpleName();
351
352
						logMetacat.debug("Attribute name: " + attributeName);
353
						logMetacat.debug("Attribute label: " + attributeLabel);
354
						logMetacat.debug("Attribute definition: " + attributeDefinition);
355
						logMetacat.debug("Attribute type: " + attributeType);
356
						logMetacat.debug("Attribute scale: " + attributeScale);
357
						logMetacat.debug("Attribute unit type: " + attributeUnitType);
358
						logMetacat.debug("Attribute unit: " + attributeUnit);
359
						logMetacat.debug("Attribute domain: " + attributeDomain);
360 8702 leinfelder
361 8724 leinfelder
						// look up the characteristic or standard subclasses
362
						Resource standard = this.lookupStandard(standardClass, attribute);
363
						Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);
364
365
						if (standard != null || characteristic != null) {
366
367
							// instances
368
							Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
369
							Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
370
							Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
371 8910 leinfelder
							String xpointer = "xpointer(/eml/dataSet/dataTable[" + entityCount + "]/attributeList/attribute[" + attributeCount + "])";
372 8724 leinfelder
							Individual s1 = m.createIndividual(ont.getURI() + "#" + xpointer, fragmentSelectorClass);
373
							s1.addLiteral(rdfValue, xpointer);
374 8910 leinfelder
							s1.addProperty(conformsToProperty, "http://tools.ietf.org/rfc/rfc3023");
375
							//s1.addProperty(conformsToProperty, "http://www.w3.org/TR/xptr/");
376
377 8724 leinfelder
378
							// statements about the annotation
379
							a1.addProperty(hasBodyProperty, m1);
380
							a1.addProperty(hasTargetProperty, t1);
381
							t1.addProperty(hasSourceProperty, meta1);
382
							t1.addProperty(hasSelectorProperty, s1);
383
							//a1.addProperty(annotatedByProperty, p1);
384
385
							// describe the measurement in terms of restrictions
386
							if (standard != null) {
387
								AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
388
								m1.addOntClass(avfr);
389
							}
390
							if (characteristic != null) {
391
								AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
392
								m1.addOntClass(avfr);
393
							}
394 8743 leinfelder
395
							// attach to the observation
396
							// TODO: evaluate whether the measurement can apply to the given observed entity
397
							o1.addProperty(hasMeasurement, m1);
398 8724 leinfelder
						}
399
						attributeCount++;
400
401 8702 leinfelder
					}
402
				}
403 8724 leinfelder
				entityCount++;
404 8714 leinfelder
			}
405 8702 leinfelder
		}
406
407
		StringWriter sw = new StringWriter();
408
		// only write the base model
409
		//m.write(sw, "RDF/XML-ABBREV");
410
		m.write(sw, null);
411
412
		return sw.toString();
413
414
	}
415
416
	private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
417
		// what's our unit?
418
		String unit = attribute.getUnit().toLowerCase();
419 8763 leinfelder
420
		/*
421
		boolean found = false;
422 8710 leinfelder
		List<String> tokens = Arrays.asList(unit.split(" "));
423 8702 leinfelder
		ExtendedIterator iter = standardClass.listSubClasses(false);
424
		while (iter.hasNext()) {
425
			OntClass subclass = (OntClass) iter.next();
426
			String subclassName = subclass.getLocalName().toLowerCase();
427
			logMetacat.debug("subclass: " + subclassName);
428 8710 leinfelder
			if (tokens.contains(subclassName)) {
429 8702 leinfelder
				found = true;
430
			}
431
			if (subclass.hasLabel(unit, null)) {
432
				found = true;
433
			}
434
			if (found) {
435
				return subclass;
436
			}
437
		}
438 8763 leinfelder
		*/
439
440 8711 leinfelder
		// try to look it up if we got this far
441 8717 leinfelder
		return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
442 8702 leinfelder
	}
443
444
	private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
445 8763 leinfelder
		// what are we looking for?
446 8702 leinfelder
		String label = attribute.getLabel().toLowerCase();
447 8763 leinfelder
		String definition = attribute.getDefinition();
448
		String text = label + " " + definition;
449 8710 leinfelder
450 8763 leinfelder
		/*
451
		// find something that matches
452 8702 leinfelder
		boolean found = false;
453 8763 leinfelder
		List<String> tokens = Arrays.asList(label.split(" "));
454 8702 leinfelder
		ExtendedIterator iter = characteristicClass.listSubClasses();
455
		while (iter.hasNext()) {
456
			OntClass subclass = (OntClass) iter.next();
457
			String subclassName = subclass.getLocalName().toLowerCase();
458
			logMetacat.debug("subclass: " + subclassName);
459 8710 leinfelder
			if (tokens.contains(subclassName)) {
460 8702 leinfelder
				found = true;
461
			}
462
			if (subclass.hasLabel(label, null)) {
463
				found = true;
464
			}
465
			if (found) {
466
				return subclass;
467
			}
468
		}
469 8763 leinfelder
		*/
470 8711 leinfelder
471 8763 leinfelder
		// try to look it up from the service
472
		return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
473 8711 leinfelder
474
	}
475
476 8743 leinfelder
	private Resource lookupEntity(OntClass entityClass, Entity entity) {
477
		// what's our description like?
478
		String name = entity.getName();
479
		String definition = entity.getDefinition();
480
481
		// try to look it up if we got this far
482
		return BioPortalService.lookupAnnotationClass(entityClass, definition, OBOE_SBC);
483
484
	}
485
486 8702 leinfelder
	private DataPackage getDataPackage(Identifier pid) throws Exception {
487
		// for using the MN API as the MN itself
488
		MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
489
		Session session = new Session();
490
        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
491
        session.setSubject(subject);
492
		InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
493
494
		// parse the metadata
495
		DataPackageParserInterface parser = new Eml200DataPackageParser();
496
		parser.parse(emlStream);
497
		DataPackage dataPackage = parser.getDataPackage();
498
		return dataPackage;
499
	}
500
501
	private void summarize(List<Identifier> identifiers) throws SQLException {
502
503 8646 leinfelder
		DBConnection dbconn = null;
504 9463 tao
		int serialNumber = -1;
505
		PreparedStatement dropStatement = null;
506
		PreparedStatement createStatement = null;
507
		PreparedStatement insertStatement = null;
508 8646 leinfelder
		try {
509
			dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize");
510 9463 tao
			serialNumber = dbconn.getCheckOutSerialNumber();
511
			dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary");
512 8646 leinfelder
			dropStatement.execute();
513
514 9463 tao
			createStatement = dbconn.prepareStatement(
515 8646 leinfelder
					"CREATE TABLE entity_summary (" +
516
					"guid text, " +
517
					"title text, " +
518
					"entity text," +
519
					"attributeName text," +
520
					"attributeLabel text," +
521
					"attributeDefinition text," +
522
					"attributeType text," +
523
					"attributeScale text," +
524
					"attributeUnitType text," +
525
					"attributeUnit text," +
526
					"attributeDomain text" +
527
					")");
528
			createStatement.execute();
529
530 9463 tao
			insertStatement = dbconn.prepareStatement(
531 8646 leinfelder
					"INSERT INTO entity_summary " +
532
					"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
533
534
			for (Identifier pid: identifiers) {
535
536
				logMetacat.debug("Parsing pid: " + pid.getValue());
537
538
				try {
539
540 8702 leinfelder
					// get the package
541
					DataPackage dataPackage = this.getDataPackage(pid);
542 8646 leinfelder
					String title = dataPackage.getTitle();
543
					logMetacat.debug("Title: " + title);
544
545
					Entity[] entities = dataPackage.getEntityList();
546
					if (entities != null) {
547
						for (Entity entity: entities) {
548
							String entityName = entity.getName();
549
							logMetacat.debug("Entity name: " + entityName);
550
							Attribute[] attributes = entity.getAttributeList().getAttributes();
551
							for (Attribute attribute: attributes) {
552
								String attributeName = attribute.getName();
553
								String attributeLabel = attribute.getLabel();
554
								String attributeDefinition = attribute.getDefinition();
555
								String attributeType = attribute.getAttributeType();
556
								String attributeScale = attribute.getMeasurementScale();
557
								String attributeUnitType = attribute.getUnitType();
558
								String attributeUnit = attribute.getUnit();
559
								String attributeDomain = attribute.getDomain().getClass().getSimpleName();
560
561
								logMetacat.debug("Attribute name: " + attributeName);
562
								logMetacat.debug("Attribute label: " + attributeLabel);
563
								logMetacat.debug("Attribute definition: " + attributeDefinition);
564
								logMetacat.debug("Attribute type: " + attributeType);
565
								logMetacat.debug("Attribute scale: " + attributeScale);
566
								logMetacat.debug("Attribute unit type: " + attributeUnitType);
567
								logMetacat.debug("Attribute unit: " + attributeUnit);
568
								logMetacat.debug("Attribute domain: " + attributeDomain);
569
570
								// set the values for this attribute
571
								insertStatement.setString(1, pid.getValue());
572
								insertStatement.setString(2, title);
573
								insertStatement.setString(3, entityName);
574
								insertStatement.setString(4, attributeName);
575
								insertStatement.setString(5, attributeLabel);
576
								insertStatement.setString(6, attributeDefinition);
577
								insertStatement.setString(7, attributeType);
578
								insertStatement.setString(8, attributeScale);
579
								insertStatement.setString(9, attributeUnitType);
580
								insertStatement.setString(10, attributeUnit);
581
								insertStatement.setString(11, attributeDomain);
582
								insertStatement.execute();
583
584
							}
585
						}
586
					}
587
588
				} catch (Exception e) {
589
					logMetacat.warn("error parsing metadata for: " + pid.getValue(), e);
590
				}
591
			}
592
		} catch (SQLException sqle) {
593
			// just throw it
594
			throw sqle;
595
		} finally {
596 9463 tao
		    try {
597
		        if(dropStatement != null) {
598
		            dropStatement.close();
599
		        }
600
		        if(createStatement != null) {
601
		            createStatement.close();
602
		        }
603
		        if(insertStatement != null) {
604
		            insertStatement.close();
605
		        }
606
		    } catch (Exception e) {
607
		        logMetacat.warn("couldn't close the prepared statement "+e.getMessage());
608
		    } finally {
609
		        if (dbconn != null) {
610
	                DBConnectionPool.returnDBConnection(dbconn, serialNumber);
611
	                //dbconn.close();
612
	            }
613
		    }
614
615 8646 leinfelder
		}
616
	}
617
618
	public static void main(String[] args) throws Exception {
619
		// set up the properties based on the test/deployed configuration of the workspace
620 8702 leinfelder
			SortedProperties testProperties = new SortedProperties("test/test.properties");
621
			testProperties.load();
622
			String metacatContextDir = testProperties.getProperty("metacat.contextDir");
623
			PropertyService.getInstance(metacatContextDir + "/WEB-INF");
624
625
			testGenerate();
626
//			testSummary();
627
			System.exit(0);
628
	}
629
630
	public static void testGenerate() throws Exception {
631
		Identifier metadataPid = new Identifier();
632 8743 leinfelder
		metadataPid.setValue("tao.1.4");
633 8702 leinfelder
		DatapackageSummarizer ds = new DatapackageSummarizer();
634
		String rdfString = ds.generateAnnotation(metadataPid);
635
		logMetacat.info("RDF annotation: \n" + rdfString);
636 8646 leinfelder
637 8702 leinfelder
	}
638
639
	public static void testSummary() throws Exception {
640
641 8646 leinfelder
		// summarize the packages
642
		DatapackageSummarizer ds = new DatapackageSummarizer();
643
		List<Identifier> identifiers = new ArrayList<Identifier>();
644 8689 leinfelder
		Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
645
646
		// select the metadata ids we want to summarize
647
		boolean includeReplicas = true;
648
		Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
649
		if (includeReplicas ) {
650
			codeIter = serverCodes.keySet().iterator();
651
		}
652 8646 leinfelder
653 8689 leinfelder
		Vector<String> idList = new Vector<String>();
654
		while (codeIter.hasNext()) {
655
			int serverLocation = codeIter.next();
656
			Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
657
			Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
658
			Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
659
			Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
660
661
			idList.addAll(idList0);
662
			idList.addAll(idList1);
663
			idList.addAll(idList2);
664
			idList.addAll(idList3);
665 8646 leinfelder
666 8689 leinfelder
		}
667
668
		// go through all the identifiers now
669 8646 leinfelder
		for (String localId : idList) {
670
			try {
671
				String guid = IdentifierManager.getInstance().getGUID(
672
						DocumentUtil.getDocIdFromAccessionNumber(localId),
673
						DocumentUtil.getRevisionFromAccessionNumber(localId));
674
				Identifier pid = new Identifier();
675
				pid.setValue(guid);
676
				identifiers.add(pid);
677
			} catch (McdbDocNotFoundException nfe) {
678
				// just skip it
679
				continue;
680
			}
681
		}
682
		ds.summarize(identifiers);
683
		System.exit(0);
684
	}
685
686
}