Project

General

Profile

1
package edu.ucsb.nceas.metacat.annotation;
2

    
3
import java.io.InputStream;
4
import java.io.StringWriter;
5
import java.sql.PreparedStatement;
6
import java.sql.SQLException;
7
import java.util.ArrayList;
8
import java.util.Arrays;
9
import java.util.Iterator;
10
import java.util.List;
11
import java.util.Map;
12
import java.util.Vector;
13

    
14
import org.apache.log4j.Logger;
15
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
16
import org.dataone.service.types.v1.Identifier;
17
import org.dataone.service.types.v1.Session;
18
import org.dataone.service.types.v1.Subject;
19
import org.ecoinformatics.datamanager.parser.Attribute;
20
import org.ecoinformatics.datamanager.parser.DataPackage;
21
import org.ecoinformatics.datamanager.parser.Entity;
22
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
23
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
24

    
25
import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
26
import com.hp.hpl.jena.ontology.Individual;
27
import com.hp.hpl.jena.ontology.ObjectProperty;
28
import com.hp.hpl.jena.ontology.OntClass;
29
import com.hp.hpl.jena.ontology.OntModel;
30
import com.hp.hpl.jena.ontology.Ontology;
31
import com.hp.hpl.jena.rdf.model.ModelFactory;
32
import com.hp.hpl.jena.rdf.model.Property;
33
import com.hp.hpl.jena.rdf.model.Resource;
34
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
35

    
36
import edu.ucsb.nceas.metacat.DBUtil;
37
import edu.ucsb.nceas.metacat.DocumentImpl;
38
import edu.ucsb.nceas.metacat.IdentifierManager;
39
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
40
import edu.ucsb.nceas.metacat.database.DBConnection;
41
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
42
import edu.ucsb.nceas.metacat.dataone.MNodeService;
43
import edu.ucsb.nceas.metacat.properties.PropertyService;
44
import edu.ucsb.nceas.metacat.replication.ReplicationService;
45
import edu.ucsb.nceas.metacat.util.DocumentUtil;
46
import edu.ucsb.nceas.utilities.SortedProperties;
47

    
48
public class DatapackageSummarizer {
49

    
50
	private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
51
	
52
	public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
53
	public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
54
	public static String owl = "http://www.w3.org/2002/07/owl#";
55
	public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
56
	public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
57
	public static String oboe_characteristics = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-characteristics.owl#";
58
	public static String oboe_sbc = "http://ecoinformatics.org/oboe-ext/sbclter.1.0/oboe-sbclter.owl#";
59
	public static String oa = "http://www.w3.org/ns/oa#";
60
	public static String oa_source = "http://www.w3.org/ns/oa.rdf";
61
	public static String dcterms = "http://purl.org/dc/terms/";
62
	public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
63
	public static String foaf = "http://xmlns.com/foaf/0.1/";
64
	public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
65
    public static String prov = "http://www.w3.org/ns/prov#";
66
    public static String prov_source = "http://www.w3.org/ns/prov.owl";
67
    public static String cito =  "http://purl.org/spar/cito/";
68
    
69
	public static String OBOE_SBC = "OBOE-SBC";
70
    
71
    // package visibility for testing only
72
    boolean randomize = false;
73

    
74
    /**
75
     * Generate annotation for given metadata identifier
76
     * @param metadataPid
77
     */
78
    public String generateAnnotation(Identifier metadataPid) throws Exception {
79
    	
80
    	DataPackage dataPackage = this.getDataPackage(metadataPid);
81
    	
82
		OntModel m = ModelFactory.createOntologyModel();
83
		Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());
84
		
85
		// TODO: import the ontologies we use
86
		ont.addImport(m.createResource(oboe));
87
		m.addSubModel(ModelFactory.createOntologyModel().read(oboe));
88
		
89
		ont.addImport(m.createResource(oboe_sbc));
90
		m.addSubModel(ModelFactory.createOntologyModel().read(oboe_sbc));
91
		
92
		ont.addImport(m.createResource(oa));
93
		m.addSubModel(ModelFactory.createOntologyModel().read(oa_source));
94

    
95
		ont.addImport(m.createResource(dcterms));
96
		m.addSubModel(ModelFactory.createOntologyModel().read(dcterms_source));
97

    
98
		ont.addImport(m.createResource(foaf));
99
		m.addSubModel(ModelFactory.createOntologyModel().read(foaf_source));
100
		
101
		ont.addImport(m.createResource(prov));
102
		//m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));
103

    
104
		ont.addImport(m.createResource(cito));
105
		
106
		// properties
107
		ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
108
		ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
109
		ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
110
		ObjectProperty hasSelectorProperty = m.getObjectProperty(oa + "hasSelector");
111
		ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
112
		Property identifierProperty = m.getProperty(dcterms + "identifier");
113
		Property conformsToProperty = m.getProperty(dcterms + "conformsTo");
114
		Property wasAttributedTo = m.getProperty(prov + "wasAttributedTo");
115
		Property nameProperty = m.getProperty(foaf + "name");
116
		Property rdfValue = m.getProperty(rdf + "value");
117
		
118
		ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
119
		ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
120
		ObjectProperty ofEntity = m.getObjectProperty(oboe_core + "ofEntity");
121
		ObjectProperty hasMeasurement = m.getObjectProperty(oboe_core + "hasMeasurement");
122

    
123
		// classes
124
		OntClass entityClass =  m.getOntClass(oboe_core + "Entity");
125
		OntClass observationClass =  m.getOntClass(oboe_core + "Observation");
126
		OntClass measurementClass =  m.getOntClass(oboe_core + "Measurement");
127
		OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
128
		OntClass standardClass =  m.getOntClass(oboe_core + "Standard");
129
		
130
		Resource annotationClass =  m.getOntClass(oa + "Annotation");
131
		Resource specificResourceClass =  m.getOntClass(oa + "SpecificResource");
132
		Resource fragmentSelectorClass =  m.getOntClass(oa + "FragmentSelector");
133
		Resource provEntityClass =  m.getResource(prov + "Entity");
134
		Resource personClass =  m.getResource(prov + "Person");
135
				
136
		// these apply to every attribute annotation
137
		Individual meta1 = m.createIndividual(ont.getURI() + "#meta", provEntityClass);
138
		meta1.addProperty(identifierProperty, metadataPid.getValue());
139

    
140
		// decide who should be credited with the package
141
		Individual p1 = null;
142
		
143
		// look up creators from the EML metadata
144
		List<String> creators = dataPackage.getCreators();
145
		//creators = Arrays.asList("Matthew Jones");
146
		if (creators != null && creators.size() > 0) {	
147
			// use an orcid if we can find one from their system
148
			String orcidUri = OrcidService.lookupOrcid(null, null, creators.toArray(new String[0]));
149
			if (orcidUri != null) {
150
				p1 = m.createIndividual(orcidUri, personClass);
151
				p1.addProperty(identifierProperty, orcidUri);
152
			} else {
153
				p1 = m.createIndividual(ont.getURI() + "#person", personClass);
154
			}
155
			// include the name we have in the metadata
156
			p1.addProperty(nameProperty, creators.get(0));
157
		}
158
		
159
		// attribute the package to this creator if we have one
160
		if (p1 != null) {
161
			meta1.addProperty(wasAttributedTo, p1);
162
		}
163
		
164
		// loop through the tables and attributes
165
		int entityCount = 1;
166
		Entity[] entities = dataPackage.getEntityList();
167
		if (entities != null) {
168
			for (Entity entity: entities) {
169
				String entityName = entity.getName();
170
				
171
				Individual o1 = m.createIndividual(ont.getURI() + "#observation" + entityCount, observationClass);
172
				Resource entityConcept = lookupEntity(entityClass, entity);
173
				if (entityConcept != null) {
174
					AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofEntity, entityConcept);
175
					o1.addOntClass(avfr);
176
				}
177
				
178
				logMetacat.debug("Entity name: " + entityName);
179
				Attribute[] attributes = entity.getAttributeList().getAttributes();
180
				int attributeCount = 1;
181
				if (attributes != null) {
182
					for (Attribute attribute: attributes) {
183
						
184
						// for naming the individuals uniquely
185
						String cnt = entityCount + "_" + attributeCount;
186
						
187
						String attributeName = attribute.getName();
188
						String attributeLabel = attribute.getLabel();
189
						String attributeDefinition = attribute.getDefinition();
190
						String attributeType = attribute.getAttributeType();
191
						String attributeScale = attribute.getMeasurementScale();
192
						String attributeUnitType = attribute.getUnitType();
193
						String attributeUnit = attribute.getUnit();
194
						String attributeDomain = attribute.getDomain().getClass().getSimpleName();
195
		
196
						logMetacat.debug("Attribute name: " + attributeName);
197
						logMetacat.debug("Attribute label: " + attributeLabel);
198
						logMetacat.debug("Attribute definition: " + attributeDefinition);
199
						logMetacat.debug("Attribute type: " + attributeType);
200
						logMetacat.debug("Attribute scale: " + attributeScale);
201
						logMetacat.debug("Attribute unit type: " + attributeUnitType);
202
						logMetacat.debug("Attribute unit: " + attributeUnit);
203
						logMetacat.debug("Attribute domain: " + attributeDomain);
204
					
205
						// look up the characteristic or standard subclasses
206
						Resource standard = this.lookupStandard(standardClass, attribute);
207
						Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);
208
						
209
						if (standard != null || characteristic != null) {
210
							
211
							// instances
212
							Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
213
							Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
214
							Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
215
							String xpointer = "xpointer(/eml/dataSet/" + entityCount + "/attributeList/" + attributeCount + ")";
216
							Individual s1 = m.createIndividual(ont.getURI() + "#" + xpointer, fragmentSelectorClass);
217
							s1.addLiteral(rdfValue, xpointer);
218
							s1.addProperty(conformsToProperty, "http://www.w3.org/TR/xptr/");
219
							
220
							// statements about the annotation
221
							a1.addProperty(hasBodyProperty, m1);
222
							a1.addProperty(hasTargetProperty, t1);
223
							t1.addProperty(hasSourceProperty, meta1);
224
							t1.addProperty(hasSelectorProperty, s1);
225
							//a1.addProperty(annotatedByProperty, p1);
226
							
227
							// describe the measurement in terms of restrictions
228
							if (standard != null) {
229
								AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
230
								m1.addOntClass(avfr);
231
							}
232
							if (characteristic != null) {
233
								AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
234
								m1.addOntClass(avfr);
235
							}
236
							
237
							// attach to the observation
238
							// TODO: evaluate whether the measurement can apply to the given observed entity
239
							o1.addProperty(hasMeasurement, m1);
240
						}
241
						attributeCount++;
242
						
243
					}
244
				}
245
				entityCount++;
246
			}
247
		}
248
		
249
		StringWriter sw = new StringWriter();
250
		// only write the base model
251
		//m.write(sw, "RDF/XML-ABBREV");
252
		m.write(sw, null);
253

    
254
		return sw.toString();
255
		
256
	}
257
	
258
	private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
259
		// what's our unit?
260
		String unit = attribute.getUnit().toLowerCase();
261
		List<String> tokens = Arrays.asList(unit.split(" "));
262

    
263
		boolean found = false;
264
		ExtendedIterator iter = standardClass.listSubClasses(false);
265
		if (randomize) {
266
			List subclasses = iter.toList();
267
			int size = subclasses.size();
268
			Long index = new Long(Math.round(Math.floor((Math.random() * (size-1)))));
269
			OntClass subclass = (OntClass) subclasses.get( index.intValue() );
270
			return subclass;
271
		}
272
		while (iter.hasNext()) {
273
			OntClass subclass = (OntClass) iter.next();
274
			String subclassName = subclass.getLocalName().toLowerCase();
275
			logMetacat.debug("subclass: " + subclassName);
276
			if (tokens.contains(subclassName)) {
277
				found = true;
278
			}
279
			if (subclass.hasLabel(unit, null)) {
280
				found = true;
281
			}
282
			if (found) {
283
				return subclass;
284
			}
285
		}
286
		// try to look it up if we got this far
287
		return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
288
	}
289
	
290
	private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
291
		// what's our label?
292
		String label = attribute.getLabel().toLowerCase();
293
		List<String> tokens = Arrays.asList(label.split(" "));
294
		
295
		boolean found = false;
296
		// find something that matches
297
		ExtendedIterator iter = characteristicClass.listSubClasses();
298
		if (randomize) {
299
			List subclasses = iter.toList();
300
			int size = subclasses.size();
301
			Long index = new Long(Math.round(Math.floor((Math.random() * (size-1)))));
302
			OntClass subclass = (OntClass) subclasses.get( index.intValue() );
303
			return subclass;
304
		}
305
		while (iter.hasNext()) {
306
			OntClass subclass = (OntClass) iter.next();
307
			String subclassName = subclass.getLocalName().toLowerCase();
308
			logMetacat.debug("subclass: " + subclassName);
309
			if (tokens.contains(subclassName)) {
310
				found = true;
311
			}
312
			if (subclass.hasLabel(label, null)) {
313
				found = true;
314
			}
315
			if (found) {
316
				return subclass;
317
			}
318
		}
319
		
320
		// try to look it up if we got this far
321
		return BioPortalService.lookupAnnotationClass(characteristicClass, attribute.getDefinition(), OBOE_SBC);
322
		
323
	}
324
	
325
	private Resource lookupEntity(OntClass entityClass, Entity entity) {
326
		// what's our description like?
327
		String name = entity.getName();
328
		String definition = entity.getDefinition();
329
		
330
		// try to look it up if we got this far
331
		return BioPortalService.lookupAnnotationClass(entityClass, definition, OBOE_SBC);
332
		
333
	}
334
	
335
	private DataPackage getDataPackage(Identifier pid) throws Exception {
336
		// for using the MN API as the MN itself
337
		MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
338
		Session session = new Session();
339
        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
340
        session.setSubject(subject);
341
		InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
342

    
343
		// parse the metadata
344
		DataPackageParserInterface parser = new Eml200DataPackageParser();
345
		parser.parse(emlStream);
346
		DataPackage dataPackage = parser.getDataPackage();
347
		return dataPackage;
348
	}
349

    
350
	private void summarize(List<Identifier> identifiers) throws SQLException {
351
		
352
		DBConnection dbconn = null;
353

    
354
		try {
355
			dbconn = DBConnectionPool.getDBConnection("DatapackageSummarizer.summarize");
356
			
357
			PreparedStatement dropStatement = dbconn.prepareStatement("DROP TABLE IF EXISTS entity_summary");
358
			dropStatement.execute();
359
	
360
			PreparedStatement createStatement = dbconn.prepareStatement(
361
					"CREATE TABLE entity_summary (" +
362
					"guid text, " +
363
					"title text, " +
364
					"entity text," +
365
					"attributeName text," +
366
					"attributeLabel text," +
367
					"attributeDefinition text," +
368
					"attributeType text," +
369
					"attributeScale text," +
370
					"attributeUnitType text," +
371
					"attributeUnit text," +
372
					"attributeDomain text" +
373
					")");
374
			createStatement.execute();
375
			
376
			PreparedStatement insertStatement = dbconn.prepareStatement(
377
					"INSERT INTO entity_summary " +
378
					"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
379
			
380
			for (Identifier pid: identifiers) {
381
			
382
				logMetacat.debug("Parsing pid: " + pid.getValue());
383
				
384
				try {
385
					
386
					// get the package
387
					DataPackage dataPackage = this.getDataPackage(pid);
388
					String title = dataPackage.getTitle();
389
					logMetacat.debug("Title: " + title);
390
					
391
					Entity[] entities = dataPackage.getEntityList();
392
					if (entities != null) {
393
						for (Entity entity: entities) {
394
							String entityName = entity.getName();
395
							logMetacat.debug("Entity name: " + entityName);
396
							Attribute[] attributes = entity.getAttributeList().getAttributes();
397
							for (Attribute attribute: attributes) {
398
								String attributeName = attribute.getName();
399
								String attributeLabel = attribute.getLabel();
400
								String attributeDefinition = attribute.getDefinition();
401
								String attributeType = attribute.getAttributeType();
402
								String attributeScale = attribute.getMeasurementScale();
403
								String attributeUnitType = attribute.getUnitType();
404
								String attributeUnit = attribute.getUnit();
405
								String attributeDomain = attribute.getDomain().getClass().getSimpleName();
406
	
407
								logMetacat.debug("Attribute name: " + attributeName);
408
								logMetacat.debug("Attribute label: " + attributeLabel);
409
								logMetacat.debug("Attribute definition: " + attributeDefinition);
410
								logMetacat.debug("Attribute type: " + attributeType);
411
								logMetacat.debug("Attribute scale: " + attributeScale);
412
								logMetacat.debug("Attribute unit type: " + attributeUnitType);
413
								logMetacat.debug("Attribute unit: " + attributeUnit);
414
								logMetacat.debug("Attribute domain: " + attributeDomain);
415
								
416
								// set the values for this attribute
417
								insertStatement.setString(1, pid.getValue());
418
								insertStatement.setString(2, title);
419
								insertStatement.setString(3, entityName);
420
								insertStatement.setString(4, attributeName);
421
								insertStatement.setString(5, attributeLabel);
422
								insertStatement.setString(6, attributeDefinition);
423
								insertStatement.setString(7, attributeType);
424
								insertStatement.setString(8, attributeScale);
425
								insertStatement.setString(9, attributeUnitType);
426
								insertStatement.setString(10, attributeUnit);
427
								insertStatement.setString(11, attributeDomain);
428
								insertStatement.execute();
429
								
430
							}		
431
						}
432
					}
433
					
434
				} catch (Exception e) {
435
					logMetacat.warn("error parsing metadata for: " + pid.getValue(), e);
436
				}
437
			}
438
		} catch (SQLException sqle) {
439
			// just throw it
440
			throw sqle;
441
		} finally {
442
			if (dbconn != null) {
443
				DBConnectionPool.returnDBConnection(dbconn, 0);
444
				dbconn.close();
445
			}
446
		}
447
	}
448
	
449
	public static void main(String[] args) throws Exception {
450
		// set up the properties based on the test/deployed configuration of the workspace
451
			SortedProperties testProperties = new SortedProperties("test/test.properties");
452
			testProperties.load();
453
			String metacatContextDir = testProperties.getProperty("metacat.contextDir");
454
			PropertyService.getInstance(metacatContextDir + "/WEB-INF");
455
			
456
			testGenerate();
457
//			testSummary();
458
			System.exit(0);
459
	}
460
	
461
	public static void testGenerate() throws Exception {
462
		Identifier metadataPid = new Identifier();
463
		metadataPid.setValue("tao.1.4");
464
		DatapackageSummarizer ds = new DatapackageSummarizer();
465
		String rdfString = ds.generateAnnotation(metadataPid);
466
		logMetacat.info("RDF annotation: \n" + rdfString);
467
		
468
	}
469
	
470
	public static void testSummary() throws Exception {
471
		
472
		// summarize the packages
473
		DatapackageSummarizer ds = new DatapackageSummarizer();
474
		List<Identifier> identifiers = new ArrayList<Identifier>();
475
		Map<Integer, String> serverCodes = ReplicationService.getServerCodes();
476

    
477
		// select the metadata ids we want to summarize
478
		boolean includeReplicas = true;
479
		Iterator<Integer> codeIter = Arrays.asList(new Integer[] {1}).iterator();
480
		if (includeReplicas ) {
481
			codeIter = serverCodes.keySet().iterator();
482
		}
483
		
484
		Vector<String> idList = new Vector<String>();
485
		while (codeIter.hasNext()) {
486
			int serverLocation = codeIter.next();
487
			Vector<String> idList0 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_0NAMESPACE, false, serverLocation);
488
			Vector<String> idList1 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_0_1NAMESPACE, false, serverLocation);
489
			Vector<String> idList2 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_0NAMESPACE, false, serverLocation);
490
			Vector<String> idList3 = DBUtil.getAllDocidsByType(DocumentImpl.EML2_1_1NAMESPACE, false, serverLocation);
491
			
492
			idList.addAll(idList0);
493
			idList.addAll(idList1);
494
			idList.addAll(idList2);
495
			idList.addAll(idList3);
496
		
497
		}
498
		
499
		// go through all the identifiers now
500
		for (String localId : idList) {
501
			try {
502
				String guid = IdentifierManager.getInstance().getGUID(
503
						DocumentUtil.getDocIdFromAccessionNumber(localId), 
504
						DocumentUtil.getRevisionFromAccessionNumber(localId));
505
				Identifier pid = new Identifier();
506
				pid.setValue(guid);
507
				identifiers.add(pid);
508
			} catch (McdbDocNotFoundException nfe) {
509
				// just skip it
510
				continue;
511
			}
512
		}
513
		ds.summarize(identifiers);
514
		System.exit(0);
515
	}
516
	
517
}
(2-2/3)