Project

General

Profile

« Previous | Next » 

Revision 8702

First pass at generating annotations from EML attribute information. Uses the OpenAnnotation model that the metacat-index tests assume, which allows us to populate dynamic index fields for the annotation class[es]. There is still much to be done with finding appropriate concepts for each attribute. https://projects.ecoinformatics.org/ecoinfo/issues/6256

View differences:

test/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizerTest.java
1
/**  '$RCSfile$'
2
 *  Copyright: 2010 Regents of the University of California and the
3
 *              National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.annotation;
20

  
21
import java.io.ByteArrayInputStream;
22
import java.io.FileInputStream;
23
import java.io.InputStream;
24

  
25
import junit.framework.Test;
26
import junit.framework.TestSuite;
27

  
28
import org.dataone.service.types.v1.Identifier;
29
import org.dataone.service.types.v1.ObjectFormatIdentifier;
30
import org.dataone.service.types.v1.Session;
31
import org.dataone.service.types.v1.SystemMetadata;
32

  
33
import edu.ucsb.nceas.metacat.dataone.D1NodeServiceTest;
34
import edu.ucsb.nceas.metacat.dataone.MNodeService;
35

  
36
public class DatapackageSummarizerTest extends D1NodeServiceTest {
37

  
38
	
39
    private static final String ANNOTATION_TEST_DOC = "test/eml-sample-annotation.xml";
40

  
41
	/**
42
	 * constructor for the test
43
	 */
44
	public DatapackageSummarizerTest(String name) {
45
		super(name);
46
	}
47

  
48
	/**
49
	 * Establish a testing framework by initializing appropriate objects
50
	 */
51
	public void setUp() throws Exception {
52
		super.setUp();
53
	}
54

  
55
	/**
56
	 * Release any objects after tests are complete
57
	 */
58
	public void tearDown() {
59
	}
60

  
61
	/**
62
	 * Create a suite of tests to be run together
63
	 */
64
	public static Test suite() {
65
		TestSuite suite = new TestSuite();
66
		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
67
		return suite;
68
	}
69

  
70
	public void testGenerateAnnotation() throws Exception {
71
		Identifier metadataPid = new Identifier();
72
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
73
		Session session = getTestSession();
74
		try {
75
			InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
76
			SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
77
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
78
			formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
79
			sysmeta.setFormatId(formatId);
80
			Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
81
			assertEquals(metadataPid.getValue(), pid.getValue());
82
		} catch (Exception e) {
83
			e.printStackTrace();
84
			fail("Could not add metadata test file: " + e.getMessage());
85
		}
86

  
87
		// generate the annotation for the metadata
88
		DatapackageSummarizer ds = new DatapackageSummarizer();
89
		String rdfContent = ds.generateAnnotation(metadataPid);
90
		
91
		// save the annotation
92
		Identifier annotationPid = new Identifier();
93
		annotationPid.setValue("http://annotation/" + metadataPid.getValue());
94
		try {
95
			InputStream object = new ByteArrayInputStream(rdfContent.getBytes("UTF-8"));
96
			SystemMetadata sysmeta = createSystemMetadata(annotationPid, session.getSubject(), object);
97
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
98
			formatId.setValue("http://www.w3.org/TR/rdf-syntax-grammar");
99
			sysmeta.setFormatId(formatId);
100
			Identifier pid = MNodeService.getInstance(request).create(session, annotationPid, object, sysmeta);
101
			assertEquals(annotationPid.getValue(), pid.getValue());
102
		} catch (Exception e) {
103
			e.printStackTrace();
104
			fail("Could not add annotation test file: " + e.getMessage());
105
		}
106
		
107
		// check that it was parsed?
108
	}
109

  
110
}
0 111

  
src/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizer.java
1 1
package edu.ucsb.nceas.metacat.annotation;
2 2

  
3 3
import java.io.InputStream;
4
import java.io.StringWriter;
4 5
import java.sql.PreparedStatement;
5 6
import java.sql.SQLException;
6 7
import java.util.ArrayList;
......
21 22
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface;
22 23
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser;
23 24

  
25
import com.hp.hpl.jena.ontology.AllValuesFromRestriction;
26
import com.hp.hpl.jena.ontology.Individual;
27
import com.hp.hpl.jena.ontology.ObjectProperty;
28
import com.hp.hpl.jena.ontology.OntClass;
29
import com.hp.hpl.jena.ontology.OntModel;
30
import com.hp.hpl.jena.ontology.Ontology;
31
import com.hp.hpl.jena.rdf.model.ModelFactory;
32
import com.hp.hpl.jena.rdf.model.Property;
33
import com.hp.hpl.jena.rdf.model.Resource;
34
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
35

  
24 36
import edu.ucsb.nceas.metacat.DBUtil;
25 37
import edu.ucsb.nceas.metacat.DocumentImpl;
26 38
import edu.ucsb.nceas.metacat.IdentifierManager;
......
37 49

  
38 50
	private static Logger logMetacat = Logger.getLogger(DatapackageSummarizer.class);
39 51
	
40
	public void summarize(List<Identifier> identifiers) throws SQLException {
52
	// Namespace URIs used as prefixes when building annotation models; the
	// *_source entries hold separate document locations for the same vocabularies.
	public static String rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
53
	public static String rdfs = "http://www.w3.org/2000/01/rdf-schema#";
54
	public static String owl = "http://www.w3.org/2002/07/owl#";
55
	public static String oboe = "http://ecoinformatics.org/oboe/oboe.1.0/oboe.owl#";
56
	public static String oboe_core = "http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#";
57
	public static String oa = "http://www.w3.org/ns/oa#";
58
	public static String oa_source = "http://www.w3.org/ns/oa.rdf";
59
	public static String dcterms = "http://purl.org/dc/terms/";
60
	public static String dcterms_source = "http://dublincore.org/2012/06/14/dcterms.rdf";
61
	public static String foaf = "http://xmlns.com/foaf/0.1/";
62
	public static String foaf_source = "http://xmlns.com/foaf/spec/index.rdf";
63
    public static String prov = "http://www.w3.org/ns/prov#";
64
    public static String prov_source = "http://www.w3.org/ns/prov.owl";
65
    public static String cito =  "http://purl.org/spar/cito/";
66

  
67
    /**
68
     * Generate annotation for given metadata identifier
69
     * @param metadataPid
70
     */
71
    public String generateAnnotation(Identifier metadataPid) throws Exception {
72
    	
73
    	DataPackage dataPackage = this.getDataPackage(metadataPid);
74
    	
75
		OntModel m = ModelFactory.createOntologyModel();
76
		Ontology ont = m.createOntology("http://annotation/" + metadataPid.getValue());
41 77
		
78
		// TODO: import the ontologies we use
79
		ont.addImport(m.createResource(oboe));
80
		m.addSubModel(ModelFactory.createOntologyModel().read(oboe));
81
		
82
		ont.addImport(m.createResource(oa));
83
		m.addSubModel(ModelFactory.createOntologyModel().read(oa_source));
84

  
85
		ont.addImport(m.createResource(dcterms));
86
		m.addSubModel(ModelFactory.createOntologyModel().read(dcterms_source));
87

  
88
		ont.addImport(m.createResource(foaf));
89
		m.addSubModel(ModelFactory.createOntologyModel().read(foaf_source));
90
		
91
		ont.addImport(m.createResource(prov));
92
		//m.addSubModel(ModelFactory.createOntologyModel().read(prov_source));
93

  
94
		ont.addImport(m.createResource(cito));
95
		
96
		// properties
97
		ObjectProperty hasBodyProperty = m.getObjectProperty(oa + "hasBody");
98
		ObjectProperty hasTargetProperty = m.getObjectProperty(oa + "hasTarget");
99
		ObjectProperty hasSourceProperty = m.getObjectProperty(oa + "hasSource");
100
		ObjectProperty annotatedByProperty = m.getObjectProperty(oa + "annotatedBy");
101
		Property identifierProperty = m.getProperty(dcterms + "identifier");
102
		Property nameProperty = m.getProperty(foaf + "name");
103
		
104
		ObjectProperty ofCharacteristic = m.getObjectProperty(oboe_core + "ofCharacteristic");
105
		ObjectProperty usesStandard = m.getObjectProperty(oboe_core + "usesStandard");
106

  
107
		// classes
108
		OntClass measurementClass =  m.getOntClass(oboe_core + "Measurement");
109
		OntClass characteristicClass = m.getOntClass(oboe_core + "Characteristic");
110
		OntClass standardClass =  m.getOntClass(oboe_core + "Standard");
111
		
112
		Resource annotationClass =  m.getOntClass(oa + "Annotation");
113
		Resource specificResourceClass =  m.getOntClass(oa + "SpecificResource");
114
		Resource entityClass =  m.getResource(prov + "Entity");
115
		Resource personClass =  m.getResource(prov + "Person");
116
		
117
		int cnt = 0;
118

  
119
		// these apply to every attribute annotation
120
		Individual meta1 = m.createIndividual(ont.getURI() + "#meta" + cnt, entityClass);
121
		Individual p1 = m.createIndividual(ont.getURI() + "#person" + cnt, personClass);
122
		p1.addProperty(nameProperty, "Ben Leinfelder");
123
		meta1.addProperty(identifierProperty, metadataPid.getValue());
124

  
125
		// loop through the tables and attributes
126
		Entity[] entities = dataPackage.getEntityList();
127
		for (Entity entity: entities) {
128
			String entityName = entity.getName();
129
			logMetacat.debug("Entity name: " + entityName);
130
			Attribute[] attributes = entity.getAttributeList().getAttributes();
131
			for (Attribute attribute: attributes) {
132
				
133
				String attributeName = attribute.getName();
134
				String attributeLabel = attribute.getLabel();
135
				String attributeDefinition = attribute.getDefinition();
136
				String attributeType = attribute.getAttributeType();
137
				String attributeScale = attribute.getMeasurementScale();
138
				String attributeUnitType = attribute.getUnitType();
139
				String attributeUnit = attribute.getUnit();
140
				String attributeDomain = attribute.getDomain().getClass().getSimpleName();
141

  
142
				logMetacat.debug("Attribute name: " + attributeName);
143
				logMetacat.debug("Attribute label: " + attributeLabel);
144
				logMetacat.debug("Attribute definition: " + attributeDefinition);
145
				logMetacat.debug("Attribute type: " + attributeType);
146
				logMetacat.debug("Attribute scale: " + attributeScale);
147
				logMetacat.debug("Attribute unit type: " + attributeUnitType);
148
				logMetacat.debug("Attribute unit: " + attributeUnit);
149
				logMetacat.debug("Attribute domain: " + attributeDomain);
150
			
151
				// look up the characteristic or standard subclasses
152
				Resource standard = this.lookupStandard(standardClass, attribute);
153
				Resource characteristic = this.lookupCharacteristic(characteristicClass, attribute);
154
				
155
				if (standard != null || characteristic != null) {
156
					
157
					// instances
158
					Individual m1 = m.createIndividual(ont.getURI() + "#measurement" + cnt, measurementClass);
159
					Individual a1 = m.createIndividual(ont.getURI() + "#annotation" + cnt, annotationClass);
160
					Individual t1 = m.createIndividual(ont.getURI() + "#target" + cnt, specificResourceClass);
161
					
162
					// statements about the annotation
163
					a1.addProperty(hasBodyProperty, m1);
164
					a1.addProperty(hasTargetProperty, t1);
165
					t1.addProperty(hasSourceProperty, meta1);
166
					a1.addProperty(annotatedByProperty, p1);
167
					
168
					// describe the measurement in terms of restrictions
169
					if (standard != null) {
170
						AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, usesStandard, standard);
171
						m1.addOntClass(avfr);
172
					}
173
					if (characteristic != null) {
174
						AllValuesFromRestriction avfr = m.createAllValuesFromRestriction(null, ofCharacteristic, characteristic);
175
						m1.addOntClass(avfr);
176
					}
177
					cnt++;
178
				}
179
				
180
			}		
181
		}
182
		
183
		StringWriter sw = new StringWriter();
184
		// only write the base model
185
		//m.write(sw, "RDF/XML-ABBREV");
186
		m.write(sw, null);
187

  
188
		return sw.toString();
189
		
190
	}
191
	
192
	private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
193
		// what's our unit?
194
		String unit = attribute.getUnit().toLowerCase();
195
		boolean found = false;
196
		ExtendedIterator iter = standardClass.listSubClasses(false);
197
		while (iter.hasNext()) {
198
			OntClass subclass = (OntClass) iter.next();
199
			String subclassName = subclass.getLocalName().toLowerCase();
200
			logMetacat.debug("subclass: " + subclassName);
201
			if (subclassName.equals(unit)) {
202
				found = true;
203
			}
204
			if (subclass.hasLabel(unit, null)) {
205
				found = true;
206
			}
207
			if (found) {
208
				return subclass;
209
			}
210
		}
211
		return null;
212
	}
213
	
214
	private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
215
		// what's our label?
216
		String label = attribute.getLabel().toLowerCase();
217
		boolean found = false;
218
		// find something that matches
219
		ExtendedIterator iter = characteristicClass.listSubClasses();
220
		while (iter.hasNext()) {
221
			OntClass subclass = (OntClass) iter.next();
222
			String subclassName = subclass.getLocalName().toLowerCase();
223
			logMetacat.debug("subclass: " + subclassName);
224
			if (subclassName.equals(label)) {
225
				found = true;
226
			}
227
			if (subclass.hasLabel(label, null)) {
228
				found = true;
229
			}
230
			if (found) {
231
				return subclass;
232
			}
233
		}
234
		return null;
235
	}
236
	
237
	private DataPackage getDataPackage(Identifier pid) throws Exception {
238
		// for using the MN API as the MN itself
239
		MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
240
		Session session = new Session();
241
        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
242
        session.setSubject(subject);
243
		InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
244

  
245
		// parse the metadata
246
		DataPackageParserInterface parser = new Eml200DataPackageParser();
247
		parser.parse(emlStream);
248
		DataPackage dataPackage = parser.getDataPackage();
249
		return dataPackage;
250
	}
251

  
252
	private void summarize(List<Identifier> identifiers) throws SQLException {
253
		
42 254
		DBConnection dbconn = null;
43 255

  
44 256
		try {
......
73 285
				
74 286
				try {
75 287
					
76
					// for using the MN API as the MN itself
77
					MockHttpServletRequest request = new MockHttpServletRequest(null, null, null);
78
					Session session = new Session();
79
			        Subject subject = MNodeService.getInstance(request).getCapabilities().getSubject(0);
80
			        session.setSubject(subject);
81
					InputStream emlStream = MNodeService.getInstance(request).get(session, pid);
82
			
83
					// parse the metadata
84
					DataPackageParserInterface parser = new Eml200DataPackageParser();
85
					parser.parse(emlStream);
86
					DataPackage dataPackage = parser.getDataPackage();
288
					// get the package
289
					DataPackage dataPackage = this.getDataPackage(pid);
87 290
					String title = dataPackage.getTitle();
88 291
					logMetacat.debug("Title: " + title);
89 292
					
......
146 349
	}
147 350
	
148 351
	public static void main(String[] args) throws Exception {
149
		
150 352
		// set up the properties based on the test/deployed configuration of the workspace
151
		SortedProperties testProperties = new SortedProperties("test/test.properties");
152
		testProperties.load();
153
		String metacatContextDir = testProperties.getProperty("metacat.contextDir");
154
		PropertyService.getInstance(metacatContextDir + "/WEB-INF");
353
			SortedProperties testProperties = new SortedProperties("test/test.properties");
354
			testProperties.load();
355
			String metacatContextDir = testProperties.getProperty("metacat.contextDir");
356
			PropertyService.getInstance(metacatContextDir + "/WEB-INF");
357
			
358
			testGenerate();
359
//			testSummary();
360
			System.exit(0);
361
	}
362
	
363
	public static void testGenerate() throws Exception {
364
		Identifier metadataPid = new Identifier();
365
		metadataPid.setValue("doi:10.5072/FK2445ZN4");
366
		DatapackageSummarizer ds = new DatapackageSummarizer();
367
		String rdfString = ds.generateAnnotation(metadataPid);
368
		logMetacat.info("RDF annotation: \n" + rdfString);
155 369
		
370
	}
371
	
372
	public static void testSummary() throws Exception {
373
		
156 374
		// summarize the packages
157 375
		DatapackageSummarizer ds = new DatapackageSummarizer();
158 376
		List<Identifier> identifiers = new ArrayList<Identifier>();

Also available in: Unified diff