Project

General

Profile

« Previous | Next » 

Revision 8763

Simplify lookup for classes and ORCID. Remove the "random" annotation code branches -- it is just too confusing to look at those bogus classes, especially now that we have "real" generated annotations.

View differences:

test/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizerTest.java
81 81
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
82 82
//		suite.addTest(new DatapackageSummarizerTest("testStandaloneAnnotation"));
83 83
		suite.addTest(new DatapackageSummarizerTest("testIndexAnnotations"));
84
//		suite.addTest(new DatapackageSummarizerTest("testGenerateRandomAnnotation"));
85 84
		return suite;
86 85
	}
87 86
	
......
116 115
	 * @throws Exception
117 116
	 */
118 117
	public void testGenerateAnnotation() throws Exception {
119
		this.testGenerateAnnotation_base(false);
120
	}
121
	
122
	/**
123
	 * Generate a bunch of random annotations
124
	 * @throws Exception
125
	 */
126
	public void testGenerateRandomAnnotation() throws Exception {
127
		for (int i = 0; i < 5; i++) {
128
			this.testGenerateAnnotation_base(true);
129
		}
130
	}
131

  
132
	private void testGenerateAnnotation_base(boolean randomize) throws Exception {
133 118
		Identifier metadataPid = new Identifier();
134 119
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
135 120
		Session session = getTestSession();
......
148 133

  
149 134
		// generate the annotation for the metadata
150 135
		DatapackageSummarizer ds = new DatapackageSummarizer();
151
		ds.randomize = randomize;
152 136
		String rdfContent = ds.generateAnnotation(metadataPid);
153 137
		
154 138
		// save the annotation
src/edu/ucsb/nceas/metacat/annotation/OrcidService.java
9 9
import org.apache.log4j.Logger;
10 10
import org.w3c.dom.Node;
11 11

  
12
import edu.ucsb.nceas.metacat.replication.ReplicationService;
13 12
import edu.ucsb.nceas.utilities.XMLUtilities;
14 13

  
15 14
public class OrcidService {
......
49 48
			
50 49
			String url = REST_URL + "?q=" + urlParameters + "&rows=1";
51 50
			URL restURL = new URL(url);
52
			//InputStream is = restURL.openStream();
53
			InputStream is = ReplicationService.getURLStream(restURL);
51
			InputStream is = restURL.openStream();
54 52
			String results = IOUtils.toString(is);
55 53
			logMetacat.debug("RESULTS: " + results);
56 54
			Node doc = XMLUtilities.getXMLReaderAsDOMTreeRootNode(new StringReader(results));
src/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizer.java
41 41
import com.hp.hpl.jena.rdf.model.Property;
42 42
import com.hp.hpl.jena.rdf.model.Resource;
43 43
import com.hp.hpl.jena.tdb.TDBFactory;
44
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
45 44

  
46 45
import edu.ucsb.nceas.metacat.DBUtil;
47 46
import edu.ucsb.nceas.metacat.DocumentImpl;
......
79 78
    
80 79
	public static String OBOE_SBC = "OBOE-SBC";
81 80
    
82
    // package visibility for testing only
83
    boolean randomize = false;
84
    
85 81
    public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
86 82

  
87 83
    	// generate an annotation for the metadata given
......
92 88
		
93 89
    	// read the annotation into the triplestore
94 90
		InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
95
    	String name = "http://annotation";
91
    	String name = "http://annotation/" + metadataPid.getValue();
96 92
    	boolean loaded = dataset.containsNamedModel(name);
97 93
    	if (loaded) {
98 94
    		dataset.removeNamedModel(name);
......
108 104
        Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
109 105
		
110 106
        // TODO: look up the query to use (support multiple like in the indexing project)
111
        String q = null;
112
        
113
        q = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
107
        List<String> queries = new ArrayList<String>();        
108
        queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
114 109
        	+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
115 110
        	+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> " 
116 111
			+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
117 112
			+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
118 113
			+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
119
			+ "SELECT ?standard_sm ?id "
114
			+ "SELECT ?standard_sm ?pid "
120 115
			+ "FROM <$GRAPH_NAME> "
121 116
			+ "WHERE { "
122 117
			+ "		?measurement rdf:type oboe-core:Measurement . "
......
128 123
			+ "		?annotation oa:hasBody ?measurement . "												
129 124
			+ "		?annotation oa:hasTarget ?target . "
130 125
			+ "		?target oa:hasSource ?metadata . "
131
			+ "		?metadata dcterms:identifier ?id . " 
132
			+ "}";
133

  
134
        q = q.replaceAll("\\$GRAPH_NAME", name);
135
		Query query = QueryFactory.create(q);
136
		QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
137
		ResultSet results = qexec.execSelect();
138
		
139
		while (results.hasNext()) {
140
			QuerySolution solution = results.next();
141
			System.out.println(solution.toString());
126
			+ "		?metadata dcterms:identifier ?pid . " 
127
			+ "}");
128
        
129
        queries.add("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
130
    		+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
131
    		+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> "
132
    		+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
133
    		+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
134
    		+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
135
    		+ "SELECT ?characteristic_sm ?pid "
136
    		+ "FROM <$GRAPH_NAME>"
137
    		+ "WHERE { "
138
    		+ "		?measurement rdf:type oboe-core:Measurement . "
139
    		+ "		?measurement rdf:type ?restriction . "
140
			+ "		?restriction owl:onProperty oboe-core:ofCharacteristic . "
141
			+ "		?restriction owl:allValuesFrom ?characteristic . "
142
			+ "		?characteristic rdfs:subClassOf+ ?characteristic_sm . "
143
			+ "		?characteristic_sm rdfs:subClassOf oboe-core:Characteristic . "
144
			+ "		?annotation oa:hasBody ?measurement .	"											
145
			+ "		?annotation oa:hasTarget ?target . "
146
			+ "		?target oa:hasSource ?metadata . "
147
			+ "		?metadata dcterms:identifier ?pid . " 
148
			+ "}");
149
        
150
        for (String q: queries) {
151
	        q = q.replaceAll("\\$GRAPH_NAME", name);
152
			Query query = QueryFactory.create(q);
153
			QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
154
			ResultSet results = qexec.execSelect();
142 155
			
143
			// find the index document we are trying to augment with the annotation
144
			if (solution.contains("id")) {
145
				String id = solution.getLiteral("id").getString();
146
				if (!id.equals(metadataPid.getValue())) {
147
					// skip any solution that does not annotate the given pid
148
					continue;
149
				}
156
			while (results.hasNext()) {
157
				QuerySolution solution = results.next();
158
				System.out.println(solution.toString());
150 159
				
151
			}
152
			// loop through the solution variables, add an index value for each
153
			Iterator<String> varNameIter = solution.varNames();
154
			while (varNameIter.hasNext()) {
155
				String key = varNameIter.next();
156
				if (key.equals("id")) {
157
					// don't include the id
158
					continue;
160
				// find the index document we are trying to augment with the annotation
161
				if (solution.contains("pid")) {
162
					String id = solution.getLiteral("pid").getString();
163
					if (!id.equals(metadataPid.getValue())) {
164
						// skip any solution that does not annotate the given pid
165
						continue;
166
					}
167
					
159 168
				}
160
				String value = solution.get(key).toString();
161
				List<Object> values = fields.get(key);
162
				if (values  == null) {
163
					values = new ArrayList<Object>();
169
				// loop through the solution variables, add an index value for each
170
				Iterator<String> varNameIter = solution.varNames();
171
				while (varNameIter.hasNext()) {
172
					String key = varNameIter.next();
173
					if (key.equals("pid")) {
174
						// don't include the id
175
						continue;
176
					}
177
					String value = solution.get(key).toString();
178
					List<Object> values = fields.get(key);
179
					if (values  == null) {
180
						values = new ArrayList<Object>();
181
					}
182
					values.add(value);
183
					fields.put(key, values);
164 184
				}
165
				values.add(value);
166
				fields.put(key, values);
167 185
			}
168
		}
186
        }
169 187

  
170 188
		// clean up the triple store
171 189
		TDBFactory.release(dataset);
......
363 381
	private Resource lookupStandard(OntClass standardClass, Attribute attribute) {
364 382
		// what's our unit?
365 383
		String unit = attribute.getUnit().toLowerCase();
384
		
385
		/*
386
		boolean found = false;
366 387
		List<String> tokens = Arrays.asList(unit.split(" "));
367

  
368
		boolean found = false;
369 388
		ExtendedIterator iter = standardClass.listSubClasses(false);
370
		if (randomize) {
371
			List subclasses = iter.toList();
372
			int size = subclasses.size();
373
			Long index = new Long(Math.round(Math.floor((Math.random() * (size-1)))));
374
			OntClass subclass = (OntClass) subclasses.get( index.intValue() );
375
			return subclass;
376
		}
377 389
		while (iter.hasNext()) {
378 390
			OntClass subclass = (OntClass) iter.next();
379 391
			String subclassName = subclass.getLocalName().toLowerCase();
......
388 400
				return subclass;
389 401
			}
390 402
		}
403
		*/
404
		
391 405
		// try to look it up if we got this far
392 406
		return BioPortalService.lookupAnnotationClass(standardClass, unit, OBOE_SBC);
393 407
	}
394 408
	
395 409
	private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) {
396
		// what's our label?
410
		// what are we looking for?
397 411
		String label = attribute.getLabel().toLowerCase();
398
		List<String> tokens = Arrays.asList(label.split(" "));
412
		String definition = attribute.getDefinition();
413
		String text = label + " " + definition;
399 414
		
415
		/*
416
		// find something that matches		
400 417
		boolean found = false;
401
		// find something that matches
418
		List<String> tokens = Arrays.asList(label.split(" "));
402 419
		ExtendedIterator iter = characteristicClass.listSubClasses();
403
		if (randomize) {
404
			List subclasses = iter.toList();
405
			int size = subclasses.size();
406
			Long index = new Long(Math.round(Math.floor((Math.random() * (size-1)))));
407
			OntClass subclass = (OntClass) subclasses.get( index.intValue() );
408
			return subclass;
409
		}
410 420
		while (iter.hasNext()) {
411 421
			OntClass subclass = (OntClass) iter.next();
412 422
			String subclassName = subclass.getLocalName().toLowerCase();
......
421 431
				return subclass;
422 432
			}
423 433
		}
434
		*/
424 435
		
425
		// try to look it up if we got this far
426
		return BioPortalService.lookupAnnotationClass(characteristicClass, attribute.getDefinition(), OBOE_SBC);
436
		// try to look it up from the service
437
		return BioPortalService.lookupAnnotationClass(characteristicClass, text, OBOE_SBC);
427 438
		
428 439
	}
429 440
	
src/edu/ucsb/nceas/metacat/annotation/BioPortalService.java
12 12
import com.hp.hpl.jena.ontology.OntClass;
13 13
import com.hp.hpl.jena.rdf.model.Resource;
14 14

  
15
import edu.ucsb.nceas.metacat.replication.ReplicationService;
16 15
import edu.ucsb.nceas.utilities.XMLUtilities;
17 16

  
18 17
public class BioPortalService {
......
43 42
			
44 43
			String url = REST_URL + "/annotator?" + urlParameters ;
45 44
			URL restURL = new URL(url);
46
			InputStream is = ReplicationService.getURLStream(restURL);
45
			InputStream is = restURL.openStream();
47 46
			Document doc = XMLUtilities.getXMLReaderAsDOMDocument(new InputStreamReader(is, "UTF-8"));
48 47
			NodeList classNodeList = XMLUtilities.getNodeListWithXPath(doc, "//annotation/annotatedClass/id");
49 48
			if (classNodeList != null && classNodeList.getLength() > 0) {

Also available in: Unified diff