Revision 8711
Added by ben leinfelder over 10 years ago
src/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizer.java | ||
---|---|---|
1 | 1 |
package edu.ucsb.nceas.metacat.annotation; |
2 | 2 |
|
3 | 3 |
import java.io.InputStream; |
4 |
import java.io.InputStreamReader; |
|
4 | 5 |
import java.io.StringWriter; |
6 |
import java.net.URL; |
|
7 |
import java.net.URLEncoder; |
|
5 | 8 |
import java.sql.PreparedStatement; |
6 | 9 |
import java.sql.SQLException; |
7 | 10 |
import java.util.ArrayList; |
... | ... | |
21 | 24 |
import org.ecoinformatics.datamanager.parser.Entity; |
22 | 25 |
import org.ecoinformatics.datamanager.parser.generic.DataPackageParserInterface; |
23 | 26 |
import org.ecoinformatics.datamanager.parser.generic.Eml200DataPackageParser; |
27 |
import org.w3c.dom.Document; |
|
28 |
import org.w3c.dom.NodeList; |
|
24 | 29 |
|
25 | 30 |
import com.hp.hpl.jena.ontology.AllValuesFromRestriction; |
26 | 31 |
import com.hp.hpl.jena.ontology.Individual; |
... | ... | |
44 | 49 |
import edu.ucsb.nceas.metacat.replication.ReplicationService; |
45 | 50 |
import edu.ucsb.nceas.metacat.util.DocumentUtil; |
46 | 51 |
import edu.ucsb.nceas.utilities.SortedProperties; |
52 |
import edu.ucsb.nceas.utilities.XMLUtilities; |
|
47 | 53 |
|
48 | 54 |
public class DatapackageSummarizer { |
49 | 55 |
|
... | ... | |
64 | 70 |
public static String prov_source = "http://www.w3.org/ns/prov.owl"; |
65 | 71 |
public static String cito = "http://purl.org/spar/cito/"; |
66 | 72 |
|
73 |
// for looking up concepts in BioPortal |

74 |
// Base URL of the BioPortal REST service; the "/annotator" path is appended to it when a lookup is made.
static final String REST_URL = "http://data.bioontology.org"; |

75 |
// Key sent as the "apikey" query parameter on BioPortal requests.
// NOTE(review): this credential is hard-coded in source; consider loading it from configuration instead.
static final String API_KEY = "24e4775e-54e0-11e0-9d7b-005056aa3316"; |
|
76 |
|
|
67 | 77 |
// package visibility for testing only |
68 | 78 |
boolean randomize = false; |
69 | 79 |
|
... | ... | |
230 | 240 |
return subclass; |
231 | 241 |
} |
232 | 242 |
} |
233 |
return null; |
|
243 |
// try to look it up if we got this far |
|
244 |
return this.lookupRemoteAnnotationClass(standardClass, unit); |
|
234 | 245 |
} |
235 | 246 |
|
236 | 247 |
private Resource lookupCharacteristic(OntClass characteristicClass, Attribute attribute) { |
... | ... | |
262 | 273 |
return subclass; |
263 | 274 |
} |
264 | 275 |
} |
276 |
|
|
277 |
// try to look it up if we got this far |
|
278 |
return this.lookupRemoteAnnotationClass(characteristicClass, attribute.getDefinition()); |
|
279 |
|
|
280 |
} |
|
281 |
|
|
282 |
private Resource lookupRemoteAnnotationClass(OntClass superClass, String text) { |
|
283 |
|
|
284 |
|
|
285 |
try { |
|
286 |
|
|
287 |
String urlParameters = "apikey=" + API_KEY; |
|
288 |
urlParameters += "&format=xml"; |
|
289 |
// urlParameters += "&ontologies=OBOE-SBC"; |
|
290 |
urlParameters += "&ontologies=SWEET"; |
|
291 |
urlParameters += "&text=" + URLEncoder.encode(text, "UTF-8"); |
|
292 |
|
|
293 |
String url = REST_URL + "/annotator?" + urlParameters ; |
|
294 |
URL restURL = new URL(url); |
|
295 |
InputStream is = ReplicationService.getURLStream(restURL); |
|
296 |
Document doc = XMLUtilities.getXMLReaderAsDOMDocument(new InputStreamReader(is, "UTF-8")); |
|
297 |
NodeList classNodeList = XMLUtilities.getNodeListWithXPath(doc, "//annotation/annotatedClass/id"); |
|
298 |
if (classNodeList != null && classNodeList.getLength() > 0) { |
|
299 |
String classURI = classNodeList.item(0).getFirstChild().getNodeValue(); |
|
300 |
logMetacat.info("annotator suggested: " + classURI); |
|
301 |
Resource subclass = superClass.getModel().getResource(classURI); |
|
302 |
// TODO: check that it is a subclass of superClass? |
|
303 |
return subclass; |
|
304 |
} |
|
305 |
} catch (Exception e) { |
|
306 |
logMetacat.error("Could not lookup BioPortal annotation for text= " + text, e); |
|
307 |
} |
|
308 |
|
|
265 | 309 |
return null; |
266 | 310 |
} |
267 | 311 |
|
Also available in: Unified diff
Include method to look up annotation classes from BioPortal. We still have OBOE-SBC in there, and they have the SWEET ontology. The suggestions returned are not perfect, but they can be better than nothing. Ideally, we'd only query a few ontologies so we don't end up using terms from medical ontologies that aren't really appropriate for our domain. https://projects.ecoinformatics.org/ecoinfo/issues/6256