Project

General

Profile

« Previous | Next » 

Revision 8757

First pass at a direct EML -> semantic index method. It still produces an RDF model, but persists it only in the triplestore rather than in Metacat. This allows us to re-run the indexing without adding stale RDF to the MN store.

View differences:

test/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizerTest.java
78 78
	public static Test suite() {
79 79
		TestSuite suite = new TestSuite();
80 80
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotations"));
81
		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
81
//		suite.addTest(new DatapackageSummarizerTest("testGenerateAnnotation"));
82
		suite.addTest(new DatapackageSummarizerTest("testStandaloneAnnotation"));
82 83
//		suite.addTest(new DatapackageSummarizerTest("testGenerateRandomAnnotation"));
83 84
		return suite;
84 85
	}
85 86
	
87
	public void testStandaloneAnnotation() throws Exception {
88
		// insert the test document to sem-index
89
		Identifier metadataPid = new Identifier();
90
		metadataPid.setValue("testAnnotation.eml." + System.currentTimeMillis());
91
		Session session = getTestSession();
92
		try {
93
			InputStream object = new ByteArrayInputStream(this.getTestDocFromFile(ANNOTATION_TEST_DOC).getBytes("UTF-8"));
94
			SystemMetadata sysmeta = createSystemMetadata(metadataPid, session.getSubject(), object);
95
			ObjectFormatIdentifier formatId = new ObjectFormatIdentifier();
96
			formatId.setValue("eml://ecoinformatics.org/eml-2.0.0");
97
			sysmeta.setFormatId(formatId);
98
			Identifier pid = MNodeService.getInstance(request).create(session, metadataPid, object, sysmeta);
99
			assertEquals(metadataPid.getValue(), pid.getValue());
100
		} catch (Exception e) {
101
			e.printStackTrace();
102
			fail("Could not add metadata test file: " + e.getMessage());
103
		}
104
		
105
		// index it
106
		DatapackageSummarizer ds = new DatapackageSummarizer();
107
		ds.indexEphemeralAnnotation(metadataPid);
108
		
109
		// check it
110
		
111
	}
112
	
86 113
	/**
87 114
	 * Generate a single annotation based exclusively on the metadata
88 115
	 * @throws Exception
src/edu/ucsb/nceas/metacat/annotation/DatapackageSummarizer.java
6 6
import java.sql.SQLException;
7 7
import java.util.ArrayList;
8 8
import java.util.Arrays;
9
import java.util.HashMap;
9 10
import java.util.Iterator;
10 11
import java.util.List;
11 12
import java.util.Map;
12 13
import java.util.Vector;
13 14

  
15
import org.apache.commons.io.IOUtils;
14 16
import org.apache.log4j.Logger;
15 17
import org.apache.wicket.protocol.http.mock.MockHttpServletRequest;
16 18
import org.dataone.service.types.v1.Identifier;
......
28 30
import com.hp.hpl.jena.ontology.OntClass;
29 31
import com.hp.hpl.jena.ontology.OntModel;
30 32
import com.hp.hpl.jena.ontology.Ontology;
33
import com.hp.hpl.jena.query.Dataset;
34
import com.hp.hpl.jena.query.Query;
35
import com.hp.hpl.jena.query.QueryExecution;
36
import com.hp.hpl.jena.query.QueryExecutionFactory;
37
import com.hp.hpl.jena.query.QueryFactory;
38
import com.hp.hpl.jena.query.QuerySolution;
39
import com.hp.hpl.jena.query.ResultSet;
31 40
import com.hp.hpl.jena.rdf.model.ModelFactory;
32 41
import com.hp.hpl.jena.rdf.model.Property;
33 42
import com.hp.hpl.jena.rdf.model.Resource;
43
import com.hp.hpl.jena.tdb.TDBFactory;
34 44
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
35 45

  
36 46
import edu.ucsb.nceas.metacat.DBUtil;
......
40 50
import edu.ucsb.nceas.metacat.database.DBConnection;
41 51
import edu.ucsb.nceas.metacat.database.DBConnectionPool;
42 52
import edu.ucsb.nceas.metacat.dataone.MNodeService;
53
import edu.ucsb.nceas.metacat.index.MetacatSolrIndex;
43 54
import edu.ucsb.nceas.metacat.properties.PropertyService;
44 55
import edu.ucsb.nceas.metacat.replication.ReplicationService;
45 56
import edu.ucsb.nceas.metacat.util.DocumentUtil;
......
70 81
    
71 82
    // package visibility for testing only
72 83
    boolean randomize = false;
84
    
85
    public void indexEphemeralAnnotation(Identifier metadataPid) throws Exception {
73 86

  
87
    	// generate an annotation for the metadata given
88
		String rdfContent = this.generateAnnotation(metadataPid);
89
		
90
		// load to triple store
91
		Dataset dataset = TDBFactory.createDataset("./tbd");
92
		
93
    	// read the annotation into the triplestore
94
		InputStream source = IOUtils.toInputStream(rdfContent, "UTF-8");
95
    	String name = "http://annotation";
96
    	boolean loaded = dataset.containsNamedModel(name);
97
    	if (loaded) {
98
    		dataset.removeNamedModel(name);
99
    		loaded = false;
100
    	}
101
		if (!loaded) {
102
			OntModel ontModel = ModelFactory.createOntologyModel();
103
			ontModel.read(source, name);
104
			dataset.addNamedModel(name, ontModel);
105
		}
106
		
107
		// query for fields to add to index
108
        Map<String, List<Object>> fields = new HashMap<String, List<Object>>();
109
		
110
        // TODO: look up the query to use (support multiple like in the indexing project)
111
        String q = null;
112
        
113
        q = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
114
        	+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
115
        	+ "PREFIX owl: <http://www.w3.org/2002/07/owl#> " 
116
			+ "PREFIX oboe-core: <http://ecoinformatics.org/oboe/oboe.1.0/oboe-core.owl#> "
117
			+ "PREFIX oa: <http://www.w3.org/ns/oa#> "
118
			+ "PREFIX dcterms: <http://purl.org/dc/terms/> "
119
			+ "SELECT ?standard_sm ?id "
120
			+ "FROM <$GRAPH_NAME> "
121
			+ "WHERE { "
122
			+ "		?measurement rdf:type oboe-core:Measurement . "
123
			+ "		?measurement rdf:type ?restriction . "
124
			+ "		?restriction owl:onProperty oboe-core:usesStandard . "
125
			+ "		?restriction owl:allValuesFrom ?standard . "
126
			+ "		?standard rdfs:subClassOf+ ?standard_sm . "
127
			+ "		?standard_sm rdfs:subClassOf oboe-core:Standard . "				
128
			+ "		?annotation oa:hasBody ?measurement . "												
129
			+ "		?annotation oa:hasTarget ?target . "
130
			+ "		?target oa:hasSource ?metadata . "
131
			+ "		?metadata dcterms:identifier ?id . " 
132
			+ "}";
133

  
134
        q = q.replaceAll("\\$GRAPH_NAME", name);
135
		Query query = QueryFactory.create(q);
136
		QueryExecution qexec = QueryExecutionFactory.create(query, dataset);
137
		ResultSet results = qexec.execSelect();
138
		
139
		while (results.hasNext()) {
140
			QuerySolution solution = results.next();
141
			System.out.println(solution.toString());
142
			
143
			// find the index document we are trying to augment with the annotation
144
			if (solution.contains("id")) {
145
				String id = solution.getLiteral("id").getString();
146
				if (!id.equals(metadataPid.getValue())) {
147
					// skip any solution that does not annotate the given pid
148
					continue;
149
				}
150
				
151
			}
152
			// loop through the solution variables, add an index value for each
153
			Iterator<String> varNameIter = solution.varNames();
154
			while (varNameIter.hasNext()) {
155
				String key = varNameIter.next();
156
				if (key.equals("id")) {
157
					// don't include the id
158
					continue;
159
				}
160
				String value = solution.get(key).toString();
161
				List<Object> values = fields.get(key);
162
				if (values  == null) {
163
					values = new ArrayList<Object>();
164
				}
165
				values.add(value);
166
				fields.put(key, values);
167
			}
168
		}
169

  
170
		// clean up the triple store
171
		TDBFactory.release(dataset);
172
        
173
		// add to index
174
		MetacatSolrIndex.getInstance().submit(metadataPid, null, fields, true);
175
		
176
		
177
	}
178

  
74 179
    /**
75 180
     * Generate annotation for given metadata identifier
76 181
     * @param metadataPid

Also available in: Unified diff