Project

General

Profile

« Previous | Next » 

Revision 9420

Subclass AnnotatorSubprocessor for use in metacat-index (uses the embedded Solr server and SolrJ for retrieving/merging existing documents).

View differences:

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/annotation/MetacatAnnotatorSubprocessor.java
1
package edu.ucsb.nceas.metacat.index.annotation;
2

  
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8

  
9
import javax.xml.xpath.XPathExpressionException;
10

  
11
import org.apache.commons.codec.EncoderException;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.dataone.cn.indexer.annotation.AnnotatorSubprocessor;
15
import org.dataone.cn.indexer.solrhttp.SolrDoc;
16
import org.dataone.cn.indexer.solrhttp.SolrElementField;
17

  
18
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
19

  
20
public class MetacatAnnotatorSubprocessor extends AnnotatorSubprocessor {
21
	
22
    private static Log log = LogFactory.getLog(AnnotatorSubprocessor.class);
23
	
24
    
25
    @Override
26
    public Map<String, SolrDoc> processDocument(String annotationId, Map<String, SolrDoc> docs,
27
            InputStream is) throws Exception {
28

  
29
        // check for annotations, and add them if found
30
        SolrDoc annotations = parseAnnotation(is);
31
        if (annotations != null) {
32
            String referencedPid = annotations.getIdentifier();
33
            SolrDoc referencedDoc = docs.get(referencedPid);
34

  
35
            // make sure we have a reference for the document we annotating
36
            if (referencedDoc == null) {
37
                try {
38
                    referencedDoc = ResourceMapSubprocessor.getSolrDoc(referencedPid);
39
                } catch (Exception e) {
40
                    log.error("Unable to retrieve solr document: " + referencedPid
41
                            + ".  Exception attempting to communicate with solr server.", e);
42
                }
43

  
44
                if (referencedDoc == null) {
45
                    referencedDoc = new SolrDoc();
46
                }
47
                docs.put(referencedPid, referencedDoc);
48
            }
49

  
50
            // make sure we say we annotate the object
51
            SolrDoc annotationDoc = docs.get(annotationId);
52
            if (annotationDoc != null) {
53
                annotationDoc.addField(new SolrElementField(FIELD_ANNOTATES, referencedPid));
54
            }
55

  
56
            // add the annotations to the referenced document
57
            Iterator<SolrElementField> annotationIter = annotations.getFieldList().iterator();
58
            while (annotationIter.hasNext()) {
59
                SolrElementField annotation = annotationIter.next();
60
                if (!this.getFieldsToMerge().contains(annotation.getName())) {
61
                    log.debug("SKIPPING field (not in fieldsToMerge): " + annotation.getName());
62
                    continue;
63
                }
64
                referencedDoc.addField(annotation);
65
                log.debug("ADDING annotation to " + referencedPid + ": " + annotation.getName()
66
                        + "=" + annotation.getValue());
67
            }
68
        } else {
69
            log.warn("Annotations were not found when parsing: " + annotationId);
70
        }
71
        // return the collection that we have augmented
72
        return docs;
73
    }
74
    
75
    /**
76
     * Merge updates with existing solr documents
77
     * 
78
     * @param indexDocument
79
     * @return
80
     * @throws IOException
81
     * @throws EncoderException
82
     * @throws XPathExpressionException
83
     */
84
    public SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
85
            EncoderException, XPathExpressionException {	
86
        
87
		return mergeWithIndexedDocument(indexDocument, getFieldsToMerge());
88
    }
89
    
90
    /**
91
     * Inspired by SubprocessorUtility method, but works with embedded solr server
92
     * @param indexDocument
93
     * @param fieldsToMerge
94
     * @return
95
     * @throws IOException
96
     * @throws EncoderException
97
     * @throws XPathExpressionException
98
     */
99
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument, List<String> fieldsToMerge)
100
            throws IOException, EncoderException, XPathExpressionException {
101

  
102
        log.debug("about to merge indexed document with new doc to insert for pid: "
103
                + indexDocument.getIdentifier());
104
        SolrDoc solrDoc = null;
105
		try {
106
			solrDoc = ResourceMapSubprocessor.getSolrDoc(indexDocument.getIdentifier());
107
		} catch (Exception e) {
108
			log.error("Could not retrieve existing index document: " + indexDocument.getIdentifier(), e);
109
		} 
110
        if (solrDoc != null) {
111
            log.debug("found existing doc to merge for pid: " + indexDocument.getIdentifier());
112
            for (SolrElementField field : solrDoc.getFieldList()) {
113
                if (fieldsToMerge.contains(field.getName())
114
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
115
                    indexDocument.addField(field);
116
                    log.debug("merging field: " + field.getName() + " with value: "
117
                            + field.getValue());
118
                }
119
            }
120
        }
121
        return indexDocument;
122
    }
123

  
124
}
0 125

  
metacat-index/src/main/resources/application-context-annotator.xml
3 3
	xmlns:p="http://www.springframework.org/schema/p"
4 4
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
5 5

  
6
	<bean id="annotatorSubprocessor" class="org.dataone.cn.indexer.annotation.AnnotatorSubprocessor">
6
	<bean id="annotatorSubprocessor" class="edu.ucsb.nceas.metacat.index.annotation.MetacatAnnotatorSubprocessor">
7 7
		
8 8
		<!-- match annotation documents -->
9 9
		<property name="matchDocuments">
......
14 14
		<property name="fieldList">
15 15
			<list>
16 16
				<ref bean="annotation.expansion" />
17
				<ref bean="annotation.bioportal.expansion" />
18
				<ref bean="annotation.esor.expansion" />
17 19
			</list>
18 20
		</property>
19 21
		<property name="fieldsToMerge">
20 22
     		<list>
21 23
     			<value>sem_annotation</value>
24
     			<value>sem_annotation_bioportal_sm</value>
25
     			<value>sem_annotation_esor_sm</value>     			
22 26
     			<value>sem_annotates</value>
23 27
     			<value>sem_annotated_by</value>
24 28
     		</list>
25
     	</property>	
29
     	</property>
30
     	<property name="ontologyList">
31
			<list>
32
				<value>http://purl.org/spar/datacite/</value>
33
				<value>http://purl.org/dc/terms/</value>
34
				<value>http://www.w3.org/2006/time</value>
35
				<value>http://purl.dataone.org/ontologies/observation/d1-ECSO.owl</value>
36
				<value>http://purl.dataone.org/ontologies/provenance/ProvONE/v1/owl/provone.owl</value>
37
				<value>http://purl.obolibrary.org/obo/envo.owl</value>
38
				<value>http://purl.obolibrary.org/obo/chebi.owl</value>
39
			</list>
40
		</property>
26 41
	</bean>
27 42
	
28 43
	<bean id="annotation.expansion" class="org.dataone.cn.indexer.annotation.SparqlField">
......
35 50
				PREFIX owl: <http://www.w3.org/2002/07/owl#> 
36 51
				
37 52
				SELECT ?sem_annotation
38
				FROM <$GRAPH_NAME>				
39 53
				WHERE { 
40 54
						<$CONCEPT_URI> rdfs:subClassOf+ ?sem_annotation .
41 55
				 	} 
......
44 58
		</constructor-arg>
45 59
		<!--property name="multivalue" value="false" /-->
46 60
	</bean>
61
	
62
	<!-- bioportal-based annotation -->
63
	<!-- SparqlField named sem_annotation_bioportal_sm. The query selects every value
	     reachable from the concept URI through one or more rdfs:subClassOf links.
	     NOTE(review): $CONCEPT_URI looks like a placeholder substituted before the
	     query runs; confirm against SparqlField / AnnotatorSubprocessor. -->
	<bean id="annotation.bioportal.expansion" class="org.dataone.cn.indexer.annotation.SparqlField">
64
		<constructor-arg name="name" value="sem_annotation_bioportal_sm" />
65
		<constructor-arg name="query">
66
			<value>
67
				<![CDATA[
68
				PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
69
				PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
70
				PREFIX owl: <http://www.w3.org/2002/07/owl#> 
71
				
72
				SELECT ?sem_annotation_bioportal_sm
73
				WHERE { 
74
						<$CONCEPT_URI> rdfs:subClassOf+ ?sem_annotation_bioportal_sm .
75
				 	} 
76
				 ]]>
77
			</value>
78
		</constructor-arg>
79
	</bean>
80
	<!-- SparqlField named sem_annotation_bioportal_raw_sm. The query selects the
	     objects of rdf:about on the concept URI.
	     NOTE(review): rdf:about is normally an RDF/XML syntax attribute rather than a
	     predicate found in triples; confirm that this pattern can actually match.
	     NOTE(review): this bean is not referenced by the fieldList shown in this diff;
	     confirm whether it is intentionally unused. -->
	<bean id="annotation.bioportal.raw" class="org.dataone.cn.indexer.annotation.SparqlField">
81
		<constructor-arg name="name" value="sem_annotation_bioportal_raw_sm" />
82
		<constructor-arg name="query">
83
			<value>
84
				<![CDATA[
85
				PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
86
				PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
87
				PREFIX owl: <http://www.w3.org/2002/07/owl#> 
88
				
89
				SELECT ?sem_annotation_bioportal_raw_sm
90
				WHERE { 
91
						<$CONCEPT_URI> rdf:about ?sem_annotation_bioportal_raw_sm .
92
				 	} 
93
				 ]]>
94
			</value>
95
		</constructor-arg>
96
	</bean>
97
	
98
		<!-- esor-based annotation -->
99
	<!-- SparqlField named sem_annotation_esor_sm. The query selects every value
	     reachable from the concept URI through one or more rdfs:subClassOf links.
	     NOTE(review): $CONCEPT_URI looks like a placeholder substituted before the
	     query runs; confirm against SparqlField / AnnotatorSubprocessor. -->
	<bean id="annotation.esor.expansion" class="org.dataone.cn.indexer.annotation.SparqlField">
100
		<constructor-arg name="name" value="sem_annotation_esor_sm" />
101
		<constructor-arg name="query">
102
			<value>
103
				<![CDATA[
104
				PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
105
				PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
106
				PREFIX owl: <http://www.w3.org/2002/07/owl#> 
107
				
108
				SELECT ?sem_annotation_esor_sm
109
				WHERE { 
110
						<$CONCEPT_URI> rdfs:subClassOf+ ?sem_annotation_esor_sm .
111
				 	} 
112
				 ]]>
113
			</value>
114
		</constructor-arg>
115
	</bean>
116
	<!-- SparqlField named sem_annotation_esor_raw_sm. The query selects the objects
	     of rdf:about on the concept URI.
	     NOTE(review): rdf:about is normally an RDF/XML syntax attribute rather than a
	     predicate found in triples; confirm that this pattern can actually match.
	     NOTE(review): this bean is not referenced by the fieldList shown in this diff;
	     confirm whether it is intentionally unused. -->
	<bean id="annotation.esor.raw" class="org.dataone.cn.indexer.annotation.SparqlField">
117
		<constructor-arg name="name" value="sem_annotation_esor_raw_sm" />
118
		<constructor-arg name="query">
119
			<value>
120
				<![CDATA[
121
				PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
122
				PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
123
				PREFIX owl: <http://www.w3.org/2002/07/owl#> 
124
				
125
				SELECT ?sem_annotation_esor_raw_sm
126
				WHERE { 
127
						<$CONCEPT_URI> rdf:about ?sem_annotation_esor_raw_sm .
128
				 	} 
129
				 ]]>
130
			</value>
131
		</constructor-arg>
132
	</bean>
47 133

  
48 134
</beans>

Also available in: Unified diff