Project

General

Profile

1
package edu.ucsb.nceas.metacat.index.annotation;
2

    
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8

    
9
import javax.xml.xpath.XPathExpressionException;
10

    
11
import org.apache.commons.codec.EncoderException;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.dataone.cn.indexer.annotation.AnnotatorSubprocessor;
15
import org.dataone.cn.indexer.solrhttp.SolrDoc;
16
import org.dataone.cn.indexer.solrhttp.SolrElementField;
17

    
18
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
19

    
20
public class MetacatAnnotatorSubprocessor extends AnnotatorSubprocessor {
21
	
22
    private static Log log = LogFactory.getLog(AnnotatorSubprocessor.class);
23
	
24
    
25
    @Override
26
    public Map<String, SolrDoc> processDocument(String annotationId, Map<String, SolrDoc> docs,
27
            InputStream is) throws Exception {
28

    
29
        // check for annotations, and add them if found
30
        SolrDoc annotations = parseAnnotation(is);
31
        if (annotations != null) {
32
            String referencedPid = annotations.getIdentifier();
33
            SolrDoc referencedDoc = docs.get(referencedPid);
34

    
35
            // make sure we have a reference for the document we annotating
36
            if (referencedDoc == null) {
37
                try {
38
                    referencedDoc = ResourceMapSubprocessor.getSolrDoc(referencedPid);
39
                } catch (Exception e) {
40
                    log.error("Unable to retrieve solr document: " + referencedPid
41
                            + ".  Exception attempting to communicate with solr server.", e);
42
                }
43

    
44
                if (referencedDoc == null) {
45
                    referencedDoc = new SolrDoc();
46
                }
47
                docs.put(referencedPid, referencedDoc);
48
            }
49

    
50
            // make sure we say we annotate the object
51
            SolrDoc annotationDoc = docs.get(annotationId);
52
            if (annotationDoc != null) {
53
                annotationDoc.addField(new SolrElementField(FIELD_ANNOTATES, referencedPid));
54
            }
55

    
56
            // add the annotations to the referenced document
57
            Iterator<SolrElementField> annotationIter = annotations.getFieldList().iterator();
58
            while (annotationIter.hasNext()) {
59
                SolrElementField annotation = annotationIter.next();
60
                if (!this.getFieldsToMerge().contains(annotation.getName())) {
61
                    log.debug("SKIPPING field (not in fieldsToMerge): " + annotation.getName());
62
                    continue;
63
                }
64
                referencedDoc.addField(annotation);
65
                log.debug("ADDING annotation to " + referencedPid + ": " + annotation.getName()
66
                        + "=" + annotation.getValue());
67
            }
68
        } else {
69
            log.warn("Annotations were not found when parsing: " + annotationId);
70
        }
71
        // return the collection that we have augmented
72
        return docs;
73
    }
74
    
75
    /**
76
     * Merge updates with existing solr documents
77
     * 
78
     * @param indexDocument
79
     * @return
80
     * @throws IOException
81
     * @throws EncoderException
82
     * @throws XPathExpressionException
83
     */
84
    public SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
85
            EncoderException, XPathExpressionException {	
86
        
87
		return mergeWithIndexedDocument(indexDocument, getFieldsToMerge());
88
    }
89
    
90
    /**
91
     * Inspired by SubprocessorUtility method, but works with embedded solr server
92
     * @param indexDocument
93
     * @param fieldsToMerge
94
     * @return
95
     * @throws IOException
96
     * @throws EncoderException
97
     * @throws XPathExpressionException
98
     */
99
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument, List<String> fieldsToMerge)
100
            throws IOException, EncoderException, XPathExpressionException {
101

    
102
        log.debug("about to merge indexed document with new doc to insert for pid: "
103
                + indexDocument.getIdentifier());
104
        SolrDoc solrDoc = null;
105
		try {
106
			solrDoc = ResourceMapSubprocessor.getSolrDoc(indexDocument.getIdentifier());
107
		} catch (Exception e) {
108
			log.error("Could not retrieve existing index document: " + indexDocument.getIdentifier(), e);
109
		} 
110
        if (solrDoc != null) {
111
            log.debug("found existing doc to merge for pid: " + indexDocument.getIdentifier());
112
            for (SolrElementField field : solrDoc.getFieldList()) {
113
                if (fieldsToMerge.contains(field.getName())
114
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
115
                    indexDocument.addField(field);
116
                    log.debug("merging field: " + field.getName() + " with value: "
117
                            + field.getValue());
118
                }
119
            }
120
        }
121
        return indexDocument;
122
    }
123

    
124
}
    (1-1/1)