Project

General

Profile

1
package edu.ucsb.nceas.metacat.index.annotation;
2

    
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8

    
9
import javax.xml.xpath.XPathExpressionException;
10

    
11
import org.apache.commons.codec.EncoderException;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.dataone.cn.indexer.annotation.AnnotatorSubprocessor;
15
import org.dataone.cn.indexer.solrhttp.SolrDoc;
16
import org.dataone.cn.indexer.solrhttp.SolrElementField;
17

    
18
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
19

    
20
public class MetacatAnnotatorSubprocessor extends AnnotatorSubprocessor {
21
	
22
    private static Log log = LogFactory.getLog(MetacatAnnotatorSubprocessor.class);
23
	
24
    
25
    @Override
26
    public Map<String, SolrDoc> processDocument(String annotationId, Map<String, SolrDoc> docs,
27
            InputStream is) throws Exception {
28

    
29
        // check for annotations, and add them if found
30
        SolrDoc annotations = parseAnnotation(is);
31
        if (annotations != null) {
32
            String referencedPid = annotations.getIdentifier();
33
            SolrDoc referencedDoc = docs.get(referencedPid);
34

    
35
            // make sure we have a reference for the document we annotating
36
            boolean referenceExists = true;
37
            if (referencedDoc == null) {
38
                try {
39
                    referencedDoc = ResourceMapSubprocessor.getSolrDoc(referencedPid);
40
                } catch (Exception e) {
41
                    log.error("Unable to retrieve solr document: " + referencedPid
42
                            + ".  Exception attempting to communicate with solr server.", e);
43
                }
44

    
45
                
46
                if (referencedDoc == null) {
47
                    referencedDoc = new SolrDoc();
48
                    referenceExists = false;
49
                }
50
                docs.put(referencedPid, referencedDoc);
51
            }
52

    
53
            // make sure we say we annotate the object
54
            SolrDoc annotationDoc = docs.get(annotationId);
55
            if (annotationDoc != null) {
56
                annotationDoc.addField(new SolrElementField(FIELD_ANNOTATES, referencedPid));
57
            }
58

    
59
            // add the annotations to the referenced document
60
            Iterator<SolrElementField> annotationIter = annotations.getFieldList().iterator();
61
            while (annotationIter.hasNext()) {
62
                SolrElementField annotation = annotationIter.next();
63
                // only skip merge field if there was an existing record
64
                if (referenceExists && !this.getFieldsToMerge().contains(annotation.getName())) {
65
                    log.debug("SKIPPING field (not in fieldsToMerge): " + annotation.getName());
66
                    continue;
67
                }
68
                referencedDoc.addField(annotation);
69
                log.debug("ADDING annotation to " + referencedPid + ": " + annotation.getName()
70
                        + "=" + annotation.getValue());
71
            }
72
        } else {
73
            log.warn("Annotations were not found when parsing: " + annotationId);
74
        }
75
        // return the collection that we have augmented
76
        return docs;
77
    }
78
    
79
    /**
80
     * Merge updates with existing solr documents
81
     * 
82
     * @param indexDocument
83
     * @return
84
     * @throws IOException
85
     * @throws EncoderException
86
     * @throws XPathExpressionException
87
     */
88
    public SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
89
            EncoderException, XPathExpressionException {	
90
        
91
		return mergeWithIndexedDocument(indexDocument, getFieldsToMerge());
92
    }
93
    
94
    /**
95
     * Inspired by SubprocessorUtility method, but works with embedded solr server
96
     * @param indexDocument
97
     * @param fieldsToMerge
98
     * @return
99
     * @throws IOException
100
     * @throws EncoderException
101
     * @throws XPathExpressionException
102
     */
103
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument, List<String> fieldsToMerge)
104
            throws IOException, EncoderException, XPathExpressionException {
105

    
106
        log.debug("about to merge indexed document with new doc to insert for pid: "
107
                + indexDocument.getIdentifier());
108
        SolrDoc solrDoc = null;
109
		try {
110
			solrDoc = ResourceMapSubprocessor.getSolrDoc(indexDocument.getIdentifier());
111
		} catch (Exception e) {
112
			log.error("Could not retrieve existing index document: " + indexDocument.getIdentifier(), e);
113
		} 
114
        if (solrDoc != null) {
115
            log.debug("found existing doc to merge for pid: " + indexDocument.getIdentifier());
116
            for (SolrElementField field : solrDoc.getFieldList()) {
117
                if (fieldsToMerge.contains(field.getName())
118
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
119
                    indexDocument.addField(field);
120
                    log.debug("merging field: " + field.getName() + " with value: "
121
                            + field.getValue());
122
                }
123
            }
124
        }
125
        return indexDocument;
126
    }
127

    
128
}
(1-1/2)