Project

General

Profile

1
/**
2
 * This program is free software; you can redistribute it and/or modify
3
 * it under the terms of the GNU General Public License as published by
4
 * the Free Software Foundation; either version 2 of the License, or
5
 * (at your option) any later version.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15
 */
16
package edu.ucsb.nceas.metacat.index.resourcemap;
17

    
18
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.servlet.SolrRequestParsers;
import org.dataone.cn.indexer.convert.SolrDateConverter;
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
import org.dataone.cn.indexer.resourcemap.ResourceMap;
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
import org.dataone.cn.indexer.solrhttp.SolrDoc;
import org.dataone.cn.indexer.solrhttp.SolrElementField;
import org.dataone.service.exceptions.NotFound;
import org.dataone.service.exceptions.NotImplemented;
import org.dataone.service.exceptions.UnsupportedType;
import org.dataone.service.util.DateTimeMarshaller;
import org.dspace.foresite.OREParserException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

import edu.ucsb.nceas.metacat.common.SolrServerFactory;
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
import edu.ucsb.nceas.metacat.index.SolrIndex;
56

    
57

    
58
/**
59
 * A solr index parser for the ResourceMap file.
60
 * The solr doc of the ResourceMap itself only has the system metadata information.
61
 * The solr docs of the science metadata doc and data file have the resource map package information.
62
 */
63
public class ResourceMapSubprocessor extends AbstractDocumentSubprocessor implements IDocumentSubprocessor {
64

    
65
    /** Query-string prefix used to look up a single Solr document by its id. */
    private static final String QUERY ="q=id:";

    /** Logger registered under this class, so messages are attributed to the right category. */
    private static final Log log = LogFactory.getLog(ResourceMapSubprocessor.class);

    /** Solr server used for id lookups; remains null when creation fails in the static initializer. */
    private static SolrServer solrServer =  null;

    static {
        try {
            solrServer = SolrServerFactory.createSolrServer();
        } catch (Exception e) {
            // Best effort: leave solrServer as null, but pass the exception to the
            // logger so the stack trace of the root cause is not lost.
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage(), e);
        }
    }
75
          
76
    @Override
77
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
78
    Document doc) throws IOException, EncoderException, SAXException,
79
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException {
80
        SolrDoc resourceMapDoc = docs.get(identifier);
81
        List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc);
82
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
83
        for (SolrDoc processedDoc : processedDocs) {
84
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
85
        }
86
        return processedDocsMap;
87
    }
88

    
89
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
90
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException{
91
        //ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
92
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDocument);
93
        List<String> documentIds = resourceMap.getAllDocumentIDs();//this list includes the resourceMap id itself.
94
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
95
        List<SolrDoc> updateDocuments = getSolrDocs(resourceMap.getIdentifier(), documentIds);
96
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
97
        /*if(mergedDocuments != null) {
98
            for(SolrDoc doc : mergedDocuments) {
99
                ByteArrayOutputStream out = new ByteArrayOutputStream();
100
                doc.serialize(out, "UTF-8");
101
                String result = new String(out.toByteArray(), "UTF-8");
102
                System.out.println("after updated document===========================");
103
                System.out.println(result);
104
            }
105
        }*/
106
        mergedDocuments.add(indexDocument);
107
        return mergedDocuments;
108
    }
109
    
110
    private List<SolrDoc> getSolrDocs(String resourceMapId, List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, ResourceMapException {
111
        List<SolrDoc> list = new ArrayList<SolrDoc>();
112
        if(ids != null) {
113
            for(String id : ids) {
114
            	SolrDoc doc = getSolrDoc(id);
115
                if(doc != null) {
116
                    list.add(doc);
117
                } else if ( !id.equals(resourceMapId)) {
118
                    throw new ResourceMapException("Solr index doesn't have the information about the id "+id+" which is a component in the resource map "+resourceMapId+". Metacat-Index can't process the resource map prior to its components.");
119
                }
120
            }
121
        }
122
        return list;
123
    } 
124
    
125
    /*
126
     * Get the SolrDoc list for the list of the ids.
127
     */
128
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
129
        List<SolrDoc> list = new ArrayList<SolrDoc>();
130
        if(ids != null) {
131
            for(String id : ids) {
132
            	SolrDoc doc = getSolrDoc(id);
133
                if(doc != null) {
134
                    list.add(doc);
135
                }
136
            }
137
        }
138
        return list;
139
    }
140
    
141
	/*
142
	 * Get the SolrDoc for the specified id
143
	 */
144
	public static SolrDoc getSolrDoc(String id) throws SolrServerException,
145
			IOException, ParserConfigurationException, SAXException,
146
			XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
147
		SolrDoc doc = null;
148

    
149
		if (solrServer != null) {
150
			String query = QUERY + "\"" + id + "\"";
151
			SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
152
			QueryResponse qr = solrServer.query(solrParams);
153
			if (qr.getResults().size() > 0) {
154
				SolrDocument orig = qr.getResults().get(0);
155
				doc = new SolrDoc();
156
				IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
157
				for (String fieldName : orig.getFieldNames()) {
158
					// don't transfer the copyTo fields, otherwise there are errors
159
					if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
160
						continue;
161
					}
162
					for (Object value : orig.getFieldValues(fieldName)) {
163
						String stringValue = value.toString();
164
						// special handling for dates in ISO 8601
165
						if (value instanceof Date) {
166
							stringValue = DateTimeMarshaller.serializeDateToUTC((Date) value);
167
							SolrDateConverter converter = new SolrDateConverter();
168
							stringValue = converter.convert(stringValue);
169
						}
170
						SolrElementField field = new SolrElementField(fieldName, stringValue);
171
						log.debug("Adding field: " + fieldName);
172
						doc.addField(field);
173
					}
174
				}
175
			}
176

    
177
		}
178
		return doc;
179
	}
180

    
181

    
182
}
(2-2/2)