Project

General

Profile

1 7696 tao
/**
2
 * This program is free software; you can redistribute it and/or modify
3
 * it under the terms of the GNU General Public License as published by
4
 * the Free Software Foundation; either version 2 of the License, or
5
 * (at your option) any later version.
6
 *
7
 * This program is distributed in the hope that it will be useful,
8
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 * GNU General Public License for more details.
11
 *
12
 * You should have received a copy of the GNU General Public License
13
 * along with this program; if not, write to the Free Software
14
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15
 */
16
package edu.ucsb.nceas.metacat.index.resourcemap;
17
18
import java.io.IOException;
19 9018 leinfelder
import java.io.InputStream;
20 8893 tao
import java.net.MalformedURLException;
21 7696 tao
import java.util.ArrayList;
22 7710 tao
import java.util.Date;
23 7696 tao
import java.util.HashMap;
24
import java.util.List;
25
import java.util.Map;
26
27
import javax.xml.parsers.ParserConfigurationException;
28
import javax.xml.xpath.XPathExpressionException;
29
30
import org.apache.commons.codec.EncoderException;
31
import org.apache.commons.logging.Log;
32
import org.apache.commons.logging.LogFactory;
33
import org.apache.solr.client.solrj.SolrServer;
34
import org.apache.solr.client.solrj.SolrServerException;
35
import org.apache.solr.client.solrj.response.QueryResponse;
36
import org.apache.solr.common.SolrDocument;
37
import org.apache.solr.common.params.SolrParams;
38 7710 tao
import org.apache.solr.schema.IndexSchema;
39 7696 tao
import org.apache.solr.servlet.SolrRequestParsers;
40 9028 leinfelder
import org.dataone.cn.indexer.XmlDocumentUtility;
41 8464 leinfelder
import org.dataone.cn.indexer.convert.SolrDateConverter;
42 9028 leinfelder
import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor;
43 7696 tao
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
44
import org.dataone.cn.indexer.resourcemap.ResourceMap;
45 8296 tao
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
46 7696 tao
import org.dataone.cn.indexer.solrhttp.SolrDoc;
47
import org.dataone.cn.indexer.solrhttp.SolrElementField;
48 7733 tao
import org.dataone.service.exceptions.NotFound;
49
import org.dataone.service.exceptions.NotImplemented;
50
import org.dataone.service.exceptions.UnsupportedType;
51 8464 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
52 8023 tao
import org.dspace.foresite.OREParserException;
53 7696 tao
import org.w3c.dom.Document;
54
import org.xml.sax.SAXException;
55
56 8464 leinfelder
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
57 7733 tao
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
58 7696 tao
import edu.ucsb.nceas.metacat.index.SolrIndex;
59
60 7710 tao
61 7696 tao
/**
62
 * A solr index parser for the ResourceMap file.
63
 * The solr doc of the ResourceMap self only has the system metadata information.
64
 * The solr docs of the science metadata doc and data file have the resource map package information.
65
 */
66 9028 leinfelder
public class ResourceMapSubprocessor extends BaseXPathDocumentSubprocessor implements IDocumentSubprocessor {
67 7696 tao
68
    private static final String QUERY ="q=id:";
69 8893 tao
    private static final String QUERY2="q="+SolrElementField.FIELD_RESOURCEMAP+":";
70 7696 tao
    private static Log log = LogFactory.getLog(SolrIndex.class);
71
    private static SolrServer solrServer =  null;
72
    static {
73
        try {
74
            solrServer = SolrServerFactory.createSolrServer();
75
        } catch (Exception e) {
76
            log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage());
77
        }
78
    }
79
80
    @Override
81
    public Map<String, SolrDoc> processDocument(String identifier, Map<String, SolrDoc> docs,
82 9018 leinfelder
    InputStream is) throws IOException, EncoderException, SAXException,
83 8134 tao
    XPathExpressionException, ParserConfigurationException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException {
84 7696 tao
        SolrDoc resourceMapDoc = docs.get(identifier);
85 9028 leinfelder
        Document doc = XmlDocumentUtility.generateXmlDocument(is);
86 9018 leinfelder
		List<SolrDoc> processedDocs = processResourceMap(resourceMapDoc, doc );
87 7696 tao
        Map<String, SolrDoc> processedDocsMap = new HashMap<String, SolrDoc>();
88
        for (SolrDoc processedDoc : processedDocs) {
89
            processedDocsMap.put(processedDoc.getIdentifier(), processedDoc);
90
        }
91
        return processedDocsMap;
92
    }
93
94
    private List<SolrDoc> processResourceMap(SolrDoc indexDocument, Document resourceMapDocument)
95 8134 tao
                    throws XPathExpressionException, IOException, SAXException, ParserConfigurationException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType, OREParserException, ResourceMapException{
96 8023 tao
        //ResourceMap resourceMap = new ResourceMap(resourceMapDocument);
97 8296 tao
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDocument);
98 7851 tao
        List<String> documentIds = resourceMap.getAllDocumentIDs();//this list includes the resourceMap id itself.
99 7696 tao
        //List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds);
100 7851 tao
        List<SolrDoc> updateDocuments = getSolrDocs(resourceMap.getIdentifier(), documentIds);
101 7696 tao
        List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments);
102 7710 tao
        /*if(mergedDocuments != null) {
103
            for(SolrDoc doc : mergedDocuments) {
104
                ByteArrayOutputStream out = new ByteArrayOutputStream();
105
                doc.serialize(out, "UTF-8");
106
                String result = new String(out.toByteArray(), "UTF-8");
107
                System.out.println("after updated document===========================");
108
                System.out.println(result);
109
            }
110
        }*/
111 7696 tao
        mergedDocuments.add(indexDocument);
112
        return mergedDocuments;
113
    }
114
115 8134 tao
    private List<SolrDoc> getSolrDocs(String resourceMapId, List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, ResourceMapException {
116 7851 tao
        List<SolrDoc> list = new ArrayList<SolrDoc>();
117
        if(ids != null) {
118
            for(String id : ids) {
119 8464 leinfelder
            	SolrDoc doc = getSolrDoc(id);
120 7851 tao
                if(doc != null) {
121
                    list.add(doc);
122
                } else if ( !id.equals(resourceMapId)) {
123 8134 tao
                    throw new ResourceMapException("Solr index doesn't have the information about the id "+id+" which is a component in the resource map "+resourceMapId+". Metacat-Index can't process the resource map prior to its components.");
124 7851 tao
                }
125
            }
126
        }
127
        return list;
128
    }
129
130 7696 tao
    /*
131
     * Get the SolrDoc list for the list of the ids.
132
     */
133 7733 tao
    public static List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
134 7696 tao
        List<SolrDoc> list = new ArrayList<SolrDoc>();
135
        if(ids != null) {
136
            for(String id : ids) {
137 8464 leinfelder
            	SolrDoc doc = getSolrDoc(id);
138 7696 tao
                if(doc != null) {
139
                    list.add(doc);
140
                }
141
            }
142
        }
143
        return list;
144
    }
145
146 8464 leinfelder
	/*
147
	 * Get the SolrDoc for the specified id
148
	 */
149
	public static SolrDoc getSolrDoc(String id) throws SolrServerException,
150
			IOException, ParserConfigurationException, SAXException,
151
			XPathExpressionException, NotImplemented, NotFound, UnsupportedType {
152 8893 tao
	    int targetIndex = 0;
153 8552 leinfelder
		SolrDoc doc = null;
154 8893 tao
		String query = QUERY + "\"" + id + "\"";
155
	    List<SolrDoc> list = getDocumentsByQuery(query);
156
	    if(list != null && !list.isEmpty()) {
157
	        doc = list.get(targetIndex);
158
	    }
159 8464 leinfelder
		return doc;
160
	}
161 8893 tao
162
	/**
163
	 * Get a list of solr documents which's resourcemap field matches the given value.
164
	 * @param resourceMapId - the target resource map id
165
	 * @return the list of solr document
166
	 * @throws MalformedURLException
167
	 * @throws UnsupportedType
168
	 * @throws NotFound
169
	 * @throws SolrServerException
170
	 * @throws ParserConfigurationException
171
	 * @throws IOException
172
	 * @throws SAXException
173
	 */
174
	public static List<SolrDoc> getDocumentsByResourceMap(String resourceMapId) throws MalformedURLException,
175
	            UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, IOException, SAXException {
176
	    String query = QUERY2 + "\"" + resourceMapId + "\"";
177
	    return getDocumentsByQuery(query);
178
	}
179
180
	/**
181
	 * Get a list of slor docs which match the query.
182
	 * @param query - a string of a query
183
	 * @return
184
	 * @throws SolrServerException
185
	 * @throws MalformedURLException
186
	 * @throws UnsupportedType
187
	 * @throws NotFound
188
	 * @throws ParserConfigurationException
189
	 * @throws IOException
190
	 * @throws SAXException
191
	 */
192
	public static List<SolrDoc> getDocumentsByQuery(String query) throws SolrServerException, MalformedURLException, UnsupportedType,
193
	                                                                NotFound, ParserConfigurationException, IOException, SAXException {
194
	    List<SolrDoc> docs = new ArrayList<SolrDoc>();
195
	    if (solrServer != null && query != null && !query.trim().equals("")) {
196
            SolrParams solrParams = SolrRequestParsers.parseQueryString(query);
197
            QueryResponse qr = solrServer.query(solrParams);
198
            if (qr != null && qr.getResults() != null) {
199
                for(int i=0; i<qr.getResults().size(); i++) {
200
                    SolrDocument orig = qr.getResults().get(i);
201
                    SolrDoc doc = new SolrDoc();
202
                    IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
203
                    for (String fieldName : orig.getFieldNames()) {
204
                        // don't transfer the copyTo fields, otherwise there are errors
205
                        if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
206
                            continue;
207
                        }
208
                        for (Object value : orig.getFieldValues(fieldName)) {
209
                            String stringValue = value.toString();
210
                            // special handling for dates in ISO 8601
211
                            if (value instanceof Date) {
212
                                stringValue = DateTimeMarshaller.serializeDateToUTC((Date) value);
213
                                SolrDateConverter converter = new SolrDateConverter();
214
                                stringValue = converter.convert(stringValue);
215
                            }
216
                            SolrElementField field = new SolrElementField(fieldName, stringValue);
217
                            log.debug("Adding field: " + fieldName);
218
                            doc.addField(field);
219
                        }
220
                    }
221
                    docs.add(doc);
222
                }
223
224
            }
225
	    }
226
	    return docs;
227
	}
228 8464 leinfelder
229
230 7696 tao
}