Project

General

Profile

1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.common.query;
20

    
21
import java.io.IOException;
22
import java.io.InputStream;
23
import java.net.MalformedURLException;
24
import java.net.URL;
25
import java.util.ArrayList;
26
import java.util.HashMap;
27
import java.util.List;
28
import java.util.Map;
29
import java.util.Set;
30
import java.util.Vector;
31

    
32
import javax.xml.parsers.DocumentBuilder;
33
import javax.xml.parsers.DocumentBuilderFactory;
34
import javax.xml.parsers.ParserConfigurationException;
35
import javax.xml.xpath.XPathConstants;
36
import javax.xml.xpath.XPathExpressionException;
37
import javax.xml.xpath.XPathFactory;
38

    
39
import org.apache.commons.codec.net.URLCodec;
40
import org.apache.commons.logging.Log;
41
import org.apache.commons.logging.LogFactory;
42
import org.apache.solr.client.solrj.SolrServerException;
43
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
44
import org.apache.solr.client.solrj.response.QueryResponse;
45
import org.apache.solr.client.solrj.util.ClientUtils;
46
import org.apache.solr.common.params.SolrParams;
47
import org.apache.solr.core.SolrConfig;
48
import org.apache.solr.schema.FieldType;
49
import org.apache.solr.schema.IndexSchema;
50
import org.apache.solr.schema.SchemaField;
51
import org.apache.solr.schema.TextField;
52
import org.dataone.configuration.Settings;
53
import org.dataone.service.exceptions.NotFound;
54
import org.dataone.service.exceptions.NotImplemented;
55
import org.dataone.service.types.v1.Identifier;
56
import org.dataone.service.types.v1.Subject;
57
import org.w3c.dom.Attr;
58
import org.w3c.dom.Document;
59
import org.w3c.dom.Element;
60
import org.w3c.dom.Node;
61
import org.w3c.dom.NodeList;
62
import org.xml.sax.Attributes;
63
import org.xml.sax.InputSource;
64
import org.xml.sax.SAXException;
65

    
66

    
67

    
68
/**
69
 * The query service for the http solr server.
70
 * @author tao
71
 *
72
 */
73
public class HttpSolrQueryService extends SolrQueryService {
74
    private static final String SELECTIONPHASE = "/select";
75
    private static final String SOLR_SYSTEMINFO_URLAPPENDIX = "solr.systeminfo.urlappendix";
76
    private static final String SOLR_SCHEMA_URLAPPENDIX = "sorl.schema.urlappendix";
77
    private static final String SOLR_CONFIG_URLAPPENDIX = "solr.config.urlappendix";
78
    private static final String SPEC_PATH = "//str[@name='solr-spec-version']";
79
    private static final String FIELDS_PATH = "//fields//field";
80
    private static final String COPY_FIELDS_PATH = "//copyField";
81
    private static final String DEST = "dest";
82
    private static final String TRUE = "true";
83
    
84
    private String solrServerBaseURL = null;
85
    private CommonsHttpSolrServer httpSolrServer = null;
86
    private static Log log = LogFactory.getLog(HttpSolrQueryService.class);
87
    /**
88
     * Constructor
89
     * @param httpSolrServer
90
     */
91
    public HttpSolrQueryService(CommonsHttpSolrServer httpSolrServer) {
92
        if(httpSolrServer == null) {
93
            throw new NullPointerException("HttpSolrQueryService.constructor - The httpSolrServer parameter can't be null");
94
        }
95
        this.httpSolrServer = httpSolrServer;
96
        this.solrServerBaseURL = httpSolrServer.getBaseURL();
97
    }
98
    
99
    /**
100
     * Query the Solr server with specified query string and the user's identity. This is the for the http solr server.
101
     * It is hard to transform the SolrQueryReponse object to the InputStream object for the HttpSolrServer
102
     * since the transform needs the SolrCore. We have to open the solr url directly to get the InputStream.
103
     * @param query the query string
104
     * @param subjects the user's identity which sent the query
105
     * @return the response
106
     * @throws NotFound 
107
     * @throws IOException 
108
     * @throws Exception
109
     */
110
    /*public InputStream query(String query, Set<Subject>subjects) throws NotFound, IOException {
111
        StringBuffer accessFilter = generateAccessFilterParamsString(subjects);
112
        if(accessFilter != null && accessFilter.length() != 0) {
113
            query = solrServerBaseURL+"/select?"+query+"&"+FILTERQUERY+"="+accessFilter.toString();
114
            query = ClientUtils.escapeQueryChars(query);
115
        } else {
116
            throw new NotFound("0000", "HttpSolrQueryService.query - There is no identity (even user public) for the user who issued the query");
117
        }
118
        log.info("==========HttpSolrQueryService.query - the final url for querying the solr http server is "+query);
119
        URL url = new URL(query);
120
        
121
        return url.openStream();
122
    }*/
123
    
124
    /**
125
     * Query the Solr server with specified query and user's identity. 
126
     * It is hard to transform the SolrQueryReponse object to the InputStream object for the HttpSolrServer
127
     * since the transform needs the SolrCore. We have to open the solr url directly to get the InputStream.
128
     * @param query the query params. 
129
     * @param subjects the user's identity which sent the query. If the Subjects is null, there wouldn't be any access control.
130
     * @return the response
131
     * @throws IOException 
132
     * @throws NotFound 
133
     * @throws Exception
134
     */
135
    public  InputStream query(SolrParams query, Set<Subject>subjects) throws IOException, NotFound {
136
        boolean xmlFormat = false;
137
        String queryString = ClientUtils.toQueryString(query, xmlFormat);
138
        log.info("==========HttpSolrQueryService.query - the query string after transforming from the SolrParams to the string "+queryString);
139
        StringBuffer accessFilter = generateAccessFilterParamsString(subjects);
140
        if(accessFilter != null && accessFilter.length() != 0) {
141
            String accessStr = accessFilter.toString();
142
            log.debug("==========HttpSolrQueryService.query - the access string is "+accessStr);
143
            URLCodec urlCodec = new URLCodec();
144
            accessStr = urlCodec.encode(accessStr, "UTF-8");
145
            log.debug("==========HttpSolrQueryService.query - the access string after escape special characters string "+accessStr);
146
            queryString = queryString+"&"+FILTERQUERY+"="+accessStr;
147
           
148
        }
149
        
150
        
151
        //queryString = ClientUtils.escapeQueryChars(queryString);
152
        queryString = solrServerBaseURL+SELECTIONPHASE+queryString;
153
        log.info("==========HttpSolrQueryService.query - the final url for querying the solr http server is "+queryString);
154
        URL url = new URL(queryString);    
155
        return url.openStream();
156
        //throw new NotImplemented("0000", "HttpSolrQueryService - the method of  query(SolrParams query, Set<Subject>subjects) is not for the HttpSolrQueryService. We donot need to implemente it");
157
    }
158
    
159
    
160
    
161
    
162
    /**
163
     * Get the fields list of the index schema
164
     * @return
165
     * @throws SAXException 
166
     * @throws IOException 
167
     * @throws ParserConfigurationException 
168
     * @throws MalformedURLException 
169
     * @throws Exception
170
     */
171
    public  Map<String, SchemaField> getIndexSchemaFields() throws MalformedURLException, ParserConfigurationException, IOException, SAXException  {
172
        if(fieldMap == null || fieldMap.isEmpty()) {
173
            getIndexSchemaFieldFromServer();
174
        }
175
        //System.out.println("get filed map ==========================");
176
        return fieldMap;
177
    }
178
    
179
   
180
    
181
    /**
182
     * Get the list of the valid field name (moved the fields names of the CopyFieldTarget).
183
     * @return
184
     * @throws SAXException 
185
     * @throws IOException 
186
     * @throws ParserConfigurationException 
187
     * @throws MalformedURLException 
188
     */
189
    public List<String> getValidSchemaField() throws MalformedURLException, ParserConfigurationException, IOException, SAXException {
190
        if(fieldMap == null || fieldMap.isEmpty()) {
191
            getIndexSchemaFields();
192
        }
193
        return super.getValidSchemaFields();
194
    }
195
    
196
   
197
    /*
198
     * Get the fieldMap from the http server. 
199
     * @throws MalformedURLException
200
     * @throws ParserConfigurationException
201
     * @throws IOException
202
     * @throws SAXException
203
     */
204
    private void getIndexSchemaFieldFromServer() throws MalformedURLException, ParserConfigurationException, IOException, SAXException {
205
        //System.out.println("get filed map from server (downloading files) ==========================");
206
        SolrConfig config = new SolrConfig("dataone", new InputSource(getSolrConfig())); 
207
        schema = new IndexSchema(config, "dataone", new InputSource(lookupSchema()));
208
        fieldMap = schema.getFields();
209
    }
210
    
211
    /*
212
     * Parse the schema.xml and get the validSolrFieldName list
213
     */
214
    /*private void parseSchema() throws MalformedURLException, ParserConfigurationException, SAXException, IOException, XPathExpressionException {
215
        validSolrFieldNames = new ArrayList<String>();
216
        Map<String, SchemaField> fieldMap = new HashMap<String, SchemaField>();
217
        Document schema = transformInputStreamToDoc(getSchema());
218
        Vector<String>copyFieldTargetNames = new Vector<String>();
219
        NodeList copyFields = (NodeList) XPathFactory.newInstance().newXPath()
220
                        .evaluate(COPY_FIELDS_PATH, schema, XPathConstants.NODESET);
221
        if(copyFields != null) {
222
            for(int i=0; i<copyFields.getLength(); i++) {
223
                Element copyField = (Element)copyFields.item(i);
224
                String target = copyField.getAttribute(DEST);
225
                if(target != null && !target.trim().equals("")) {
226
                    copyFieldTargetNames.add(target);
227
                }
228
            }
229
        }
230
        NodeList fields = (NodeList) XPathFactory.newInstance().newXPath()
231
                        .evaluate(FIELDS_PATH, schema, XPathConstants.NODESET);
232
        if(fields!= null) {
233
            for(int i=0; i<fields.getLength(); i++) {
234
                Element fieldElement = (Element) fields.item(i);
235
                String name = fieldElement.getAttribute("name");
236
                if(name != null && !name.trim().equals("")) {
237
                    if(!copyFieldTargetNames.contains(name)) {
238
                        validSolrFieldNames.add(name);
239
                    }
240
                }
241
            }
242
        }
243
    }*/
244
    
245
    
246
    /*
247
     * Get the SolrConfig InputStream.
248
     * @return
249
     * @throws MalformedURLException
250
     * @throws IOException
251
     */
252
    private InputStream getSolrConfig() throws MalformedURLException, IOException {
253
        String solrConfigAppendix = Settings.getConfiguration().getString(SOLR_CONFIG_URLAPPENDIX);
254
        String configURL = solrServerBaseURL+solrConfigAppendix;
255
        return (new URL(configURL)).openStream();
256
    }
257
    /*
258
     * Get the schema InputStream from the url which is specified in the metacat.properties and transform it to a Document.
259
     */
260
    private InputStream lookupSchema() throws MalformedURLException, IOException {
261
        String schemaURLAppendix = Settings.getConfiguration().getString(SOLR_SCHEMA_URLAPPENDIX);
262
        String schemaURL = solrServerBaseURL+schemaURLAppendix;
263
        return (new URL(schemaURL)).openStream();
264
    }
265
    
266
    /**
267
     * Get the version of the solr server.
268
     * @return
269
     */
270
    public String getSolrServerVersion() {
271
        if(solrSpecVersion == null) {
272
            getHttpSolrServerVersion();
273
        } 
274
        //System.out.println("get spec version  ==========================");
275
        return solrSpecVersion;
276
    }
277
    
278
    
279
    /*
280
     * Get the solr server version from the system information url. 
281
     */
282
    private void getHttpSolrServerVersion() {
283
        //System.out.println("get spec version from server (downloading files) ==========================");
284
        String systemInfoUrlAppendix = Settings.getConfiguration().getString(SOLR_SYSTEMINFO_URLAPPENDIX);
285
        String systemInfoUrl = solrServerBaseURL+systemInfoUrlAppendix;
286
        try {
287
            Document doc = transformInputStreamToDoc((new URL(systemInfoUrl)).openStream());
288
            NodeList nodeList = (NodeList) XPathFactory.newInstance().newXPath()
289
                            .evaluate(SPEC_PATH, doc, XPathConstants.NODESET);
290
            if(nodeList != null && nodeList.getLength() >0) {
291
                //System.out.println("nodelist is not null branch");
292
                Node specNode = nodeList.item(0);
293
                solrSpecVersion = specNode.getFirstChild().getNodeValue();
294
            } else {
295
                //System.out.println("nodelist is null branch");
296
                solrSpecVersion = UNKNOWN;
297
            }
298
            
299
        } catch (Exception e) {
300
            log.error("HttpSolrQueryService.getHttpSolrServerVersion - can't get the solr specification version since "+e.getMessage());
301
            solrSpecVersion = UNKNOWN;
302
        }
303
        
304
        
305
    }
306
    
307
    /**
308
     * Generate a Document from the InputStream
309
     * @param input
310
     * @return
311
     * @throws ParserConfigurationException
312
     * @throws SAXException
313
     * @throws IOException
314
     */
315
    private Document transformInputStreamToDoc(InputStream input) throws ParserConfigurationException, SAXException, IOException {
316
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
317
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
318
        Document doc = dBuilder.parse(input);
319
        return doc;
320
    }
321
    
322
    /**
323
     * If there is a solr doc for the given id.
324
     * @param id - the specified id.
325
     * @return true if there is a solr doc for this id.
326
     */
327
    public boolean hasSolrDoc(Identifier id) throws ParserConfigurationException, SolrServerException, IOException, SAXException {
328
    	boolean hasIt = false;
329
    	if(id != null && id.getValue() != null && !id.getValue().trim().equals("") ) {
330
    		SolrParams query = EmbeddedSolrQueryService.buildIdQuery(id.getValue());
331
            QueryResponse response = httpSolrServer.query(query);
332
            hasIt = EmbeddedSolrQueryService.hasResult(response);
333
    	}
334
    	return hasIt;
335
    }
336
    
337
 
338
}
(3-3/7)