Project

General

Profile

1
/**
 *  '$RCSfile$'
 *    Purpose: A class that inserts, updates and removes indexes
 *             on a SOLR server
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Jivka Bojilova, Matt Jones
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
 * '$Revision: 6595 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.IOException;
30
import java.io.InputStream;
31
import java.util.ArrayList;
32
import java.util.HashMap;
33
import java.util.Iterator;
34
import java.util.List;
35
import java.util.Map;
36
import java.util.Set;
37

    
38
import javax.xml.parsers.DocumentBuilder;
39
import javax.xml.parsers.DocumentBuilderFactory;
40
import javax.xml.parsers.ParserConfigurationException;
41
import javax.xml.xpath.XPath;
42
import javax.xml.xpath.XPathExpressionException;
43
import javax.xml.xpath.XPathFactory;
44

    
45
import org.apache.commons.codec.EncoderException;
46
import org.apache.commons.io.output.ByteArrayOutputStream;
47
import org.apache.commons.logging.Log;
48
import org.apache.commons.logging.LogFactory;
49
import org.apache.solr.client.solrj.SolrServer;
50
import org.apache.solr.client.solrj.SolrServerException;
51
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
52
//import org.apache.solr.client.solrj.impl.HttpSolrServer;
53
import org.apache.solr.client.solrj.response.UpdateResponse;
54
import org.apache.solr.common.SolrInputDocument;
55
import org.apache.solr.core.CoreContainer;
56
import org.dataone.cn.indexer.XMLNamespaceConfig;
57
import org.dataone.cn.indexer.XPathDocumentParser;
58
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
59
import org.dataone.cn.indexer.parser.SolrField;
60
import org.dataone.cn.indexer.solrhttp.SolrDoc;
61
import org.dataone.cn.indexer.solrhttp.SolrElementAdd;
62
import org.dataone.cn.indexer.solrhttp.SolrElementField;
63
import org.w3c.dom.Document;
64
import org.xml.sax.SAXException;
65

    
66
/**
67
 * A class does insert, update and remove indexes to a SOLR server
68
 * @author tao
69
 *
70
 */
71
public class SolrIndex {
72
    
73
    private static final String SOLRHOMEPATH = "/Users/tao/Downloads/apache-solr-3.4.0/example/solr";
74
    
75
    private static final String SOLRHOME = "solr.solr.home";
76
    private static final String SOLRSERVERNAME = "metacat-core";
77
    
78
    
79
    private List<IDocumentSubprocessor> subprocessors = null;
80
    private SolrServer solrServer = null;
81
    private XMLNamespaceConfig xmlNamespaceConfig = null;
82
    private List<SolrField> sysmetaSolrFields = null;
83

    
84
    private static DocumentBuilderFactory documentBuilderFactory = null;
85
    private static DocumentBuilder builder = null;
86

    
87
    private static XPathFactory xpathFactory = null;
88
    private static XPath xpath = null;
89
    Log log = LogFactory.getLog(SolrIndex.class);
90
    
91
    static {
92
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
93
        documentBuilderFactory.setNamespaceAware(true);
94
        try {
95
            builder = documentBuilderFactory.newDocumentBuilder();
96
        } catch (ParserConfigurationException e) {
97
            e.printStackTrace();
98
        }
99
        xpathFactory = XPathFactory.newInstance();
100
        xpath = xpathFactory.newXPath();
101
    }
102
    
103
    /**
104
     * Constructor
105
     * @throws SAXException 
106
     * @throws IOException 
107
     */
108
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
109
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
110
         this.xmlNamespaceConfig = xmlNamespaceConfig;
111
         this.sysmetaSolrFields = sysmetaSolrFields;
112
         initSolrServer();
113
         init();
114
    }
115
    
116
    private void initSolrServer() throws IOException, ParserConfigurationException, SAXException {
117
        System.setProperty(SOLRHOME, SOLRHOMEPATH);
118
        CoreContainer.Initializer init = new CoreContainer.Initializer();
119
        CoreContainer c = init.initialize();
120
        solrServer = new EmbeddedSolrServer(c, "collection1");
121
    }
122
    
123
    private void init() throws ParserConfigurationException, XPathExpressionException {
124
        xpath.setNamespaceContext(xmlNamespaceConfig);
125
        initExpressions();
126
    }
127

    
128
    private void initExpressions() throws XPathExpressionException {
129
        for (SolrField field : sysmetaSolrFields) {
130
            field.initExpression(xpath);
131
        }
132

    
133
    }
134
    
135
    
136
    /**
137
     * Get the list of the Subprocessors in this index.
138
     * @return the list of the Subprocessors.
139
     */
140
    public List<IDocumentSubprocessor> getSubprocessors() {
141
        return subprocessors;
142
    }
143

    
144
    /**
145
     * Set the list of Subprocessors.
146
     * @param subprocessorList  the list will be set.
147
     */
148
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
149
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
150
            subprocessor.initExpression(xpath);
151
        }
152
        this.subprocessors = subprocessorList;
153
    }
154
    
155
    /**
156
     * Generate the index for the given information
157
     * @param id
158
     * @param systemMetaDataStream
159
     * @param dataStream
160
     * @return
161
     * @throws IOException
162
     * @throws SAXException
163
     * @throws ParserConfigurationException
164
     * @throws XPathExpressionException
165
     * @throws EncoderException
166
     */
167
    private Map<String, SolrDoc> process(String id, InputStream systemMetaDataStream, InputStream dataStream)
168
                    throws IOException, SAXException, ParserConfigurationException,
169
                    XPathExpressionException{
170

    
171
        // Load the System Metadata document
172
        Document sysMetaDoc = generateXmlDocument(systemMetaDataStream);
173
        if (sysMetaDoc == null) {
174
            log.error("Could not load System metadata for ID: " + id);
175
            return null;
176
        }
177

    
178
        // Extract the field values from the System Metadata
179
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
180
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
181
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
182
        docs.put(id, indexDocument);
183

    
184
        // Determine if subprocessors are available for this ID
185
        if (subprocessors != null) {
186
                    // for each subprocessor loaded from the spring config
187
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
188
                        // Does this subprocessor apply?
189
                        if (subprocessor.canProcess(sysMetaDoc)) {
190
                            // if so, then extract the additional information from the
191
                            // document.
192
                            try {
193
                                // docObject = the resource map document or science
194
                                // metadata document.
195
                                // note that resource map processing touches all objects
196
                                // referenced by the resource map.
197
                                Document docObject = generateXmlDocument(dataStream);
198
                                if (docObject == null) {
199
                                    log.error("Could not load OBJECT for ID " + id );
200
                                } else {
201
                                    docs = subprocessor.processDocument(id, docs, docObject);
202
                                }
203
                            } catch (Exception e) {
204
                                log.error(e.getStackTrace().toString());
205
                            }
206
                        }
207
                    }
208
       }
209

    
210
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
211
       // merge is only for resource map. We need more work here.
212
       for (SolrDoc mergeDoc : docs.values()) {
213
           if (!mergeDoc.isMerged()) {
214
                 //mergeWithIndexedDocument(mergeDoc);
215
           }
216
       }
217

    
218
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
219
               
220
       return docs;
221
    }
222
    
223
    /*
224
     * Generate a Document from the InputStream
225
     */
226
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
227
        Document doc = null;
228

    
229
        try {
230
            doc = builder.parse(smdStream);
231
        } catch (IOException e) {
232
            log.error(e.getMessage(), e);
233
        }
234

    
235
        return doc;
236
    }
237
    
238
    /*
239
     * Index the fields of the system metadata
240
     */
241
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
242

    
243
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
244
        // solrFields is the list of fields defined in the application context
245
       
246
        for (SolrField field : sysmetaSolrFields) {
247
            try {
248
                // the field.getFields method can return a single value or
249
                // multiple values for multi-valued fields
250
                // or can return multiple SOLR document fields.
251
                fieldList.addAll(field.getFields(doc, identifier));
252
            } catch (Exception e) {
253
                e.printStackTrace();
254
            }
255
        }
256
        return fieldList;
257

    
258
    }
259
    
260
    /**
261
     * Generate indexes for a newly inserted document.
262
     * @param pid  the id of this document
263
     * @param systemMetadata  the system metadata associated with the data object
264
     * @param data  the data object itself
265
     * @throws SolrServerException 
266
     */
267
    public void insert(String pid, InputStream systemMetadata, InputStream data) 
268
                    throws IOException, SAXException, ParserConfigurationException,
269
                    XPathExpressionException, SolrServerException {
270
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
271
        
272
        //transform the Map to the SolrInputDocument which can be used by the solr server
273
        if(docs != null) {
274
            Set<String> ids = docs.keySet();
275
            for(String id : ids) {
276
                SolrInputDocument solrDoc = new SolrInputDocument();
277
                if(id != null) {
278
                    SolrDoc doc = docs.get(id);
279
                    if(doc != null) {
280
                        List<SolrElementField> list = doc.getFieldList();
281
                        if(list != null) {
282
                            //solrDoc.addField(METACATPIDFIELD, pid);
283
                            Iterator<SolrElementField> iterator = list.iterator();
284
                            while (iterator.hasNext()) {
285
                                SolrElementField field = iterator.next();
286
                                if(field != null) {
287
                                    String value = field.getValue();
288
                                    String name = field.getName();
289
                                    System.out.println("add name/value pair - "+name+"/"+value);
290
                                    solrDoc.addField(name, value);
291
                                }
292
                            }
293
                        }
294
                    }
295
                }
296
                if(!solrDoc.isEmpty()) {
297
                    UpdateResponse response = solrServer.add(solrDoc);
298
                    solrServer.commit();
299
                    System.out.println("=================the response is:\n"+response.toString());
300
                }
301
            }
302
        }
303
    }
304
 
305
    /**
306
     * Remove the indexed associated with specified pid.
307
     * @param pid  the pid which the indexes are associated with
308
     * @throws IOException
309
     * @throws SolrServerException
310
     */
311
    public void remove(String pid) throws IOException, SolrServerException {
312
        solrServer.deleteById(pid);
313
        solrServer.commit();
314
       
315
    }
316
}
(2-2/3)