Project

General

Profile

1 7542 tao
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that inserts, updates and removes index entries
4
 *             on a SOLR server
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28
29 7555 tao
import java.io.ByteArrayInputStream;
30 7546 tao
import java.io.IOException;
31
import java.io.InputStream;
32
import java.util.ArrayList;
33
import java.util.HashMap;
34
import java.util.Iterator;
35 7542 tao
import java.util.List;
36 7546 tao
import java.util.Map;
37
import java.util.Set;
38 7542 tao
39
import javax.xml.parsers.DocumentBuilder;
40
import javax.xml.parsers.DocumentBuilderFactory;
41 7546 tao
import javax.xml.parsers.ParserConfigurationException;
42 7542 tao
import javax.xml.xpath.XPath;
43 7546 tao
import javax.xml.xpath.XPathExpressionException;
44 7542 tao
import javax.xml.xpath.XPathFactory;
45
46 7546 tao
import org.apache.commons.codec.EncoderException;
47
import org.apache.commons.io.output.ByteArrayOutputStream;
48
import org.apache.commons.logging.Log;
49
import org.apache.commons.logging.LogFactory;
50 7542 tao
import org.apache.solr.client.solrj.SolrServer;
51 7546 tao
import org.apache.solr.client.solrj.SolrServerException;
52 7548 tao
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
53 7549 tao
//import org.apache.solr.client.solrj.impl.HttpSolrServer;
54 7547 tao
import org.apache.solr.client.solrj.response.UpdateResponse;
55 7546 tao
import org.apache.solr.common.SolrInputDocument;
56 7548 tao
import org.apache.solr.core.CoreContainer;
57 7542 tao
import org.dataone.cn.indexer.XMLNamespaceConfig;
58 7546 tao
import org.dataone.cn.indexer.XPathDocumentParser;
59 7542 tao
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
60 7546 tao
import org.dataone.cn.indexer.parser.SolrField;
61
import org.dataone.cn.indexer.solrhttp.SolrDoc;
62
import org.dataone.cn.indexer.solrhttp.SolrElementAdd;
63
import org.dataone.cn.indexer.solrhttp.SolrElementField;
64 7555 tao
import org.dataone.service.types.v1.SystemMetadata;
65
import org.dataone.service.util.TypeMarshaller;
66
import org.jibx.runtime.JiBXException;
67 7546 tao
import org.w3c.dom.Document;
68
import org.xml.sax.SAXException;
69 7542 tao
70
/**
71
 * A class that inserts, updates and removes indexes on a SOLR server.
72
 * @author tao
73
 *
74
 */
75
public class SolrIndex {
76 7546 tao
77 7549 tao
    private static final String SOLRHOMEPATH = "/Users/tao/Downloads/apache-solr-3.4.0/example/solr";
78 7548 tao
79
    private static final String SOLRHOME = "solr.solr.home";
80
    private static final String SOLRSERVERNAME = "metacat-core";
81
82
83 7542 tao
    private List<IDocumentSubprocessor> subprocessors = null;
84
    private SolrServer solrServer = null;
85
    private XMLNamespaceConfig xmlNamespaceConfig = null;
86 7546 tao
    private List<SolrField> sysmetaSolrFields = null;
87 7542 tao
88
    private static DocumentBuilderFactory documentBuilderFactory = null;
89
    private static DocumentBuilder builder = null;
90
91
    private static XPathFactory xpathFactory = null;
92
    private static XPath xpath = null;
93 7546 tao
    Log log = LogFactory.getLog(SolrIndex.class);
94 7542 tao
95 7546 tao
    // One-time setup of the shared, namespace-aware XML parser and the XPath evaluator.
    static {
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
        documentBuilderFactory.setNamespaceAware(true);
        try {
            builder = documentBuilderFactory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // Without a parser no document can be indexed. Fail class initialization with
            // the real cause instead of leaving builder null for a later NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
        xpathFactory = XPathFactory.newInstance();
        xpath = xpathFactory.newXPath();
    }
106
107 7542 tao
    /**
     * Constructor. Stores the given configuration, starts the SOLR server and then
     * initializes the expressions of the system metadata fields.
     * @param sysmetaSolrFields  the fields which will be extracted from the system metadata
     * @param xmlNamespaceConfig  the namespace context which will be set on the XPath object
     * @throws XPathExpressionException
     * @throws ParserConfigurationException
     * @throws IOException
     * @throws SAXException
     */
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        this.sysmetaSolrFields = sysmetaSolrFields;
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        // the server is started before the expressions are initialized
        this.initSolrServer();
        this.init();
    }
119
120 7548 tao
    private void initSolrServer() throws IOException, ParserConfigurationException, SAXException {
121
        System.setProperty(SOLRHOME, SOLRHOMEPATH);
122
        CoreContainer.Initializer init = new CoreContainer.Initializer();
123
        CoreContainer c = init.initialize();
124
        solrServer = new EmbeddedSolrServer(c, "collection1");
125
    }
126
127 7546 tao
    private void init() throws ParserConfigurationException, XPathExpressionException {
128
        xpath.setNamespaceContext(xmlNamespaceConfig);
129
        initExpressions();
130
    }
131
132
    private void initExpressions() throws XPathExpressionException {
133
        for (SolrField field : sysmetaSolrFields) {
134
            field.initExpression(xpath);
135
        }
136
137
    }
138 7542 tao
139 7546 tao
140 7542 tao
    /**
141
     * Get the list of the Subprocessors in this index.
142
     * @return the list of the Subprocessors.
143
     */
144
    public List<IDocumentSubprocessor> getSubprocessors() {
145
        return subprocessors;
146
    }
147
148
    /**
149
     * Set the list of Subprocessors.
150
     * @param subprocessorList  the list will be set.
151
     */
152
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
153 7546 tao
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
154 7542 tao
            subprocessor.initExpression(xpath);
155 7546 tao
        }
156 7542 tao
        this.subprocessors = subprocessorList;
157
    }
158 7546 tao
159
    /**
160
     * Generate the index for the given information
161
     * @param id
162 7555 tao
     * @param systemMetadata
163 7546 tao
     * @param dataStream
164
     * @return
165
     * @throws IOException
166
     * @throws SAXException
167
     * @throws ParserConfigurationException
168
     * @throws XPathExpressionException
169 7555 tao
     * @throws JiBXException
170 7546 tao
     * @throws EncoderException
171
     */
172 7555 tao
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
173 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
174 7555 tao
                    XPathExpressionException, JiBXException{
175 7546 tao
176
        // Load the System Metadata document
177 7555 tao
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
178
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
179
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
180
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
181 7546 tao
        if (sysMetaDoc == null) {
182
            log.error("Could not load System metadata for ID: " + id);
183
            return null;
184
        }
185
186
        // Extract the field values from the System Metadata
187
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
188
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
189
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
190
        docs.put(id, indexDocument);
191
192
        // Determine if subprocessors are available for this ID
193
        if (subprocessors != null) {
194
                    // for each subprocessor loaded from the spring config
195
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
196
                        // Does this subprocessor apply?
197
                        if (subprocessor.canProcess(sysMetaDoc)) {
198
                            // if so, then extract the additional information from the
199
                            // document.
200
                            try {
201
                                // docObject = the resource map document or science
202
                                // metadata document.
203
                                // note that resource map processing touches all objects
204
                                // referenced by the resource map.
205
                                Document docObject = generateXmlDocument(dataStream);
206
                                if (docObject == null) {
207
                                    log.error("Could not load OBJECT for ID " + id );
208
                                } else {
209
                                    docs = subprocessor.processDocument(id, docs, docObject);
210
                                }
211
                            } catch (Exception e) {
212
                                log.error(e.getStackTrace().toString());
213
                            }
214
                        }
215
                    }
216
       }
217
218
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
219
       // merge is only for resource map. We need more work here.
220
       for (SolrDoc mergeDoc : docs.values()) {
221
           if (!mergeDoc.isMerged()) {
222
                 //mergeWithIndexedDocument(mergeDoc);
223
           }
224
       }
225
226
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
227
228
       return docs;
229
    }
230
231
    /*
232
     * Generate a Document from the InputStream
233
     */
234
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
235
        Document doc = null;
236
237
        try {
238
            doc = builder.parse(smdStream);
239
        } catch (IOException e) {
240
            log.error(e.getMessage(), e);
241
        }
242
243
        return doc;
244
    }
245
246
    /*
247
     * Index the fields of the system metadata
248
     */
249
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
250
251
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
252
        // solrFields is the list of fields defined in the application context
253
254
        for (SolrField field : sysmetaSolrFields) {
255
            try {
256
                // the field.getFields method can return a single value or
257
                // multiple values for multi-valued fields
258
                // or can return multiple SOLR document fields.
259
                fieldList.addAll(field.getFields(doc, identifier));
260
            } catch (Exception e) {
261
                e.printStackTrace();
262
            }
263
        }
264
        return fieldList;
265
266
    }
267
268
    /**
269
     * Generate indexes for a newly inserted document.
270
     * @param pid  the id of this document
271
     * @param systemMetadata  the system metadata associated with the data object
272
     * @param data  the data object itself
273
     * @throws SolrServerException
274 7555 tao
     * @throws JiBXException
275 7546 tao
     */
276 7555 tao
    public void insert(String pid, SystemMetadata systemMetadata, InputStream data)
277 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
278 7555 tao
                    XPathExpressionException, SolrServerException, JiBXException {
279 7546 tao
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
280
281
        //transform the Map to the SolrInputDocument which can be used by the solr server
282
        if(docs != null) {
283
            Set<String> ids = docs.keySet();
284
            for(String id : ids) {
285
                SolrInputDocument solrDoc = new SolrInputDocument();
286
                if(id != null) {
287
                    SolrDoc doc = docs.get(id);
288
                    if(doc != null) {
289
                        List<SolrElementField> list = doc.getFieldList();
290
                        if(list != null) {
291 7547 tao
                            //solrDoc.addField(METACATPIDFIELD, pid);
292 7546 tao
                            Iterator<SolrElementField> iterator = list.iterator();
293
                            while (iterator.hasNext()) {
294
                                SolrElementField field = iterator.next();
295
                                if(field != null) {
296
                                    String value = field.getValue();
297
                                    String name = field.getName();
298 7555 tao
                                    //System.out.println("add name/value pair - "+name+"/"+value);
299 7546 tao
                                    solrDoc.addField(name, value);
300
                                }
301
                            }
302
                        }
303
                    }
304
                }
305
                if(!solrDoc.isEmpty()) {
306 7547 tao
                    UpdateResponse response = solrServer.add(solrDoc);
307
                    solrServer.commit();
308 7555 tao
                    //System.out.println("=================the response is:\n"+response.toString());
309 7546 tao
                }
310
            }
311
        }
312
    }
313 7547 tao
314
    /**
315
     * Remove the indexed associated with specified pid.
316
     * @param pid  the pid which the indexes are associated with
317
     * @throws IOException
318
     * @throws SolrServerException
319
     */
320
    public void remove(String pid) throws IOException, SolrServerException {
321 7548 tao
        solrServer.deleteById(pid);
322
        solrServer.commit();
323 7547 tao
324
    }
325 7542 tao
}