Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that inserts, updates and removes the index entries
4
 *             of documents on a SOLR server
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.ByteArrayInputStream;
30
import java.io.IOException;
31
import java.io.InputStream;
32
import java.util.ArrayList;
33
import java.util.HashMap;
34
import java.util.Iterator;
35
import java.util.List;
36
import java.util.Map;
37
import java.util.Set;
38

    
39
import javax.xml.parsers.DocumentBuilder;
40
import javax.xml.parsers.DocumentBuilderFactory;
41
import javax.xml.parsers.ParserConfigurationException;
42
import javax.xml.xpath.XPath;
43
import javax.xml.xpath.XPathExpressionException;
44
import javax.xml.xpath.XPathFactory;
45

    
46
import org.apache.commons.codec.EncoderException;
47
import org.apache.commons.io.output.ByteArrayOutputStream;
48
import org.apache.commons.logging.Log;
49
import org.apache.commons.logging.LogFactory;
50
import org.apache.solr.client.solrj.SolrServer;
51
import org.apache.solr.client.solrj.SolrServerException;
52
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
53
//import org.apache.solr.client.solrj.impl.HttpSolrServer;
54
import org.apache.solr.client.solrj.response.UpdateResponse;
55
import org.apache.solr.common.SolrInputDocument;
56
import org.apache.solr.core.CoreContainer;
57
import org.dataone.cn.indexer.XMLNamespaceConfig;
58
import org.dataone.cn.indexer.XPathDocumentParser;
59
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
60
import org.dataone.cn.indexer.parser.SolrField;
61
import org.dataone.cn.indexer.solrhttp.SolrDoc;
62
import org.dataone.cn.indexer.solrhttp.SolrElementAdd;
63
import org.dataone.cn.indexer.solrhttp.SolrElementField;
64
import org.dataone.configuration.Settings;
65
import org.dataone.service.types.v1.Identifier;
66
import org.dataone.service.types.v1.SystemMetadata;
67
import org.dataone.service.util.TypeMarshaller;
68
import org.jibx.runtime.JiBXException;
69
import org.w3c.dom.Document;
70
import org.xml.sax.SAXException;
71

    
72
/**
73
 * A class that inserts, updates and removes index entries on a SOLR server
74
 * @author tao
75
 *
76
 */
77
public class SolrIndex {
78
    
79
    
80
    
81
    /** Name of the system property that initSolrServer() sets to the resolved SOLR home directory. */
    public static final String SOLRHOME = "solr.solr.home";
    /** Name of the configuration property from which the SOLR home directory is read. */
    public static final String SOLRHOMEPROPERTYNAME = "solr.homeDir";

    /** Web context name used to build the default SOLR home path when none is configured. */
    private static final String SOLRINDEXWEBCONTEXT = "metacat-index";
    /** Not referenced by the code of this class at present. */
    private static final String SOLRSERVERNAME = "metacat-core";

    /** Optional subprocessors applied to the indexed object; null until set. */
    private List<IDocumentSubprocessor> subprocessors = null;
    /** The SOLR server that receives the add, delete and commit requests. */
    private SolrServer solrServer = null;
    /** Namespace context installed on the shared XPath evaluator. */
    private XMLNamespaceConfig xmlNamespaceConfig = null;
    /** The SOLR fields extracted from the system metadata document. */
    private List<SolrField> sysmetaSolrFields = null;

    // XML parsing and XPath helpers are created once and shared by all instances.
    private static DocumentBuilderFactory documentBuilderFactory = null;
    private static DocumentBuilder builder = null;

    private static XPathFactory xpathFactory = null;
    private static XPath xpath = null;

    // Static so that the static initializer below can report failures through it.
    static final Log log = LogFactory.getLog(SolrIndex.class);

    static {
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
        documentBuilderFactory.setNamespaceAware(true);
        try {
            builder = documentBuilderFactory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // The builder stays null in this case, so report the failure in the
            // application log rather than only on stderr.
            log.error("Could not create the XML DocumentBuilder: " + e.getMessage(), e);
        }
        xpathFactory = XPathFactory.newInstance();
        xpath = xpathFactory.newXPath();
    }
111
    
112
    /**
     * Creates the index handler: stores the supplied configuration, starts the
     * SOLR server and initializes the XPath expressions of the system metadata fields.
     * @param sysmetaSolrFields  the SOLR fields to extract from the system metadata
     * @param xmlNamespaceConfig  the namespace context installed on the XPath evaluator
     * @throws XPathExpressionException
     * @throws ParserConfigurationException
     * @throws IOException
     * @throws SAXException
     */
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        this.sysmetaSolrFields = sysmetaSolrFields;
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        // The server is started before the expressions are initialized.
        initSolrServer();
        init();
    }
124
    
125
    private void initSolrServer() throws IOException, ParserConfigurationException, SAXException {
126
        String solrHomeDir = null;
127
        solrHomeDir = Settings.getConfiguration().getString(SOLRHOMEPROPERTYNAME);
128
        log.info("========================= the solr home from the metacat.properties is "+solrHomeDir);
129
        if(solrHomeDir == null || solrHomeDir.trim().equals("")) {
130
            String deployDir = Settings.getConfiguration().getString("application.deployDir");
131
            if(deployDir == null || deployDir.trim().equals("")) {
132
                solrHomeDir =  SOLRINDEXWEBCONTEXT+"/WEB-INF/classes/solr-home";
133
            } else {
134
                solrHomeDir =  deployDir + "/" +SOLRINDEXWEBCONTEXT+"/WEB-INF/classes/solr-home";
135
            }
136
            
137
        }
138
        log.info("==========================================final solr home is "+solrHomeDir);
139
        System.setProperty(SOLRHOME, solrHomeDir);
140
        CoreContainer.Initializer init = new CoreContainer.Initializer();
141
        CoreContainer c = init.initialize();
142
        solrServer = new EmbeddedSolrServer(c, "collection1");
143
    }
144
    
145
    private void init() throws ParserConfigurationException, XPathExpressionException {
146
        xpath.setNamespaceContext(xmlNamespaceConfig);
147
        initExpressions();
148
    }
149

    
150
    private void initExpressions() throws XPathExpressionException {
151
        for (SolrField field : sysmetaSolrFields) {
152
            field.initExpression(xpath);
153
        }
154

    
155
    }
156
    
157
    
158
    /**
159
     * Get the list of the Subprocessors in this index.
160
     * @return the list of the Subprocessors.
161
     */
162
    public List<IDocumentSubprocessor> getSubprocessors() {
163
        return subprocessors;
164
    }
165

    
166
    /**
167
     * Set the list of Subprocessors.
168
     * @param subprocessorList  the list will be set.
169
     */
170
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
171
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
172
            subprocessor.initExpression(xpath);
173
        }
174
        this.subprocessors = subprocessorList;
175
    }
176
    
177
    /**
178
     * Generate the index for the given information
179
     * @param id
180
     * @param systemMetadata
181
     * @param dataStream
182
     * @return
183
     * @throws IOException
184
     * @throws SAXException
185
     * @throws ParserConfigurationException
186
     * @throws XPathExpressionException
187
     * @throws JiBXException 
188
     * @throws EncoderException
189
     */
190
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
191
                    throws IOException, SAXException, ParserConfigurationException,
192
                    XPathExpressionException, JiBXException{
193

    
194
        // Load the System Metadata document
195
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
196
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
197
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
198
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
199
        if (sysMetaDoc == null) {
200
            log.error("Could not load System metadata for ID: " + id);
201
            return null;
202
        }
203

    
204
        // Extract the field values from the System Metadata
205
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
206
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
207
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
208
        docs.put(id, indexDocument);
209

    
210
        // Determine if subprocessors are available for this ID
211
        if (subprocessors != null) {
212
                    // for each subprocessor loaded from the spring config
213
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
214
                        // Does this subprocessor apply?
215
                        if (subprocessor.canProcess(sysMetaDoc)) {
216
                            // if so, then extract the additional information from the
217
                            // document.
218
                            try {
219
                                // docObject = the resource map document or science
220
                                // metadata document.
221
                                // note that resource map processing touches all objects
222
                                // referenced by the resource map.
223
                                Document docObject = generateXmlDocument(dataStream);
224
                                if (docObject == null) {
225
                                    log.error("Could not load OBJECT for ID " + id );
226
                                } else {
227
                                    docs = subprocessor.processDocument(id, docs, docObject);
228
                                }
229
                            } catch (Exception e) {
230
                                log.error(e.getStackTrace().toString());
231
                            }
232
                        }
233
                    }
234
       }
235

    
236
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
237
       // merge is only for resource map. We need more work here.
238
       for (SolrDoc mergeDoc : docs.values()) {
239
           if (!mergeDoc.isMerged()) {
240
                 //mergeWithIndexedDocument(mergeDoc);
241
           }
242
       }
243

    
244
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
245
               
246
       return docs;
247
    }
248
    
249
    /*
250
     * Generate a Document from the InputStream
251
     */
252
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
253
        Document doc = null;
254

    
255
        try {
256
            doc = builder.parse(smdStream);
257
        } catch (IOException e) {
258
            log.error(e.getMessage(), e);
259
        }
260

    
261
        return doc;
262
    }
263
    
264
    /*
265
     * Index the fields of the system metadata
266
     */
267
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
268

    
269
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
270
        // solrFields is the list of fields defined in the application context
271
       
272
        for (SolrField field : sysmetaSolrFields) {
273
            try {
274
                // the field.getFields method can return a single value or
275
                // multiple values for multi-valued fields
276
                // or can return multiple SOLR document fields.
277
                fieldList.addAll(field.getFields(doc, identifier));
278
            } catch (Exception e) {
279
                e.printStackTrace();
280
            }
281
        }
282
        return fieldList;
283

    
284
    }
285
    
286
    /**
287
     * Check the parameters of the insert or update methods.
288
     * @param pid
289
     * @param systemMetadata
290
     * @param data
291
     * @throws SolrServerException
292
     */
293
    private void checkParams(String pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
294
        if(pid == null || pid.trim().equals("")) {
295
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
296
        }
297
        if(systemMetadata == null) {
298
            throw new SolrServerException("The system metadata of the indexed document should not be null.");
299
        }
300
        if(data == null) {
301
            throw new SolrServerException("The indexed document itself should not be null.");
302
        }
303
    }
304
    
305
    /**
306
     * Generate indexes for a newly inserted document.
307
     * @param pid  the id of this document
308
     * @param systemMetadata  the system metadata associated with the data object
309
     * @param data  the data object itself
310
     * @throws SolrServerException 
311
     * @throws JiBXException 
312
     */
313
    public void insert(String pid, SystemMetadata systemMetadata, InputStream data) 
314
                    throws IOException, SAXException, ParserConfigurationException,
315
                    XPathExpressionException, SolrServerException, JiBXException {
316
        checkParams(pid, systemMetadata, data);
317
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
318
        
319
        //transform the Map to the SolrInputDocument which can be used by the solr server
320
        if(docs != null) {
321
            Set<String> ids = docs.keySet();
322
            for(String id : ids) {
323
                SolrInputDocument solrDoc = new SolrInputDocument();
324
                if(id != null) {
325
                    SolrDoc doc = docs.get(id);
326
                    if(doc != null) {
327
                        List<SolrElementField> list = doc.getFieldList();
328
                        if(list != null) {
329
                            //solrDoc.addField(METACATPIDFIELD, pid);
330
                            Iterator<SolrElementField> iterator = list.iterator();
331
                            while (iterator.hasNext()) {
332
                                SolrElementField field = iterator.next();
333
                                if(field != null) {
334
                                    String value = field.getValue();
335
                                    String name = field.getName();
336
                                    //System.out.println("add name/value pair - "+name+"/"+value);
337
                                    solrDoc.addField(name, value);
338
                                }
339
                            }
340
                        }
341
                    }
342
                }
343
                if(!solrDoc.isEmpty()) {
344
                    UpdateResponse response = solrServer.add(solrDoc);
345
                    solrServer.commit();
346
                    //System.out.println("=================the response is:\n"+response.toString());
347
                }
348
            }
349
        }
350
    }
351
    
352
    /**
353
     * Update an existed document. First, remove the index of the old one. Second,
354
     * insert the new document
355
     * @param newPid  the new id of the document
356
     * @param systemMetadata  the system metadata associated with the data object
357
     * @param data  the data object itself
358
     * @throws SolrServerException 
359
     * @throws JiBXException 
360
     */
361
    public void update(String newPid, SystemMetadata systemMetadata, InputStream data) 
362
                    throws IOException, SAXException, ParserConfigurationException,
363
                    XPathExpressionException, SolrServerException, JiBXException {
364
        checkParams(newPid, systemMetadata, data);
365
        Identifier oldIdentifier = systemMetadata.getObsoletes();
366
        if(oldIdentifier == null) {
367
            throw new SolrServerException("The system metadata of the new document doesn't have the obsoletes element in the update operation.");
368
        }
369
        String oldIdStr = oldIdentifier.getValue();
370
        remove(oldIdStr);
371
        insert(newPid, systemMetadata, data);
372
    }
373
 
374
    /**
375
     * Remove the indexed associated with specified pid.
376
     * @param pid  the pid which the indexes are associated with
377
     * @throws IOException
378
     * @throws SolrServerException
379
     */
380
    public void remove(String pid) throws IOException, SolrServerException {
381
        solrServer.deleteById(pid);
382
        solrServer.commit();
383
       
384
    }
385

    
386
    /**
387
     * Get the solrServer
388
     * @return
389
     */
390
    SolrServer getSolrServer() {
391
        return solrServer;
392
    }
393

    
394
    /**
395
     * Set the solrServer. This method is only for setting a test solr server in the junit test.
396
     * @param solrServer
397
     */
398
    void setSolrServer(SolrServer solrServer) {
399
        this.solrServer = solrServer;
400
    }
401
}
(3-3/4)