Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that inserts, updates and removes the index
4
 *             of a document on a SOLR server
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.ByteArrayInputStream;
30
import java.io.IOException;
31
import java.io.InputStream;
32
import java.util.ArrayList;
33
import java.util.HashMap;
34
import java.util.Iterator;
35
import java.util.List;
36
import java.util.Map;
37
import java.util.Set;
38

    
39
import javax.xml.parsers.DocumentBuilder;
40
import javax.xml.parsers.DocumentBuilderFactory;
41
import javax.xml.parsers.ParserConfigurationException;
42
import javax.xml.xpath.XPath;
43
import javax.xml.xpath.XPathExpressionException;
44
import javax.xml.xpath.XPathFactory;
45

    
46
import org.apache.commons.codec.EncoderException;
47
import org.apache.commons.io.output.ByteArrayOutputStream;
48
import org.apache.commons.logging.Log;
49
import org.apache.commons.logging.LogFactory;
50
import org.apache.solr.client.solrj.SolrServer;
51
import org.apache.solr.client.solrj.SolrServerException;
52
import org.apache.solr.client.solrj.response.UpdateResponse;
53
import org.apache.solr.common.SolrInputDocument;
54
import org.dataone.cn.indexer.XMLNamespaceConfig;
55
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
56
import org.dataone.cn.indexer.parser.SolrField;
57
import org.dataone.cn.indexer.solrhttp.SolrDoc;
58
import org.dataone.cn.indexer.solrhttp.SolrElementField;
59
import org.dataone.service.types.v1.Identifier;
60
import org.dataone.service.types.v1.SystemMetadata;
61
import org.dataone.service.util.TypeMarshaller;
62
import org.jibx.runtime.JiBXException;
63
import org.w3c.dom.Document;
64
import org.xml.sax.SAXException;
65

    
66
/**
67
 * A class does insert, update and remove indexes to a SOLR server
68
 * @author tao
69
 *
70
 */
71
public class SolrIndex {
72
            
73
    private List<IDocumentSubprocessor> subprocessors = null;
74
    private SolrServer solrServer = null;
75
    private XMLNamespaceConfig xmlNamespaceConfig = null;
76
    private List<SolrField> sysmetaSolrFields = null;
77

    
78
    private static DocumentBuilderFactory documentBuilderFactory = null;
79
    private static DocumentBuilder builder = null;
80

    
81
    private static XPathFactory xpathFactory = null;
82
    private static XPath xpath = null;
83
    Log log = LogFactory.getLog(SolrIndex.class);
84
    
85
    static {
86
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
87
        documentBuilderFactory.setNamespaceAware(true);
88
        try {
89
            builder = documentBuilderFactory.newDocumentBuilder();
90
        } catch (ParserConfigurationException e) {
91
            e.printStackTrace();
92
        }
93
        xpathFactory = XPathFactory.newInstance();
94
        xpath = xpathFactory.newXPath();
95
    }
96
    
97
    /**
98
     * Constructor
99
     * @throws SAXException 
100
     * @throws IOException 
101
     */
102
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
103
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
104
         this.xmlNamespaceConfig = xmlNamespaceConfig;
105
         this.sysmetaSolrFields = sysmetaSolrFields;
106
         init();
107
    }
108
    
109
    private void init() throws ParserConfigurationException, XPathExpressionException {
110
        xpath.setNamespaceContext(xmlNamespaceConfig);
111
        initExpressions();
112
    }
113

    
114
    private void initExpressions() throws XPathExpressionException {
115
        for (SolrField field : sysmetaSolrFields) {
116
            field.initExpression(xpath);
117
        }
118

    
119
    }
120
    
121
    
122
    /**
123
     * Get the list of the Subprocessors in this index.
124
     * @return the list of the Subprocessors.
125
     */
126
    public List<IDocumentSubprocessor> getSubprocessors() {
127
        return subprocessors;
128
    }
129

    
130
    /**
131
     * Set the list of Subprocessors.
132
     * @param subprocessorList  the list will be set.
133
     */
134
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
135
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
136
            subprocessor.initExpression(xpath);
137
        }
138
        this.subprocessors = subprocessorList;
139
    }
140
    
141
    /**
142
     * Generate the index for the given information
143
     * @param id
144
     * @param systemMetadata
145
     * @param dataStream
146
     * @return
147
     * @throws IOException
148
     * @throws SAXException
149
     * @throws ParserConfigurationException
150
     * @throws XPathExpressionException
151
     * @throws JiBXException 
152
     * @throws EncoderException
153
     */
154
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
155
                    throws IOException, SAXException, ParserConfigurationException,
156
                    XPathExpressionException, JiBXException{
157

    
158
        // Load the System Metadata document
159
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
160
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
161
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
162
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
163
        if (sysMetaDoc == null) {
164
            log.error("Could not load System metadata for ID: " + id);
165
            return null;
166
        }
167

    
168
        // Extract the field values from the System Metadata
169
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
170
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
171
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
172
        docs.put(id, indexDocument);
173

    
174
        // Determine if subprocessors are available for this ID
175
        if (subprocessors != null) {
176
                    // for each subprocessor loaded from the spring config
177
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
178
                        // Does this subprocessor apply?
179
                        if (subprocessor.canProcess(sysMetaDoc)) {
180
                            // if so, then extract the additional information from the
181
                            // document.
182
                            try {
183
                                // docObject = the resource map document or science
184
                                // metadata document.
185
                                // note that resource map processing touches all objects
186
                                // referenced by the resource map.
187
                                Document docObject = generateXmlDocument(dataStream);
188
                                if (docObject == null) {
189
                                    log.error("Could not load OBJECT for ID " + id );
190
                                } else {
191
                                    docs = subprocessor.processDocument(id, docs, docObject);
192
                                }
193
                            } catch (Exception e) {
194
                                log.error(e.getStackTrace().toString());
195
                            }
196
                        }
197
                    }
198
       }
199

    
200
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
201
       // merge is only for resource map. We need more work here.
202
       for (SolrDoc mergeDoc : docs.values()) {
203
           if (!mergeDoc.isMerged()) {
204
                 //mergeWithIndexedDocument(mergeDoc);
205
           }
206
       }
207

    
208
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
209
               
210
       return docs;
211
    }
212
    
213
    /*
214
     * Generate a Document from the InputStream
215
     */
216
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
217
        Document doc = null;
218

    
219
        try {
220
            doc = builder.parse(smdStream);
221
        } catch (IOException e) {
222
            log.error(e.getMessage(), e);
223
        }
224

    
225
        return doc;
226
    }
227
    
228
    /*
229
     * Index the fields of the system metadata
230
     */
231
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
232

    
233
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
234
        // solrFields is the list of fields defined in the application context
235
       
236
        for (SolrField field : sysmetaSolrFields) {
237
            try {
238
                // the field.getFields method can return a single value or
239
                // multiple values for multi-valued fields
240
                // or can return multiple SOLR document fields.
241
                fieldList.addAll(field.getFields(doc, identifier));
242
            } catch (Exception e) {
243
                e.printStackTrace();
244
            }
245
        }
246
        return fieldList;
247

    
248
    }
249
    
250
    /**
251
     * Check the parameters of the insert or update methods.
252
     * @param pid
253
     * @param systemMetadata
254
     * @param data
255
     * @throws SolrServerException
256
     */
257
    private void checkParams(String pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
258
        if(pid == null || pid.trim().equals("")) {
259
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
260
        }
261
        if(systemMetadata == null) {
262
            throw new SolrServerException("The system metadata of the indexed document should not be null.");
263
        }
264
        if(data == null) {
265
            throw new SolrServerException("The indexed document itself should not be null.");
266
        }
267
    }
268
    
269
    /**
270
     * Generate indexes for a newly inserted document.
271
     * @param pid  the id of this document
272
     * @param systemMetadata  the system metadata associated with the data object
273
     * @param data  the data object itself
274
     * @throws SolrServerException 
275
     * @throws JiBXException 
276
     */
277
    public void insert(String pid, SystemMetadata systemMetadata, InputStream data) 
278
                    throws IOException, SAXException, ParserConfigurationException,
279
                    XPathExpressionException, SolrServerException, JiBXException {
280
        checkParams(pid, systemMetadata, data);
281
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
282
        
283
        //transform the Map to the SolrInputDocument which can be used by the solr server
284
        if(docs != null) {
285
            Set<String> ids = docs.keySet();
286
            for(String id : ids) {
287
                SolrInputDocument solrDoc = new SolrInputDocument();
288
                if(id != null) {
289
                    SolrDoc doc = docs.get(id);
290
                    if(doc != null) {
291
                        List<SolrElementField> list = doc.getFieldList();
292
                        if(list != null) {
293
                            //solrDoc.addField(METACATPIDFIELD, pid);
294
                            Iterator<SolrElementField> iterator = list.iterator();
295
                            while (iterator.hasNext()) {
296
                                SolrElementField field = iterator.next();
297
                                if(field != null) {
298
                                    String value = field.getValue();
299
                                    String name = field.getName();
300
                                    //System.out.println("add name/value pair - "+name+"/"+value);
301
                                    solrDoc.addField(name, value);
302
                                }
303
                            }
304
                        }
305
                    }
306
                }
307
                if(!solrDoc.isEmpty()) {
308
                    UpdateResponse response = solrServer.add(solrDoc);
309
                    solrServer.commit();
310
                    //System.out.println("=================the response is:\n"+response.toString());
311
                }
312
            }
313
        }
314
    }
315
    
316
    /**
317
     * Update an existed document. First, remove the index of the old one. Second,
318
     * insert the new document
319
     * @param newPid  the new id of the document
320
     * @param systemMetadata  the system metadata associated with the data object
321
     * @param data  the data object itself
322
     * @throws SolrServerException 
323
     * @throws JiBXException 
324
     */
325
    public void update(String newPid, SystemMetadata systemMetadata, InputStream data) 
326
                    throws IOException, SAXException, ParserConfigurationException,
327
                    XPathExpressionException, SolrServerException, JiBXException {
328
        checkParams(newPid, systemMetadata, data);
329
        Identifier oldIdentifier = systemMetadata.getObsoletes();
330
        if(oldIdentifier == null) {
331
            throw new SolrServerException("The system metadata of the new document doesn't have the obsoletes element in the update operation.");
332
        }
333
        String oldIdStr = oldIdentifier.getValue();
334
        remove(oldIdStr);
335
        insert(newPid, systemMetadata, data);
336
    }
337
 
338
    /**
339
     * Remove the indexed associated with specified pid.
340
     * @param pid  the pid which the indexes are associated with
341
     * @throws IOException
342
     * @throws SolrServerException
343
     */
344
    public void remove(String pid) throws IOException, SolrServerException {
345
        solrServer.deleteById(pid);
346
        solrServer.commit();
347
       
348
    }
349

    
350
    /**
351
     * Get the solrServer
352
     * @return
353
     */
354
    SolrServer getSolrServer() {
355
        return solrServer;
356
    }
357

    
358
    /**
359
     * Set the solrServer. This method is only for setting a test solr server in the junit test.
360
     * @param solrServer
361
     */
362
    void setSolrServer(SolrServer solrServer) {
363
        this.solrServer = solrServer;
364
    }
365
}
(3-3/5)