Project

General

Profile

1 7542 tao
/**
2 8138 tao
 *  Copyright: 2013 Regents of the University of California and the
3 7542 tao
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20
21 7555 tao
import java.io.ByteArrayInputStream;
22 8343 tao
import java.io.FileInputStream;
23 7786 tao
import java.io.FileNotFoundException;
24 7546 tao
import java.io.IOException;
25
import java.io.InputStream;
26
import java.util.ArrayList;
27 7800 tao
import java.util.Calendar;
28 8464 leinfelder
import java.util.Date;
29 7546 tao
import java.util.HashMap;
30
import java.util.Iterator;
31 7542 tao
import java.util.List;
32 7546 tao
import java.util.Map;
33
import java.util.Set;
34 7542 tao
35
import javax.xml.parsers.DocumentBuilder;
36
import javax.xml.parsers.DocumentBuilderFactory;
37 7546 tao
import javax.xml.parsers.ParserConfigurationException;
38 7542 tao
import javax.xml.xpath.XPath;
39 7546 tao
import javax.xml.xpath.XPathExpressionException;
40 7542 tao
import javax.xml.xpath.XPathFactory;
41
42 7546 tao
import org.apache.commons.codec.EncoderException;
43
import org.apache.commons.io.output.ByteArrayOutputStream;
44 7786 tao
import org.apache.commons.lang.StringUtils;
45 7546 tao
import org.apache.commons.logging.Log;
46
import org.apache.commons.logging.LogFactory;
47 7604 tao
import org.apache.solr.client.solrj.SolrQuery;
48 7542 tao
import org.apache.solr.client.solrj.SolrServer;
49 7546 tao
import org.apache.solr.client.solrj.SolrServerException;
50 7604 tao
import org.apache.solr.client.solrj.response.QueryResponse;
51 7547 tao
import org.apache.solr.client.solrj.response.UpdateResponse;
52 7604 tao
import org.apache.solr.common.SolrDocument;
53
import org.apache.solr.common.SolrDocumentList;
54 7546 tao
import org.apache.solr.common.SolrInputDocument;
55 8464 leinfelder
import org.apache.solr.schema.IndexSchema;
56 7542 tao
import org.dataone.cn.indexer.XMLNamespaceConfig;
57 8464 leinfelder
import org.dataone.cn.indexer.convert.SolrDateConverter;
58 7542 tao
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
59 7546 tao
import org.dataone.cn.indexer.parser.SolrField;
60 7786 tao
import org.dataone.cn.indexer.resourcemap.ResourceEntry;
61
import org.dataone.cn.indexer.resourcemap.ResourceMap;
62 8343 tao
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
63 7546 tao
import org.dataone.cn.indexer.solrhttp.SolrDoc;
64
import org.dataone.cn.indexer.solrhttp.SolrElementField;
65 7733 tao
import org.dataone.service.exceptions.NotFound;
66
import org.dataone.service.exceptions.NotImplemented;
67 7786 tao
import org.dataone.service.exceptions.ServiceFailure;
68 7733 tao
import org.dataone.service.exceptions.UnsupportedType;
69 7815 leinfelder
import org.dataone.service.types.v1.Event;
70 7577 tao
import org.dataone.service.types.v1.Identifier;
71 8826 leinfelder
import org.dataone.service.types.v2.SystemMetadata;
72 8464 leinfelder
import org.dataone.service.util.DateTimeMarshaller;
73 7555 tao
import org.dataone.service.util.TypeMarshaller;
74 8023 tao
import org.dspace.foresite.OREParserException;
75 7555 tao
import org.jibx.runtime.JiBXException;
76 7546 tao
import org.w3c.dom.Document;
77
import org.xml.sax.SAXException;
78 7542 tao
79 7828 leinfelder
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
80 8464 leinfelder
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
81 7800 tao
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
82 7711 tao
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
83
84 7542 tao
/**
85
 * A class that inserts, updates and removes indexes on a SOLR server.
86
 * @author tao
87
 *
88
 */
89
public class SolrIndex {
90 7591 leinfelder
91 7604 tao
    public static final String ID = "id";
92
    private static final String IDQUERY = ID+":*";
93 7542 tao
    private List<IDocumentSubprocessor> subprocessors = null;
94
    private SolrServer solrServer = null;
95
    private XMLNamespaceConfig xmlNamespaceConfig = null;
96 7546 tao
    private List<SolrField> sysmetaSolrFields = null;
97 7542 tao
98
    private static DocumentBuilderFactory documentBuilderFactory = null;
99
    private static DocumentBuilder builder = null;
100
101
    private static XPathFactory xpathFactory = null;
102
    private static XPath xpath = null;
103 7546 tao
    Log log = LogFactory.getLog(SolrIndex.class);
104 7542 tao
105 7546 tao
    static {
106
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
107
        documentBuilderFactory.setNamespaceAware(true);
108
        try {
109
            builder = documentBuilderFactory.newDocumentBuilder();
110
        } catch (ParserConfigurationException e) {
111
            e.printStackTrace();
112
        }
113
        xpathFactory = XPathFactory.newInstance();
114
        xpath = xpathFactory.newXPath();
115
    }
116
117 7542 tao
    /**
     * Creates an index helper and prepares the XPath expressions of the given
     * system metadata fields.
     *
     * @param xmlNamespaceConfig  the namespace context installed on the shared XPath evaluator
     * @param sysmetaSolrFields  the field definitions used to index the system metadata
     * @throws XPathExpressionException  if a field expression cannot be initialized
     * @throws ParserConfigurationException
     * @throws IOException
     * @throws SAXException
     */
    public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, List<SolrField> sysmetaSolrFields)
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        // both fields must be set before init() since it reads them
        this.sysmetaSolrFields = sysmetaSolrFields;
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        init();
    }
128 7542 tao
129 7546 tao
    private void init() throws ParserConfigurationException, XPathExpressionException {
130
        xpath.setNamespaceContext(xmlNamespaceConfig);
131
        initExpressions();
132
    }
133
134
    private void initExpressions() throws XPathExpressionException {
135
        for (SolrField field : sysmetaSolrFields) {
136
            field.initExpression(xpath);
137
        }
138
139
    }
140 7542 tao
141 7546 tao
142 7542 tao
    /**
143
     * Get the list of the Subprocessors in this index.
144
     * @return the list of the Subprocessors.
145
     */
146
    public List<IDocumentSubprocessor> getSubprocessors() {
147
        return subprocessors;
148
    }
149
150
    /**
151
     * Set the list of Subprocessors.
152
     * @param subprocessorList  the list will be set.
153
     */
154
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
155 7546 tao
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
156 7542 tao
            subprocessor.initExpression(xpath);
157 7546 tao
        }
158 7542 tao
        this.subprocessors = subprocessorList;
159
    }
160 7546 tao
161
    /**
162
     * Generate the index for the given information
163
     * @param id
164 7555 tao
     * @param systemMetadata
165 7546 tao
     * @param dataStream
166
     * @return
167
     * @throws IOException
168
     * @throws SAXException
169
     * @throws ParserConfigurationException
170
     * @throws XPathExpressionException
171 7555 tao
     * @throws JiBXException
172 7711 tao
     * @throws SolrServerException
173 7546 tao
     * @throws EncoderException
174 7733 tao
     * @throws UnsupportedType
175
     * @throws NotFound
176
     * @throws NotImplemented
177 7546 tao
     */
178 8766 leinfelder
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, String objectPath)
179 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
180 7733 tao
                    XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
181 7546 tao
182
        // Load the System Metadata document
183 7555 tao
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
184
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
185
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
186
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
187 7546 tao
        if (sysMetaDoc == null) {
188
            log.error("Could not load System metadata for ID: " + id);
189
            return null;
190
        }
191
192
        // Extract the field values from the System Metadata
193
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
194
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
195
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
196
        docs.put(id, indexDocument);
197
198
        // Determine if subprocessors are available for this ID
199
        if (subprocessors != null) {
200
                    // for each subprocessor loaded from the spring config
201
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
202
                        // Does this subprocessor apply?
203
                        if (subprocessor.canProcess(sysMetaDoc)) {
204
                            // if so, then extract the additional information from the
205
                            // document.
206
                            try {
207
                                // docObject = the resource map document or science
208
                                // metadata document.
209
                                // note that resource map processing touches all objects
210
                                // referenced by the resource map.
211 8766 leinfelder
                            	InputStream dataStream = new FileInputStream(objectPath);
212 7546 tao
                                Document docObject = generateXmlDocument(dataStream);
213
                                if (docObject == null) {
214 7852 tao
                                    throw new Exception("Could not load OBJECT for ID " + id );
215 7546 tao
                                } else {
216
                                    docs = subprocessor.processDocument(id, docs, docObject);
217
                                }
218
                            } catch (Exception e) {
219 8554 leinfelder
                                log.error(e.getMessage(), e);
220 7852 tao
                                throw new SolrServerException(e.getMessage());
221 7546 tao
                            }
222
                        }
223
                    }
224
       }
225
226
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
227
       // merge is only for resource map. We need more work here.
228
       for (SolrDoc mergeDoc : docs.values()) {
229
           if (!mergeDoc.isMerged()) {
230 7711 tao
                 mergeWithIndexedDocument(mergeDoc);
231 7546 tao
           }
232
       }
233
234
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
235
236
       return docs;
237
    }
238
239 7711 tao
    /**
240
     * Merge updates with existing solr documents
241
     *
242
     * This method appears to re-set the data package field data into the
243
     * document about to be updated in the solr index. Since packaging
244
     * information is derived from the package document (resource map), this
245
     * information is not present when processing a document contained in a data
246
     * package. This method replaces those values from the existing solr index
247
     * record for the document being processed. -- sroseboo, 1-18-12
248
     *
249
     * @param indexDocument
250
     * @return
251
     * @throws IOException
252
     * @throws EncoderException
253
     * @throws XPathExpressionException
254
     * @throws SAXException
255
     * @throws ParserConfigurationException
256
     * @throws SolrServerException
257 7733 tao
     * @throws UnsupportedType
258
     * @throws NotFound
259
     * @throws NotImplemented
260 7711 tao
     */
261
    // TODO:combine merge function with resourcemap merge function
262
263
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
264 7733 tao
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
265 7711 tao
        List<String> ids = new ArrayList<String>();
266
        ids.add(indexDocument.getIdentifier());
267
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
268
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
269
                : indexedDocuments.get(0);
270 8464 leinfelder
271
        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
272
273 7711 tao
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
274
            return indexDocument;
275
        } else {
276
            for (SolrElementField field : indexedDocument.getFieldList()) {
277
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
278
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
279
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
280
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
281
                    indexDocument.addField(field);
282 8464 leinfelder
                } else if (!indexSchema.isCopyFieldTarget(indexSchema.getField(field.getName())) && !indexDocument.hasField(field.getName())) {
283
                    indexDocument.addField(field);
284 7711 tao
                }
285
            }
286
287
            indexDocument.setMerged(true);
288
            return indexDocument;
289
        }
290
    }
291
292 7546 tao
    /*
293
     * Generate a Document from the InputStream
294
     */
295
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
296
        Document doc = null;
297
298
        try {
299
            doc = builder.parse(smdStream);
300
        } catch (IOException e) {
301
            log.error(e.getMessage(), e);
302
        }
303
304
        return doc;
305
    }
306
307
    /*
308
     * Index the fields of the system metadata
309
     */
310
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
311
312
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
313
        // solrFields is the list of fields defined in the application context
314
315
        for (SolrField field : sysmetaSolrFields) {
316
            try {
317
                // the field.getFields method can return a single value or
318
                // multiple values for multi-valued fields
319
                // or can return multiple SOLR document fields.
320
                fieldList.addAll(field.getFields(doc, identifier));
321
            } catch (Exception e) {
322
                e.printStackTrace();
323
            }
324
        }
325
        return fieldList;
326
327
    }
328
329
    /**
330 7577 tao
     * Check the parameters of the insert or update methods.
331
     * @param pid
332
     * @param systemMetadata
333
     * @param data
334
     * @throws SolrServerException
335
     */
336 8766 leinfelder
    private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException {
337 8343 tao
        if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) {
338 7577 tao
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
339
        }
340
        if(systemMetadata == null) {
341 8343 tao
            throw new SolrServerException("The system metadata of the indexed document "+pid.getValue()+ " should not be null.");
342 7577 tao
        }
343 8766 leinfelder
        if(objectPath == null) {
344 8343 tao
            throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null.");
345 7577 tao
        }
346
    }
347
348
    /**
349 7627 tao
     * Insert the indexes for a document.
350 7546 tao
     * @param pid  the id of this document
351
     * @param systemMetadata  the system metadata associated with the data object
352 8766 leinfelder
     * @param data  the path to the object file itself
353 7546 tao
     * @throws SolrServerException
354 7555 tao
     * @throws JiBXException
355 7711 tao
     * @throws EncoderException
356 7733 tao
     * @throws UnsupportedType
357
     * @throws NotFound
358
     * @throws NotImplemented
359 7546 tao
     */
360 8766 leinfelder
    private synchronized void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath)
361 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
362 7733 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType {
363 8766 leinfelder
        checkParams(pid, systemMetadata, objectPath);
364
        Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, objectPath);
365 7546 tao
366
        //transform the Map to the SolrInputDocument which can be used by the solr server
367
        if(docs != null) {
368
            Set<String> ids = docs.keySet();
369
            for(String id : ids) {
370
                if(id != null) {
371
                    SolrDoc doc = docs.get(id);
372 7786 tao
                    insertToIndex(doc);
373
                }
374
375
            }
376
        }
377
    }
378
379 8464 leinfelder
    /**
380
     * Adds the given fields to the solr index for the given pid, preserving the index values
381
     * that previously existed
382
     * @param pid
383
     * @param fields
384
     */
385
    public void insertFields(Identifier pid, Map<String, List<Object>> fields) {
386
387
    	try {
388
			// copy the original values already indexed for this document
389
	    	SolrQuery query = new SolrQuery("id:\"" + pid.getValue() + "\"");
390
	    	QueryResponse res = solrServer.query(query);
391
	    	SolrDoc doc = new SolrDoc();
392 8503 leinfelder
393
	    	// include existing values if they exist
394 8761 leinfelder
	        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
395
396 8503 leinfelder
	        if (res.getResults().size() > 0) {
397
		        SolrDocument orig = res.getResults().get(0);
398
		    	for (String fieldName: orig.getFieldNames()) {
399
		        	//  don't transfer the copyTo fields, otherwise there are errors
400
		        	if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
401
		        		continue;
402
		        	}
403
		        	for (Object value: orig.getFieldValues(fieldName)) {
404
		        		String stringValue = value.toString();
405
		        		// special handling for dates in ISO 8601
406
		        		if (value instanceof Date) {
407
		        			stringValue = DateTimeMarshaller.serializeDateToUTC((Date)value);
408
		        			SolrDateConverter converter = new SolrDateConverter();
409
		        			stringValue = converter.convert(stringValue);
410
		        		}
411
						SolrElementField field = new SolrElementField(fieldName, stringValue);
412
						log.debug("Adding field: " + fieldName);
413
						doc.addField(field);
414
		        	}
415
		        }
416 8464 leinfelder
	        }
417
418
	        // add the additional fields we are trying to include in the index
419
	        for (String fieldName: fields.keySet()) {
420
	    		List<Object> values = fields.get(fieldName);
421
	    		for (Object value: values) {
422 8756 leinfelder
	    			if (!doc.hasFieldWithValue(fieldName, value.toString())) {
423 8761 leinfelder
	    				if (indexSchema.getField(fieldName).multiValued()) {
424
	    					doc.addField(new SolrElementField(fieldName, value.toString()));
425
	    				} else {
426
	    	    	    	doc.updateOrAddField(fieldName, value.toString());
427
	    				}
428 8756 leinfelder
	    			}
429 8464 leinfelder
	    		}
430
	    	}
431
432 8580 leinfelder
	        // make sure there is an id in the solrdoc so it is added to the index
433
	        if (!doc.hasField(ID)) {
434
	        	doc.updateOrAddField(ID, pid.getValue());
435
	        }
436
437 8464 leinfelder
	        // insert the whole thing
438
	        insertToIndex(doc);
439
    	} catch (Exception e) {
440
    		String error = "SolrIndex.insetFields - could not update the solr index: " + e.getMessage();
441
            writeEventLog(null, pid, error);
442
            log.error(error, e);
443
    	}
444
445
    }
446
447 7786 tao
    /*
448
     * Insert a SolrDoc to the solr server.
449
     */
450
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
451
        if(doc != null ) {
452
            SolrInputDocument solrDoc = new SolrInputDocument();
453
            List<SolrElementField> list = doc.getFieldList();
454
            if(list != null) {
455
                //solrDoc.addField(METACATPIDFIELD, pid);
456
                Iterator<SolrElementField> iterator = list.iterator();
457
                while (iterator.hasNext()) {
458
                    SolrElementField field = iterator.next();
459
                    if(field != null) {
460
                        String value = field.getValue();
461
                        String name = field.getName();
462
                        //System.out.println("add name/value pair - "+name+"/"+value);
463
                        solrDoc.addField(name, value);
464 7546 tao
                    }
465
                }
466
            }
467 7786 tao
            if(!solrDoc.isEmpty()) {
468 7856 tao
                /*IndexEvent event = new IndexEvent();
469 7800 tao
                event.setDate(Calendar.getInstance().getTime());
470
                Identifier pid = new Identifier();
471
                pid.setValue(doc.getIdentifier());
472 7856 tao
                event.setIdentifier(pid);*/
473 7800 tao
                try {
474 7801 tao
                    UpdateResponse response = solrServer.add(solrDoc);
475
                    solrServer.commit();
476 7805 tao
                    /*event.setType(IndexEvent.SUCCESSINSERT);
477 7801 tao
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
478
                    try {
479
                        EventlogFactory.createIndexEventLog().write(event);
480
                    } catch (Exception e) {
481 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+e.getMessage());
482 7805 tao
                    }*/
483 7801 tao
                } catch (SolrServerException e) {
484 7856 tao
                    /*event.setAction(Event.CREATE);
485 7801 tao
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
486
                    try {
487
                        EventlogFactory.createIndexEventLog().write(event);
488
                    } catch (Exception ee) {
489 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
490 7856 tao
                    }*/
491 7801 tao
                    throw e;
492
                } catch (IOException e) {
493 7856 tao
                    /*event.setAction(Event.CREATE);
494 7801 tao
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
495
                    try {
496
                        EventlogFactory.createIndexEventLog().write(event);
497
                    } catch (Exception ee) {
498 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
499 7856 tao
                    }*/
500 7801 tao
                    throw e;
501
502 7800 tao
                }
503 7786 tao
                //System.out.println("=================the response is:\n"+response.toString());
504
            }
505 7546 tao
        }
506
    }
507 7577 tao
508
    /**
509 7627 tao
     * Update the solr index. This method handles the three scenarios:
510 8343 tao
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
511 7627 tao
     *    remove the index for the previous version(s) and generate new index for the doc.
512 8343 tao
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
513 7627 tao
     *    index for the doc.
514 7577 tao
     */
515 8343 tao
    public void update(Identifier pid, SystemMetadata systemMetadata) {
516
        String objectPath = null;
517
        try {
518
            objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
519 8766 leinfelder
            update(pid, systemMetadata, objectPath);
520 8343 tao
            EventlogFactory.createIndexEventLog().remove(pid);
521
        } catch (Exception e) {
522
            String error = "SolrIndex.update - could not update the solr index since " + e.getMessage();
523
            writeEventLog(systemMetadata, pid, error);
524
            log.error(error, e);
525
        }
526 7577 tao
    }
527 7603 tao
528 7627 tao
529 8343 tao
    /**
530
     * Update the solr index. This method handles the three scenarios:
531
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
532
     *    remove the index for the previous version(s) and generate new index for the doc.
533
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
534
     *    index for the doc.
535
     * @param pid
536
     * @param systemMetadata
537
     * @param data
538
     * @throws SolrServerException
539
     * @throws ServiceFailure
540
     * @throws XPathExpressionException
541
     * @throws NotImplemented
542
     * @throws NotFound
543
     * @throws UnsupportedType
544
     * @throws IOException
545
     * @throws SAXException
546
     * @throws ParserConfigurationException
547
     * @throws OREParserException
548
     * @throws JiBXException
549
     * @throws EncoderException
550
     */
551 8766 leinfelder
    void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException,
552 8343 tao
                                ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType,
553
                                IOException, SAXException, ParserConfigurationException, OREParserException, JiBXException, EncoderException {
554 8766 leinfelder
        checkParams(pid, systemMetadata, objectPath);
555 8722 leinfelder
        boolean isArchive = systemMetadata.getArchived() != null && systemMetadata.getArchived();
556 8343 tao
        if(isArchive ) {
557
            //delete the index for the archived objects
558
            remove(pid.getValue(), systemMetadata);
559
            log.info("SolrIndex.update============================= archive the idex for the identifier "+pid);
560
        } else {
561
            //generate index for either add or update.
562 8766 leinfelder
            insert(pid, systemMetadata, objectPath);
563 8343 tao
            log.info("SolrIndex.update============================= insert index for the identifier "+pid);
564
        }
565
    }
566 7627 tao
567 7786 tao
568
569
    /*
570
     * Is the pid a resource map
571
     */
572 8343 tao
    private boolean isDataPackage(String pid, SystemMetadata sysmeta) throws FileNotFoundException, ServiceFailure {
573 7786 tao
        boolean isDataPackage = false;
574 8343 tao
        //SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
575 7786 tao
        if(sysmeta != null) {
576 8352 tao
            isDataPackage = IndexGeneratorTimerTask.isResourceMap(sysmeta.getFormatId());
577 7786 tao
        }
578
        return isDataPackage;
579
    }
580
581
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
582
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
583
        if (dataPackageIndexDoc != null) {
584
            String resourceMapId = dataPackageIndexDoc
585
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
586
            return StringUtils.isNotEmpty(resourceMapId);
587
        } else {
588
            return false;
589
        }
590
    }
591 8343 tao
    /**
592
     * Remove the indexed associated with specified pid.
593
     * @param pid  the pid which the indexes are associated with
594
     * @throws IOException
595
     * @throws SolrServerException
596
     * @throws ParserConfigurationException
597
     * @throws SAXException
598
     * @throws UnsupportedType
599
     * @throws NotFound
600
     * @throws NotImplemented
601
     * @throws XPathExpressionException
602
     * @throws ServiceFailure
603
     * @throws OREParserException
604
     */
605
    private void remove(String pid, SystemMetadata sysmeta) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
606
        if (isDataPackage(pid, sysmeta)) {
607
            removeDataPackage(pid);
608
        } else if (isPartOfDataPackage(pid)) {
609
            removeFromDataPackage(pid);
610
        } else {
611
            removeFromIndex(pid);
612
        }
613
    }
614
615
    /*
616
     * Remove a resource map pid
617
     */
618
    private void removeDataPackage(String pid) throws ServiceFailure, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, OREParserException  {
619
        Document resourceMapDoc = generateXmlDocument(DistributedMapsFactory.getDataObject(pid));
620
        //ResourceMap resourceMap = new ResourceMap(resourceMapDoc);
621
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDoc);
622
        List<String> documentIds = resourceMap.getAllDocumentIDs();
623
        List<SolrDoc> indexDocuments =ResourceMapSubprocessor.getSolrDocs(documentIds);
624
        removeFromIndex(pid);
625
        //List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
626
        // for each document in data package:
627
        for (SolrDoc indexDoc : indexDocuments) {
628 7786 tao
629 8343 tao
            if (indexDoc.getIdentifier().equals(pid)) {
630
                continue; // skipping the resource map, no need update
631
                          // it.
632
                          // will
633
                          // be removed.
634
            }
635
636
            // Remove resourceMap reference
637
            indexDoc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
638
                    resourceMap.getIdentifier());
639
640
            // // Remove documents/documentedby values for this resource
641
            // map
642
            for (ResourceEntry entry : resourceMap.getMappedReferences()) {
643
                if (indexDoc.getIdentifier().equals(entry.getIdentifier())) {
644
                    for (String documentedBy : entry.getDocumentedBy()) {
645
                        // Using removeOneFieldWithValue in-case same
646
                        // documents
647
                        // are in more than one data package. just
648
                        // remove
649
                        // one
650
                        // instance of data package info.
651
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY,
652
                                documentedBy);
653
                    }
654
                    for (String documents : entry.getDocuments()) {
655
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_DOCUMENTS,
656
                                documents);
657
                    }
658
                    break;
659
                }
660
            }
661
            removeFromIndex(indexDoc.getIdentifier());
662
            insertToIndex(indexDoc);
663
            //docsToUpdate.add(indexDoc);
664
        }
665
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
666
        //httpService.sendUpdate(solrIndexUri, addCommand);
667
    }
668
669 8344 tao
    /*
670
     * Remove a pid which is part of resource map.
671
     */
672 8343 tao
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
673
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
674
        removeFromIndex(pid);
675
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
676
677
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
678
        for (String documentsValue : documents) {
679
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
680
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
681
            removeFromIndex(documentsValue);
682
            insertToIndex(solrDoc);
683
        }
684
685
        List<String> documentedBy = indexedDoc
686
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
687
        for (String documentedByValue : documentedBy) {
688
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
689 8345 tao
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, pid);
690 8343 tao
            //docsToUpdate.add(solrDoc);
691
            removeFromIndex(documentedByValue);
692
            insertToIndex(solrDoc);
693
        }
694
695
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
696
        //httpService.sendUpdate(solrIndexUri, addCommand);
697
    }
698
699
    /*
700
     * Remove a pid from the solr index
701
     */
702 8344 tao
    private synchronized void removeFromIndex(String pid) throws SolrServerException, IOException {
703 8343 tao
        if(pid != null && !pid.trim().equals("")) {
704
            /*IndexEvent event = new IndexEvent();
705
            event.setDate(Calendar.getInstance().getTime());
706
            Identifier identifier = new Identifier();
707
            identifier.setValue(pid);
708
            event.setIdentifier(identifier);*/
709
            try {
710
                solrServer.deleteById(pid);
711
                solrServer.commit();
712
                /*event.setType(IndexEvent.SUCCESSDELETE);
713
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
714
                try {
715
                    EventlogFactory.createIndexEventLog().write(event);
716
                } catch (Exception e) {
717
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+e.getMessage());
718
                }*/
719
            } catch (SolrServerException e) {
720
                /*event.setAction(Event.DELETE);
721
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
722
                try {
723
                    EventlogFactory.createIndexEventLog().write(event);
724
                } catch (Exception ee) {
725
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
726
                }*/
727
                throw e;
728
729
            } catch (IOException e) {
730
                /*event.setAction(Event.DELETE);
731
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
732
                try {
733
                    EventlogFactory.createIndexEventLog().write(event);
734
                } catch (Exception ee) {
735
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
736
                }*/
737
                throw e;
738
            }
739
740
        }
741
    }
742
743 7569 tao
    /**
744
     * Get the solrServer
745
     * @return
746
     */
747 7604 tao
    public SolrServer getSolrServer() {
748 7569 tao
        return solrServer;
749
    }
750
751
    /**
752 7604 tao
     * Set the solrServer.
753 7569 tao
     * @param solrServer
754
     */
755 7604 tao
    public void setSolrServer(SolrServer solrServer) {
756 7569 tao
        this.solrServer = solrServer;
757
    }
758 7604 tao
759
    /**
760 7606 tao
     * Get all indexed ids in the solr server.
761
     * @return an empty list if there is no index.
762 7604 tao
     * @throws SolrServerException
763
     */
764
    public List<String> getSolrIds() throws SolrServerException {
765
        List<String> list = new ArrayList<String>();
766
        SolrQuery query = new SolrQuery(IDQUERY);
767
        query.setRows(Integer.MAX_VALUE);
768
        query.setFields(ID);
769
        QueryResponse response = solrServer.query(query);
770
        SolrDocumentList docs = response.getResults();
771
        if(docs != null) {
772
            for(SolrDocument doc :docs) {
773
                String identifier = (String)doc.getFieldValue(ID);
774
                //System.out.println("======================== "+identifier);
775
                list.add(identifier);
776
            }
777
        }
778
        return list;
779
    }
780 8343 tao
781
    private void writeEventLog(SystemMetadata systemMetadata, Identifier pid, String error) {
782
        IndexEvent event = new IndexEvent();
783
        event.setIdentifier(pid);
784
        event.setDate(Calendar.getInstance().getTime());
785
        String action = null;
786
        if (systemMetadata == null ) {
787
            action = Event.CREATE.xmlValue();
788
            event.setAction(Event.CREATE);
789
        }
790
        else if(systemMetadata.getArchived()) {
791
            action = Event.DELETE.xmlValue();
792
            event.setAction(Event.DELETE);
793
        } else {
794
            action = Event.CREATE.xmlValue();
795
            event.setAction(Event.CREATE);
796
        }
797
        event.setDescription("Failed to "+action+"the solr index for the id "+pid.getValue()+" since "+error);
798
        try {
799
            EventlogFactory.createIndexEventLog().write(event);
800
        } catch (Exception ee) {
801
            log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
802
        }
803
    }
804 7542 tao
}