Project

General

Profile

1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20

    
21
import java.io.ByteArrayInputStream;
22
import java.io.FileInputStream;
23
import java.io.FileNotFoundException;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.net.MalformedURLException;
27
import java.util.ArrayList;
28
import java.util.Calendar;
29
import java.util.Date;
30
import java.util.HashMap;
31
import java.util.Iterator;
32
import java.util.List;
33
import java.util.Map;
34
import java.util.Set;
35

    
36
import javax.xml.parsers.DocumentBuilder;
37
import javax.xml.parsers.DocumentBuilderFactory;
38
import javax.xml.parsers.ParserConfigurationException;
39
import javax.xml.xpath.XPath;
40
import javax.xml.xpath.XPathExpressionException;
41
import javax.xml.xpath.XPathFactory;
42

    
43
import org.apache.commons.codec.EncoderException;
44
import org.apache.commons.io.output.ByteArrayOutputStream;
45
import org.apache.commons.lang.StringUtils;
46
import org.apache.commons.logging.Log;
47
import org.apache.commons.logging.LogFactory;
48
import org.apache.solr.client.solrj.SolrQuery;
49
import org.apache.solr.client.solrj.SolrServer;
50
import org.apache.solr.client.solrj.SolrServerException;
51
import org.apache.solr.client.solrj.response.QueryResponse;
52
import org.apache.solr.client.solrj.response.UpdateResponse;
53
import org.apache.solr.common.SolrDocument;
54
import org.apache.solr.common.SolrDocumentList;
55
import org.apache.solr.common.SolrInputDocument;
56
import org.apache.solr.schema.IndexSchema;
57
import org.dataone.cn.indexer.XMLNamespaceConfig;
58
import org.dataone.cn.indexer.convert.SolrDateConverter;
59
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
60
import org.dataone.cn.indexer.parser.SolrField;
61
import org.dataone.cn.indexer.resourcemap.ResourceEntry;
62
import org.dataone.cn.indexer.resourcemap.ResourceMap;
63
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
64
import org.dataone.cn.indexer.solrhttp.SolrDoc;
65
import org.dataone.cn.indexer.solrhttp.SolrElementField;
66
import org.dataone.service.exceptions.NotFound;
67
import org.dataone.service.exceptions.NotImplemented;
68
import org.dataone.service.exceptions.ServiceFailure;
69
import org.dataone.service.exceptions.UnsupportedType;
70
import org.dataone.service.types.v1.Event;
71
import org.dataone.service.types.v1.Identifier;
72
import org.dataone.service.types.v1.SystemMetadata;
73
import org.dataone.service.util.DateTimeMarshaller;
74
import org.dataone.service.util.TypeMarshaller;
75
import org.dspace.foresite.OREParserException;
76
import org.jibx.runtime.JiBXException;
77
import org.w3c.dom.Document;
78
import org.xml.sax.SAXException;
79

    
80
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
81
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
82
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
83
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
84

    
85
/**
86
 * A class that inserts, updates and removes index documents on a SOLR server.
87
 * @author tao
88
 *
89
 */
90
public class SolrIndex {
91
            
92
    public static final String ID = "id";
93
    private static final String IDQUERY = ID+":*";
94
    private List<IDocumentSubprocessor> subprocessors = null;
95
    private SolrServer solrServer = null;
96
    private XMLNamespaceConfig xmlNamespaceConfig = null;
97
    private List<SolrField> sysmetaSolrFields = null;
98

    
99
    private static DocumentBuilderFactory documentBuilderFactory = null;
100
    private static DocumentBuilder builder = null;
101

    
102
    private static XPathFactory xpathFactory = null;
103
    private static XPath xpath = null;
104
    Log log = LogFactory.getLog(SolrIndex.class);
105
    
106
    static {
107
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
108
        documentBuilderFactory.setNamespaceAware(true);
109
        try {
110
            builder = documentBuilderFactory.newDocumentBuilder();
111
        } catch (ParserConfigurationException e) {
112
            e.printStackTrace();
113
        }
114
        xpathFactory = XPathFactory.newInstance();
115
        xpath = xpathFactory.newXPath();
116
    }
117
    
118
    /**
     * Creates the index helper: stores the two configuration objects and then
     * runs init(), which installs the namespace context on the shared XPath and
     * initialises the expression of every system metadata field.
     * @param xmlNamespaceConfig  the namespace context to be used by the XPath object
     * @param sysmetaSolrFields  the field definitions applied to the system metadata
     * @throws XPathExpressionException  propagated from the field initialisation
     * @throws ParserConfigurationException  declared by init()
     * @throws IOException  declared for callers; not thrown by the visible code
     * @throws SAXException  declared for callers; not thrown by the visible code
     */
    public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, List<SolrField> sysmetaSolrFields)
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        this.sysmetaSolrFields = sysmetaSolrFields;
        init();
    }
129
    
130
    private void init() throws ParserConfigurationException, XPathExpressionException {
131
        xpath.setNamespaceContext(xmlNamespaceConfig);
132
        initExpressions();
133
    }
134

    
135
    private void initExpressions() throws XPathExpressionException {
136
        for (SolrField field : sysmetaSolrFields) {
137
            field.initExpression(xpath);
138
        }
139

    
140
    }
141
    
142
    
143
    /**
144
     * Get the list of the Subprocessors in this index.
145
     * @return the list of the Subprocessors.
146
     */
147
    public List<IDocumentSubprocessor> getSubprocessors() {
148
        return subprocessors;
149
    }
150

    
151
    /**
152
     * Set the list of Subprocessors.
153
     * @param subprocessorList  the list will be set.
154
     */
155
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
156
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
157
            subprocessor.initExpression(xpath);
158
        }
159
        this.subprocessors = subprocessorList;
160
    }
161
    
162
    /**
163
     * Generate the index for the given information
164
     * @param id
165
     * @param systemMetadata
166
     * @param dataStream
167
     * @return
168
     * @throws IOException
169
     * @throws SAXException
170
     * @throws ParserConfigurationException
171
     * @throws XPathExpressionException
172
     * @throws JiBXException 
173
     * @throws SolrServerException 
174
     * @throws EncoderException
175
     * @throws UnsupportedType 
176
     * @throws NotFound 
177
     * @throws NotImplemented 
178
     */
179
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
180
                    throws IOException, SAXException, ParserConfigurationException,
181
                    XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
182

    
183
        // Load the System Metadata document
184
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
185
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
186
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
187
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
188
        if (sysMetaDoc == null) {
189
            log.error("Could not load System metadata for ID: " + id);
190
            return null;
191
        }
192

    
193
        // Extract the field values from the System Metadata
194
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
195
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
196
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
197
        docs.put(id, indexDocument);
198

    
199
        // Determine if subprocessors are available for this ID
200
        if (subprocessors != null) {
201
                    // for each subprocessor loaded from the spring config
202
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
203
                        // Does this subprocessor apply?
204
                        if (subprocessor.canProcess(sysMetaDoc)) {
205
                            // if so, then extract the additional information from the
206
                            // document.
207
                            try {
208
                                // docObject = the resource map document or science
209
                                // metadata document.
210
                                // note that resource map processing touches all objects
211
                                // referenced by the resource map.
212
                                Document docObject = generateXmlDocument(dataStream);
213
                                if (docObject == null) {
214
                                    throw new Exception("Could not load OBJECT for ID " + id );
215
                                } else {
216
                                    docs = subprocessor.processDocument(id, docs, docObject);
217
                                }
218
                            } catch (Exception e) {
219
                                log.error(e.getMessage(), e);
220
                                throw new SolrServerException(e.getMessage());
221
                            }
222
                        }
223
                    }
224
       }
225

    
226
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
227
       // merge is only for resource map. We need more work here.
228
       for (SolrDoc mergeDoc : docs.values()) {
229
           if (!mergeDoc.isMerged()) {
230
                 mergeWithIndexedDocument(mergeDoc);
231
           }
232
       }
233

    
234
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
235
               
236
       return docs;
237
    }
238
    
239
    /**
240
     * Merge updates with existing solr documents
241
     * 
242
     * This method appears to re-set the data package field data into the
243
     * document about to be updated in the solr index. Since packaging
244
     * information is derived from the package document (resource map), this
245
     * information is not present when processing a document contained in a data
246
     * package. This method replaces those values from the existing solr index
247
     * record for the document being processed. -- sroseboo, 1-18-12
248
     * 
249
     * @param indexDocument
250
     * @return
251
     * @throws IOException
252
     * @throws EncoderException
253
     * @throws XPathExpressionException
254
     * @throws SAXException 
255
     * @throws ParserConfigurationException 
256
     * @throws SolrServerException 
257
     * @throws UnsupportedType 
258
     * @throws NotFound 
259
     * @throws NotImplemented 
260
     */
261
    // TODO:combine merge function with resourcemap merge function
262

    
263
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
264
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
265
        List<String> ids = new ArrayList<String>();
266
        ids.add(indexDocument.getIdentifier());
267
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
268
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
269
                : indexedDocuments.get(0);
270
        
271
        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
272

    
273
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
274
            return indexDocument;
275
        } else {
276
            for (SolrElementField field : indexedDocument.getFieldList()) {
277
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
278
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
279
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
280
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
281
                    indexDocument.addField(field);
282
                } else if (!indexSchema.isCopyFieldTarget(indexSchema.getField(field.getName())) && !indexDocument.hasField(field.getName())) {
283
                    indexDocument.addField(field);
284
                }
285
            }
286

    
287
            indexDocument.setMerged(true);
288
            return indexDocument;
289
        }
290
    }
291
    
292
    /*
293
     * Generate a Document from the InputStream
294
     */
295
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
296
        Document doc = null;
297

    
298
        try {
299
            doc = builder.parse(smdStream);
300
        } catch (IOException e) {
301
            log.error(e.getMessage(), e);
302
        }
303

    
304
        return doc;
305
    }
306
    
307
    /*
308
     * Index the fields of the system metadata
309
     */
310
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
311

    
312
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
313
        // solrFields is the list of fields defined in the application context
314
       
315
        for (SolrField field : sysmetaSolrFields) {
316
            try {
317
                // the field.getFields method can return a single value or
318
                // multiple values for multi-valued fields
319
                // or can return multiple SOLR document fields.
320
                fieldList.addAll(field.getFields(doc, identifier));
321
            } catch (Exception e) {
322
                e.printStackTrace();
323
            }
324
        }
325
        return fieldList;
326

    
327
    }
328
    
329
    /**
330
     * Check the parameters of the insert or update methods.
331
     * @param pid
332
     * @param systemMetadata
333
     * @param data
334
     * @throws SolrServerException
335
     */
336
    private void checkParams(Identifier pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
337
        if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) {
338
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
339
        }
340
        if(systemMetadata == null) {
341
            throw new SolrServerException("The system metadata of the indexed document "+pid.getValue()+ " should not be null.");
342
        }
343
        if(data == null) {
344
            throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null.");
345
        }
346
    }
347
    
348
    /**
349
     * Insert the indexes for a document.
350
     * @param pid  the id of this document
351
     * @param systemMetadata  the system metadata associated with the data object
352
     * @param data  the data object itself
353
     * @throws SolrServerException 
354
     * @throws JiBXException 
355
     * @throws EncoderException 
356
     * @throws UnsupportedType 
357
     * @throws NotFound 
358
     * @throws NotImplemented 
359
     */
360
    private synchronized void insert(Identifier pid, SystemMetadata systemMetadata, InputStream data) 
361
                    throws IOException, SAXException, ParserConfigurationException,
362
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType {
363
        checkParams(pid, systemMetadata, data);
364
        Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, data);
365
        
366
        //transform the Map to the SolrInputDocument which can be used by the solr server
367
        if(docs != null) {
368
            Set<String> ids = docs.keySet();
369
            for(String id : ids) {
370
                if(id != null) {
371
                    SolrDoc doc = docs.get(id);
372
                    insertToIndex(doc);
373
                }
374
                
375
            }
376
        }
377
    }
378
    
379
    /**
380
     * Adds the given fields to the solr index for the given pid, preserving the index values
381
     * that previously existed
382
     * @param pid
383
     * @param fields
384
     */
385
    public void insertFields(Identifier pid, Map<String, List<Object>> fields) {
386
    	
387
    	try {
388
			// copy the original values already indexed for this document	
389
	    	SolrQuery query = new SolrQuery("id:\"" + pid.getValue() + "\"");
390
	    	QueryResponse res = solrServer.query(query);
391
	    	SolrDoc doc = new SolrDoc();
392
	    	
393
	    	// include existing values if they exist
394
	        if (res.getResults().size() > 0) {
395
		        SolrDocument orig = res.getResults().get(0);
396
		        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
397
		    	for (String fieldName: orig.getFieldNames()) {
398
		        	//  don't transfer the copyTo fields, otherwise there are errors
399
		        	if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
400
		        		continue;
401
		        	}
402
		        	for (Object value: orig.getFieldValues(fieldName)) {
403
		        		String stringValue = value.toString();
404
		        		// special handling for dates in ISO 8601
405
		        		if (value instanceof Date) {
406
		        			stringValue = DateTimeMarshaller.serializeDateToUTC((Date)value);
407
		        			SolrDateConverter converter = new SolrDateConverter();
408
		        			stringValue = converter.convert(stringValue);
409
		        		}
410
						SolrElementField field = new SolrElementField(fieldName, stringValue);
411
						log.debug("Adding field: " + fieldName);
412
						doc.addField(field);
413
		        	}
414
		        }
415
	        }
416
	    	
417
	        // add the additional fields we are trying to include in the index
418
	        for (String fieldName: fields.keySet()) {
419
	    		List<Object> values = fields.get(fieldName);
420
	    		for (Object value: values) {
421
	    			if (!doc.hasFieldWithValue(fieldName, value.toString())) {
422
		    			doc.addField(new SolrElementField(fieldName, value.toString()));
423
	    			}
424
	    	    	//doc.updateOrAddField(fieldName, value.toString());
425
	    		}
426
	    	}
427
	        
428
	        // make sure there is an id in the solrdoc so it is added to the index
429
	        if (!doc.hasField(ID)) {
430
	        	doc.updateOrAddField(ID, pid.getValue());
431
	        }
432
	        
433
	        // insert the whole thing
434
	        insertToIndex(doc);
435
    	} catch (Exception e) {
436
    		String error = "SolrIndex.insetFields - could not update the solr index: " + e.getMessage();
437
            writeEventLog(null, pid, error);
438
            log.error(error, e);
439
    	}
440

    
441
    }
442
    
443
    /*
444
     * Insert a SolrDoc to the solr server.
445
     */
446
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
447
        if(doc != null ) {
448
            SolrInputDocument solrDoc = new SolrInputDocument();
449
            List<SolrElementField> list = doc.getFieldList();
450
            if(list != null) {
451
                //solrDoc.addField(METACATPIDFIELD, pid);
452
                Iterator<SolrElementField> iterator = list.iterator();
453
                while (iterator.hasNext()) {
454
                    SolrElementField field = iterator.next();
455
                    if(field != null) {
456
                        String value = field.getValue();
457
                        String name = field.getName();
458
                        //System.out.println("add name/value pair - "+name+"/"+value);
459
                        solrDoc.addField(name, value);
460
                    }
461
                }
462
            }
463
            if(!solrDoc.isEmpty()) {
464
                /*IndexEvent event = new IndexEvent();
465
                event.setDate(Calendar.getInstance().getTime());
466
                Identifier pid = new Identifier();
467
                pid.setValue(doc.getIdentifier());
468
                event.setIdentifier(pid);*/
469
                try {
470
                    UpdateResponse response = solrServer.add(solrDoc);
471
                    solrServer.commit();
472
                    /*event.setType(IndexEvent.SUCCESSINSERT);
473
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
474
                    try {
475
                        EventlogFactory.createIndexEventLog().write(event);
476
                    } catch (Exception e) {
477
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+e.getMessage());
478
                    }*/
479
                } catch (SolrServerException e) {
480
                    /*event.setAction(Event.CREATE);
481
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
482
                    try {
483
                        EventlogFactory.createIndexEventLog().write(event);
484
                    } catch (Exception ee) {
485
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
486
                    }*/
487
                    throw e;
488
                } catch (IOException e) {
489
                    /*event.setAction(Event.CREATE);
490
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
491
                    try {
492
                        EventlogFactory.createIndexEventLog().write(event);
493
                    } catch (Exception ee) {
494
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
495
                    }*/
496
                    throw e;
497
                    
498
                }
499
                //System.out.println("=================the response is:\n"+response.toString());
500
            }
501
        }
502
    }
503
    
504
    /**
505
     * Update the solr index. This method handles the three scenarios:
506
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
507
     *    remove the index for the previous version(s) and generate new index for the doc.
508
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
509
     *    index for the doc.
510
     */
511
    public void update(Identifier pid, SystemMetadata systemMetadata) {
512
        String objectPath = null;
513
        InputStream data = null;
514
        try {
515
            objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
516
            data = new FileInputStream(objectPath);
517
            update(pid, systemMetadata, data);
518
            EventlogFactory.createIndexEventLog().remove(pid);
519
        } catch (Exception e) {
520
            String error = "SolrIndex.update - could not update the solr index since " + e.getMessage();
521
            writeEventLog(systemMetadata, pid, error);
522
            log.error(error, e);
523
        }
524
    }
525
    
526
    
527
    /**
528
     * Update the solr index. This method handles the three scenarios:
529
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
530
     *    remove the index for the previous version(s) and generate new index for the doc.
531
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
532
     *    index for the doc.
533
     * @param pid
534
     * @param systemMetadata
535
     * @param data
536
     * @throws SolrServerException
537
     * @throws ServiceFailure
538
     * @throws XPathExpressionException
539
     * @throws NotImplemented
540
     * @throws NotFound
541
     * @throws UnsupportedType
542
     * @throws IOException
543
     * @throws SAXException
544
     * @throws ParserConfigurationException
545
     * @throws OREParserException
546
     * @throws JiBXException
547
     * @throws EncoderException
548
     */
549
    void update(Identifier pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException, 
550
                                ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, 
551
                                IOException, SAXException, ParserConfigurationException, OREParserException, JiBXException, EncoderException {
552
        checkParams(pid, systemMetadata, data);
553
        boolean isArchive = systemMetadata.getArchived() != null && systemMetadata.getArchived();
554
        if(isArchive ) {
555
            //delete the index for the archived objects
556
            remove(pid.getValue(), systemMetadata);
557
            log.info("SolrIndex.update============================= archive the idex for the identifier "+pid);
558
        } else {
559
            //generate index for either add or update.
560
            insert(pid, systemMetadata, data);
561
            log.info("SolrIndex.update============================= insert index for the identifier "+pid);
562
        }
563
    }
564
    
565
   
566

    
567
    /*
568
     * Is the pid a resource map
569
     */
570
    private boolean isDataPackage(String pid, SystemMetadata sysmeta) throws FileNotFoundException, ServiceFailure {
571
        boolean isDataPackage = false;
572
        //SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
573
        if(sysmeta != null) {
574
            isDataPackage = IndexGeneratorTimerTask.isResourceMap(sysmeta.getFormatId());
575
        }
576
        return isDataPackage;
577
    }
578

    
579
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
580
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
581
        if (dataPackageIndexDoc != null) {
582
            String resourceMapId = dataPackageIndexDoc
583
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
584
            return StringUtils.isNotEmpty(resourceMapId);
585
        } else {
586
            return false;
587
        }
588
    }
589
    /**
590
     * Remove the indexed associated with specified pid.
591
     * @param pid  the pid which the indexes are associated with
592
     * @throws IOException
593
     * @throws SolrServerException
594
     * @throws ParserConfigurationException 
595
     * @throws SAXException 
596
     * @throws UnsupportedType 
597
     * @throws NotFound 
598
     * @throws NotImplemented 
599
     * @throws XPathExpressionException 
600
     * @throws ServiceFailure 
601
     * @throws OREParserException 
602
     */
603
    private void remove(String pid, SystemMetadata sysmeta) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
604
        if (isDataPackage(pid, sysmeta)) {
605
            removeDataPackage(pid);
606
        } else if (isPartOfDataPackage(pid)) {
607
            removeFromDataPackage(pid);
608
        } else {
609
            removeFromIndex(pid);
610
        }
611
    }
612
    
613
    /*
614
     * Remove a resource map pid
615
     */
616
    private void removeDataPackage(String pid) throws ServiceFailure, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, OREParserException  {
617
        Document resourceMapDoc = generateXmlDocument(DistributedMapsFactory.getDataObject(pid));
618
        //ResourceMap resourceMap = new ResourceMap(resourceMapDoc);
619
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDoc);
620
        List<String> documentIds = resourceMap.getAllDocumentIDs();
621
        List<SolrDoc> indexDocuments =ResourceMapSubprocessor.getSolrDocs(documentIds);
622
        removeFromIndex(pid);
623
        //List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
624
        // for each document in data package:
625
        for (SolrDoc indexDoc : indexDocuments) {
626

    
627
            if (indexDoc.getIdentifier().equals(pid)) {
628
                continue; // skipping the resource map, no need update
629
                          // it.
630
                          // will
631
                          // be removed.
632
            }
633

    
634
            // Remove resourceMap reference
635
            indexDoc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
636
                    resourceMap.getIdentifier());
637

    
638
            // // Remove documents/documentedby values for this resource
639
            // map
640
            for (ResourceEntry entry : resourceMap.getMappedReferences()) {
641
                if (indexDoc.getIdentifier().equals(entry.getIdentifier())) {
642
                    for (String documentedBy : entry.getDocumentedBy()) {
643
                        // Using removeOneFieldWithValue in-case same
644
                        // documents
645
                        // are in more than one data package. just
646
                        // remove
647
                        // one
648
                        // instance of data package info.
649
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY,
650
                                documentedBy);
651
                    }
652
                    for (String documents : entry.getDocuments()) {
653
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_DOCUMENTS,
654
                                documents);
655
                    }
656
                    break;
657
                }
658
            }
659
            removeFromIndex(indexDoc.getIdentifier());
660
            insertToIndex(indexDoc);
661
            //docsToUpdate.add(indexDoc);
662
        }
663
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
664
        //httpService.sendUpdate(solrIndexUri, addCommand);
665
    }
666

    
667
    /*
668
     * Remove a pid which is part of resource map.
669
     */
670
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
671
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
672
        removeFromIndex(pid);
673
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
674

    
675
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
676
        for (String documentsValue : documents) {
677
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
678
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
679
            removeFromIndex(documentsValue);
680
            insertToIndex(solrDoc);
681
        }
682

    
683
        List<String> documentedBy = indexedDoc
684
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
685
        for (String documentedByValue : documentedBy) {
686
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
687
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, pid);
688
            //docsToUpdate.add(solrDoc);
689
            removeFromIndex(documentedByValue);
690
            insertToIndex(solrDoc);
691
        }
692

    
693
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
694
        //httpService.sendUpdate(solrIndexUri, addCommand);
695
    }
696

    
697
    /*
698
     * Remove a pid from the solr index
699
     */
700
    private synchronized void removeFromIndex(String pid) throws SolrServerException, IOException {
701
        if(pid != null && !pid.trim().equals("")) {
702
            /*IndexEvent event = new IndexEvent();
703
            event.setDate(Calendar.getInstance().getTime());
704
            Identifier identifier = new Identifier();
705
            identifier.setValue(pid);
706
            event.setIdentifier(identifier);*/
707
            try {
708
                solrServer.deleteById(pid);
709
                solrServer.commit();
710
                /*event.setType(IndexEvent.SUCCESSDELETE);
711
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
712
                try {
713
                    EventlogFactory.createIndexEventLog().write(event);
714
                } catch (Exception e) {
715
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+e.getMessage());
716
                }*/
717
            } catch (SolrServerException e) {
718
                /*event.setAction(Event.DELETE);
719
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
720
                try {
721
                    EventlogFactory.createIndexEventLog().write(event);
722
                } catch (Exception ee) {
723
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
724
                }*/
725
                throw e;
726
                
727
            } catch (IOException e) {
728
                /*event.setAction(Event.DELETE);
729
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
730
                try {
731
                    EventlogFactory.createIndexEventLog().write(event);
732
                } catch (Exception ee) {
733
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
734
                }*/
735
                throw e;
736
            }
737
            
738
        }
739
    }
740

    
741
    /**
742
     * Get the solrServer
743
     * @return
744
     */
745
    public SolrServer getSolrServer() {
746
        return solrServer;
747
    }
748

    
749
    /**
750
     * Set the solrServer. 
751
     * @param solrServer
752
     */
753
    public void setSolrServer(SolrServer solrServer) {
754
        this.solrServer = solrServer;
755
    }
756
    
757
    /**
758
     * Get all indexed ids in the solr server. 
759
     * @return an empty list if there is no index.
760
     * @throws SolrServerException
761
     */
762
    public List<String> getSolrIds() throws SolrServerException {
763
        List<String> list = new ArrayList<String>();
764
        SolrQuery query = new SolrQuery(IDQUERY); 
765
        query.setRows(Integer.MAX_VALUE); 
766
        query.setFields(ID); 
767
        QueryResponse response = solrServer.query(query); 
768
        SolrDocumentList docs = response.getResults();
769
        if(docs != null) {
770
            for(SolrDocument doc :docs) {
771
                String identifier = (String)doc.getFieldValue(ID);
772
                //System.out.println("======================== "+identifier);
773
                list.add(identifier);
774
            }
775
        }
776
        return list;
777
    }
778
    
779
    private void writeEventLog(SystemMetadata systemMetadata, Identifier pid, String error) {
780
        IndexEvent event = new IndexEvent();
781
        event.setIdentifier(pid);
782
        event.setDate(Calendar.getInstance().getTime());
783
        String action = null;
784
        if (systemMetadata == null ) {
785
            action = Event.CREATE.xmlValue();
786
            event.setAction(Event.CREATE);
787
        }
788
        else if(systemMetadata.getArchived()) {
789
            action = Event.DELETE.xmlValue();
790
            event.setAction(Event.DELETE);
791
        } else {
792
            action = Event.CREATE.xmlValue();
793
            event.setAction(Event.CREATE);
794
        }
795
        event.setDescription("Failed to "+action+"the solr index for the id "+pid.getValue()+" since "+error);
796
        try {
797
            EventlogFactory.createIndexEventLog().write(event);
798
        } catch (Exception ee) {
799
            log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
800
        }
801
    }
802
}
(5-5/6)