Project

General

Profile

1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20

    
21
import java.io.ByteArrayInputStream;
22
import java.io.FileInputStream;
23
import java.io.FileNotFoundException;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.net.MalformedURLException;
27
import java.util.ArrayList;
28
import java.util.Calendar;
29
import java.util.Collection;
30
import java.util.Date;
31
import java.util.HashMap;
32
import java.util.Iterator;
33
import java.util.List;
34
import java.util.Map;
35
import java.util.Set;
36

    
37
import javax.xml.parsers.DocumentBuilder;
38
import javax.xml.parsers.DocumentBuilderFactory;
39
import javax.xml.parsers.ParserConfigurationException;
40
import javax.xml.xpath.XPath;
41
import javax.xml.xpath.XPathExpressionException;
42
import javax.xml.xpath.XPathFactory;
43

    
44
import org.apache.commons.codec.EncoderException;
45
import org.apache.commons.collections.CollectionUtils;
46
import org.apache.commons.io.output.ByteArrayOutputStream;
47
import org.apache.commons.lang.StringUtils;
48
import org.apache.commons.logging.Log;
49
import org.apache.commons.logging.LogFactory;
50
import org.apache.solr.client.solrj.SolrQuery;
51
import org.apache.solr.client.solrj.SolrServer;
52
import org.apache.solr.client.solrj.SolrServerException;
53
import org.apache.solr.client.solrj.response.QueryResponse;
54
import org.apache.solr.client.solrj.response.UpdateResponse;
55
import org.apache.solr.common.SolrDocument;
56
import org.apache.solr.common.SolrDocumentList;
57
import org.apache.solr.common.SolrInputDocument;
58
import org.apache.solr.schema.IndexSchema;
59
import org.dataone.cn.indexer.XMLNamespaceConfig;
60
import org.dataone.cn.indexer.convert.SolrDateConverter;
61
import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor;
62
import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor;
63
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
64
import org.dataone.cn.indexer.parser.SolrField;
65
import org.dataone.cn.indexer.solrhttp.SolrDoc;
66
import org.dataone.cn.indexer.solrhttp.SolrElementField;
67
import org.dataone.exceptions.MarshallingException;
68
import org.dataone.service.exceptions.NotFound;
69
import org.dataone.service.exceptions.NotImplemented;
70
import org.dataone.service.exceptions.ServiceFailure;
71
import org.dataone.service.exceptions.UnsupportedType;
72
import org.dataone.service.types.v1.Event;
73
import org.dataone.service.types.v1.Identifier;
74
import org.dataone.service.types.v2.SystemMetadata;
75
import org.dataone.service.util.DateTimeMarshaller;
76
import org.dataone.service.util.TypeMarshaller;
77
import org.dspace.foresite.OREParserException;
78
import org.jibx.runtime.JiBXException;
79
import org.w3c.dom.Document;
80
import org.xml.sax.SAXException;
81

    
82
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
83
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
84
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
85
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
86

    
87
/**
88
 * A class that inserts, updates and removes index entries on a SOLR server.
89
 * @author tao
90
 *
91
 */
92
public class SolrIndex {
93
            
94
    public static final String ID = "id";
95
    private static final String IDQUERY = ID+":*";
96
    private List<IDocumentSubprocessor> subprocessors = null;
97
    private List<IDocumentDeleteSubprocessor> deleteSubprocessors = null;
98

    
99
    private SolrServer solrServer = null;
100
    private XMLNamespaceConfig xmlNamespaceConfig = null;
101
    private List<SolrField> sysmetaSolrFields = null;
102

    
103
    private static DocumentBuilderFactory documentBuilderFactory = null;
104
    private static DocumentBuilder builder = null;
105

    
106
    private static XPathFactory xpathFactory = null;
107
    private static XPath xpath = null;
108
    Log log = LogFactory.getLog(SolrIndex.class);
109
    
110
    static {
111
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
112
        documentBuilderFactory.setNamespaceAware(true);
113
        try {
114
            builder = documentBuilderFactory.newDocumentBuilder();
115
        } catch (ParserConfigurationException e) {
116
            e.printStackTrace();
117
        }
118
        xpathFactory = XPathFactory.newInstance();
119
        xpath = xpathFactory.newXPath();
120
    }
121
    
122
    /**
     * Creates the index helper: stores the namespace configuration and the
     * system metadata field definitions, then calls {@code init()}.
     *
     * @param xmlNamespaceConfig  namespace context installed on the shared XPath object
     * @param sysmetaSolrFields   solr field definitions evaluated against system metadata
     * @throws XPathExpressionException  propagated from {@code init()}
     * @throws ParserConfigurationException  propagated from {@code init()}
     * @throws IOException  declared in the signature
     * @throws SAXException  declared in the signature
     */
    public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, List<SolrField> sysmetaSolrFields)
            throws XPathExpressionException, ParserConfigurationException, IOException,
            SAXException {
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        this.sysmetaSolrFields = sysmetaSolrFields;
        init();
    }
133
    
134
    private void init() throws ParserConfigurationException, XPathExpressionException {
135
        xpath.setNamespaceContext(xmlNamespaceConfig);
136
        initExpressions();
137
    }
138

    
139
    private void initExpressions() throws XPathExpressionException {
140
        for (SolrField field : sysmetaSolrFields) {
141
            field.initExpression(xpath);
142
        }
143

    
144
    }
145
    
146
    
147
    /**
148
     * Get the list of the Subprocessors in this index.
149
     * @return the list of the Subprocessors.
150
     */
151
    public List<IDocumentSubprocessor> getSubprocessors() {
152
        return subprocessors;
153
    }
154

    
155
    /**
156
     * Set the list of Subprocessors.
157
     * @param subprocessorList  the list will be set.
158
     */
159
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
160
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
161
        	if (subprocessor instanceof BaseXPathDocumentSubprocessor) {
162
        		((BaseXPathDocumentSubprocessor)subprocessor).initExpression(xpath);
163
        	}
164
        }
165
        this.subprocessors = subprocessorList;
166
    }
167
    
168
    public List<IDocumentDeleteSubprocessor> getDeleteSubprocessors() {
169
		return deleteSubprocessors;
170
	}
171

    
172
	public void setDeleteSubprocessors(
173
			List<IDocumentDeleteSubprocessor> deleteSubprocessors) {
174
		this.deleteSubprocessors = deleteSubprocessors;
175
	}
176

    
177
	/**
178
     * Generate the index for the given information
179
     * @param id
180
     * @param systemMetadata
181
     * @param dataStream
182
     * @return
183
     * @throws IOException
184
     * @throws SAXException
185
     * @throws ParserConfigurationException
186
     * @throws XPathExpressionException
187
     * @throws JiBXException 
188
     * @throws SolrServerException 
189
     * @throws EncoderException
190
     * @throws UnsupportedType 
191
     * @throws NotFound 
192
     * @throws NotImplemented 
193
     */
194
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, String objectPath)
195
                    throws IOException, SAXException, ParserConfigurationException,
196
                    XPathExpressionException, MarshallingException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
197

    
198
        // Load the System Metadata document
199
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
200
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
201
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
202
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
203
        if (sysMetaDoc == null) {
204
            log.error("Could not load System metadata for ID: " + id);
205
            return null;
206
        }
207

    
208
        // Extract the field values from the System Metadata
209
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
210
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
211
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
212
        docs.put(id, indexDocument);
213
        
214
        // get the format id for this object
215
        String formatId = indexDocument.getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT);
216

    
217
        // Determine if subprocessors are available for this ID
218
        if (subprocessors != null) {
219
	        // for each subprocessor loaded from the spring config
220
	        for (IDocumentSubprocessor subprocessor : subprocessors) {
221
	            // Does this subprocessor apply?
222
	            if (subprocessor.canProcess(formatId)) {
223
	                // if so, then extract the additional information from the
224
	                // document.
225
	                try {
226
	                    // docObject = the resource map document or science
227
	                    // metadata document.
228
	                    // note that resource map processing touches all objects
229
	                    // referenced by the resource map.
230
	                	FileInputStream dataStream = new FileInputStream(objectPath);
231
	                    if (!dataStream.getFD().valid()) {
232
	                    	log.error("Could not load OBJECT file for ID,Path=" + id + ", "
233
                                    + objectPath);
234
	                        //throw new Exception("Could not load OBJECT for ID " + id );
235
	                    } else {
236
	                        docs = subprocessor.processDocument(id, docs, dataStream);
237
	                    }
238
	                } catch (Exception e) {
239
	                    e.printStackTrace();
240
	                    log.error(e.getMessage(), e);
241
	                    throw new SolrServerException(e.getMessage());
242
	                }
243
	            }
244
	        }
245
       }
246

    
247
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
248
       // merge is only for resource map. We need more work here.
249
       for (SolrDoc mergeDoc : docs.values()) {
250
           if (!mergeDoc.isMerged()) {
251
                 mergeWithIndexedDocument(mergeDoc);
252
           }
253
       }
254

    
255
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
256
               
257
       return docs;
258
    }
259
    
260
    /**
261
     * Merge updates with existing solr documents
262
     * 
263
     * This method appears to re-set the data package field data into the
264
     * document about to be updated in the solr index. Since packaging
265
     * information is derived from the package document (resource map), this
266
     * information is not present when processing a document contained in a data
267
     * package. This method replaces those values from the existing solr index
268
     * record for the document being processed. -- sroseboo, 1-18-12
269
     * 
270
     * @param indexDocument
271
     * @return
272
     * @throws IOException
273
     * @throws EncoderException
274
     * @throws XPathExpressionException
275
     * @throws SAXException 
276
     * @throws ParserConfigurationException 
277
     * @throws SolrServerException 
278
     * @throws UnsupportedType 
279
     * @throws NotFound 
280
     * @throws NotImplemented 
281
     */
282
    // TODO:combine merge function with resourcemap merge function
283

    
284
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
285
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
286
        List<String> ids = new ArrayList<String>();
287
        ids.add(indexDocument.getIdentifier());
288
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
289
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
290
                : indexedDocuments.get(0);
291
        
292
        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
293

    
294
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
295
            return indexDocument;
296
        } else {
297
            for (SolrElementField field : indexedDocument.getFieldList()) {
298
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
299
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
300
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
301
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
302
                    indexDocument.addField(field);
303
                } else if (!indexSchema.isCopyFieldTarget(indexSchema.getField(field.getName())) && !indexDocument.hasField(field.getName())) {
304
                    indexDocument.addField(field);
305
                }
306
            }
307

    
308
            indexDocument.setMerged(true);
309
            return indexDocument;
310
        }
311
    }
312
    
313
    /*
314
     * Generate a Document from the InputStream
315
     */
316
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
317
        Document doc = null;
318

    
319
        try {
320
            doc = builder.parse(smdStream);
321
        } catch (IOException e) {
322
            log.error(e.getMessage(), e);
323
        }
324

    
325
        return doc;
326
    }
327
    
328
    /*
329
     * Index the fields of the system metadata
330
     */
331
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
332

    
333
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
334
        // solrFields is the list of fields defined in the application context
335
       
336
        for (SolrField field : sysmetaSolrFields) {
337
            try {
338
                // the field.getFields method can return a single value or
339
                // multiple values for multi-valued fields
340
                // or can return multiple SOLR document fields.
341
                fieldList.addAll(field.getFields(doc, identifier));
342
            } catch (Exception e) {
343
                e.printStackTrace();
344
            }
345
        }
346
        return fieldList;
347

    
348
    }
349
    
350
    /**
351
     * Check the parameters of the insert or update methods.
352
     * @param pid
353
     * @param systemMetadata
354
     * @param data
355
     * @throws SolrServerException
356
     */
357
    private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException {
358
        if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) {
359
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
360
        }
361
        if(systemMetadata == null) {
362
            throw new SolrServerException("The system metadata of the indexed document "+pid.getValue()+ " should not be null.");
363
        }
364
        if(objectPath == null) {
365
            throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null.");
366
        }
367
    }
368
    
369
    /**
370
     * Insert the indexes for a document.
371
     * @param pid  the id of this document
372
     * @param systemMetadata  the system metadata associated with the data object
373
     * @param data  the path to the object file itself
374
     * @throws SolrServerException 
375
     * @throws JiBXException 
376
     * @throws EncoderException 
377
     * @throws UnsupportedType 
378
     * @throws NotFound 
379
     * @throws NotImplemented 
380
     */
381
    private synchronized void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath) 
382
                    throws IOException, SAXException, ParserConfigurationException,
383
                    XPathExpressionException, SolrServerException, MarshallingException, EncoderException, NotImplemented, NotFound, UnsupportedType {
384
        checkParams(pid, systemMetadata, objectPath);
385
        Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, objectPath);
386
        
387
        //transform the Map to the SolrInputDocument which can be used by the solr server
388
        if(docs != null) {
389
            Set<String> ids = docs.keySet();
390
            for(String id : ids) {
391
                if(id != null) {
392
                    SolrDoc doc = docs.get(id);
393
                    insertToIndex(doc);
394
                }
395
                
396
            }
397
        }
398
    }
399
    
400
    /**
401
     * Adds the given fields to the solr index for the given pid, preserving the index values
402
     * that previously existed
403
     * @param pid
404
     * @param fields
405
     */
406
    public void insertFields(Identifier pid, Map<String, List<Object>> fields) {
407
    	
408
    	try {
409
			// copy the original values already indexed for this document	
410
	    	SolrQuery query = new SolrQuery("id:\"" + pid.getValue() + "\"");
411
	    	QueryResponse res = solrServer.query(query);
412
	    	SolrDoc doc = new SolrDoc();
413
	    	
414
	    	// include existing values if they exist
415
	        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
416

    
417
	        if (res.getResults().size() > 0) {
418
		        SolrDocument orig = res.getResults().get(0);
419
		    	for (String fieldName: orig.getFieldNames()) {
420
		        	//  don't transfer the copyTo fields, otherwise there are errors
421
		        	if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
422
		        		continue;
423
		        	}
424
		        	for (Object value: orig.getFieldValues(fieldName)) {
425
		        		String stringValue = value.toString();
426
		        		// special handling for dates in ISO 8601
427
		        		if (value instanceof Date) {
428
		        			stringValue = DateTimeMarshaller.serializeDateToUTC((Date)value);
429
		        			SolrDateConverter converter = new SolrDateConverter();
430
		        			stringValue = converter.convert(stringValue);
431
		        		}
432
						SolrElementField field = new SolrElementField(fieldName, stringValue);
433
						log.debug("Adding field: " + fieldName);
434
						doc.addField(field);
435
		        	}
436
		        }
437
	        }
438
	    	
439
	        // add the additional fields we are trying to include in the index
440
	        for (String fieldName: fields.keySet()) {
441
	    		List<Object> values = fields.get(fieldName);
442
	    		for (Object value: values) {
443
	    			if (!doc.hasFieldWithValue(fieldName, value.toString())) {
444
	    				if (indexSchema.getField(fieldName).multiValued()) {
445
	    					doc.addField(new SolrElementField(fieldName, value.toString()));
446
	    				} else {
447
	    	    	    	doc.updateOrAddField(fieldName, value.toString());
448
	    				}
449
	    			}
450
	    		}
451
	    	}
452
	        
453
	        // make sure there is an id in the solrdoc so it is added to the index
454
	        if (!doc.hasField(ID)) {
455
	        	doc.updateOrAddField(ID, pid.getValue());
456
	        }
457
	        
458
	        // insert the whole thing
459
	        insertToIndex(doc);
460
    	} catch (Exception e) {
461
    		String error = "SolrIndex.insetFields - could not update the solr index: " + e.getMessage();
462
            writeEventLog(null, pid, error);
463
            log.error(error, e);
464
    	}
465

    
466
    }
467
    
468
    /*
469
     * Insert a SolrDoc to the solr server.
470
     */
471
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
472
        if(doc != null ) {
473
            SolrInputDocument solrDoc = new SolrInputDocument();
474
            List<SolrElementField> list = doc.getFieldList();
475
            if(list != null) {
476
                //solrDoc.addField(METACATPIDFIELD, pid);
477
                Iterator<SolrElementField> iterator = list.iterator();
478
                while (iterator.hasNext()) {
479
                    SolrElementField field = iterator.next();
480
                    if(field != null) {
481
                        String value = field.getValue();
482
                        String name = field.getName();
483
                        //System.out.println("add name/value pair - "+name+"/"+value);
484
                        solrDoc.addField(name, value);
485
                    }
486
                }
487
            }
488
            if(!solrDoc.isEmpty()) {
489
                /*IndexEvent event = new IndexEvent();
490
                event.setDate(Calendar.getInstance().getTime());
491
                Identifier pid = new Identifier();
492
                pid.setValue(doc.getIdentifier());
493
                event.setIdentifier(pid);*/
494
                try {
495
                    UpdateResponse response = solrServer.add(solrDoc);
496
                    solrServer.commit();
497
                    /*event.setType(IndexEvent.SUCCESSINSERT);
498
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
499
                    try {
500
                        EventlogFactory.createIndexEventLog().write(event);
501
                    } catch (Exception e) {
502
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+e.getMessage());
503
                    }*/
504
                } catch (SolrServerException e) {
505
                    /*event.setAction(Event.CREATE);
506
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
507
                    try {
508
                        EventlogFactory.createIndexEventLog().write(event);
509
                    } catch (Exception ee) {
510
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
511
                    }*/
512
                    throw e;
513
                } catch (IOException e) {
514
                    /*event.setAction(Event.CREATE);
515
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
516
                    try {
517
                        EventlogFactory.createIndexEventLog().write(event);
518
                    } catch (Exception ee) {
519
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
520
                    }*/
521
                    throw e;
522
                    
523
                }
524
                //System.out.println("=================the response is:\n"+response.toString());
525
            }
526
        }
527
    }
528
    
529
    /**
530
     * Update the solr index. This method handles the three scenarios:
531
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
532
     *    remove the index for the previous version(s) and generate new index for the doc.
533
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
534
     *    index for the doc.
535
     */
536
    public void update(Identifier pid, SystemMetadata systemMetadata) {
537
        if(systemMetadata==null || pid==null) {
538
            log.error("SolrIndex.update - the systemMetadata or pid is null. So nothing will be indexed.");
539
            return;
540
        }
541
        String objectPath = null;
542
        try {
543
            if (systemMetadata.getArchived() == null || !systemMetadata.getArchived()) {
544
                objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
545
            }
546
            update(pid, systemMetadata, objectPath);
547
            EventlogFactory.createIndexEventLog().remove(pid);
548
        } catch (Exception e) {
549
            String error = "SolrIndex.update - could not update the solr index since " + e.getMessage();
550
            writeEventLog(systemMetadata, pid, error);
551
            log.error(error, e);
552
        }
553
    }
554
   
555
    
556
    /**
557
     * Update the solr index. This method handles the three scenarios:
558
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
559
     *    remove the index for the previous version(s) and generate new index for the doc.
560
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
561
     *    index for the doc.
562
     * @param pid
563
     * @param systemMetadata
564
     * @param data
565
     * @throws SolrServerException
566
     * @throws ServiceFailure
567
     * @throws XPathExpressionException
568
     * @throws NotImplemented
569
     * @throws NotFound
570
     * @throws UnsupportedType
571
     * @throws IOException
572
     * @throws SAXException
573
     * @throws ParserConfigurationException
574
     * @throws OREParserException
575
     * @throws JiBXException
576
     * @throws EncoderException
577
     */
578
    void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws Exception {
579
        //checkParams(pid, systemMetadata, objectPath);
580
        if(systemMetadata==null || pid==null) {
581
            log.error("SolrIndex.update - the systemMetadata or pid is null. So nothing will be indexed.");
582
            return;
583
        }
584
        boolean isArchive = systemMetadata.getArchived() != null && systemMetadata.getArchived();
585
        if(isArchive ) {
586
            //delete the index for the archived objects
587
            remove(pid.getValue(), systemMetadata);
588
            log.info("SolrIndex.update============================= archive the idex for the identifier "+pid);
589
        } else {
590
            //generate index for either add or update.
591
            insert(pid, systemMetadata, objectPath);
592
            log.info("SolrIndex.update============================= insert index for the identifier "+pid);
593
        }
594
    }
595
    
596
   
597

    
598
    /*
599
     * Is the pid a resource map
600
     */
601
    private boolean isDataPackage(String pid, SystemMetadata sysmeta) throws FileNotFoundException, ServiceFailure {
602
        boolean isDataPackage = false;
603
        //SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
604
        if(sysmeta != null) {
605
            isDataPackage = IndexGeneratorTimerTask.isResourceMap(sysmeta.getFormatId());
606
        }
607
        return isDataPackage;
608
    }
609

    
610
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
611
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
612
        if (dataPackageIndexDoc != null) {
613
            String resourceMapId = dataPackageIndexDoc
614
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
615
            return StringUtils.isNotEmpty(resourceMapId);
616
        } else {
617
            return false;
618
        }
619
    }
620
    /**
621
     * Remove the indexed associated with specified pid.
622
     * @param pid  the pid which the indexes are associated with
623
     * @throws IOException
624
     * @throws SolrServerException
625
     * @throws ParserConfigurationException 
626
     * @throws SAXException 
627
     * @throws UnsupportedType 
628
     * @throws NotFound 
629
     * @throws NotImplemented 
630
     * @throws XPathExpressionException 
631
     * @throws ServiceFailure 
632
     * @throws OREParserException 
633
     */
634
    private void remove(String pid, SystemMetadata sysmeta) throws Exception {
635
        if (isDataPackage(pid, sysmeta)) {
636
            removeDataPackage(pid);
637
        } else if (isPartOfDataPackage(pid)) {
638
            removeFromDataPackage(pid);
639
        } else {
640
            removeFromIndex(pid);
641
        }
642
    }
643
    
644
    /*
645
     * Remove the resource map from the solr index. It doesn't only remove the index for itself and also
646
     * remove the relationship for the related metadata and data objects.
647
     */
648
    private void removeDataPackage(String pid) throws Exception {
649
        removeFromIndex(pid);
650
        List<SolrDoc> docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid);
651
        if (docsToUpdate != null && !docsToUpdate.isEmpty()) {
652
            //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
653
            //httpService.sendUpdate(solrIndexUri, addCommand);
654
            for(SolrDoc doc : docsToUpdate) {
655
                removeFromIndex(doc.getIdentifier());
656
                insertToIndex(doc);
657
            }
658
        }
659

    
660
    }
661

    
662
    /*
663
     * Get the list of the solr doc which need to be updated because the removal of the resource map
664
     */
665
    private List<SolrDoc> getUpdatedSolrDocsByRemovingResourceMap(String resourceMapId)
666
            throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, SAXException, MalformedURLException, IOException, XPathExpressionException {
667
        List<SolrDoc> updatedSolrDocs = null;
668
        if (resourceMapId != null && !resourceMapId.trim().equals("")) {
669
            /*List<SolrDoc> docsContainResourceMap = httpService.getDocumentsByResourceMap(
670
                    solrQueryUri, resourceMapId);*/
671
            List<SolrDoc> docsContainResourceMap = ResourceMapSubprocessor.getDocumentsByResourceMap(resourceMapId);
672
            updatedSolrDocs = removeResourceMapRelationship(docsContainResourceMap,
673
                    resourceMapId);
674
        }
675
        return updatedSolrDocs;
676
    }
677

    
678
    /*
679
     * Get the list of the solr doc which need to be updated because the removal of the resource map
680
     */
681
    private List<SolrDoc> removeResourceMapRelationship(List<SolrDoc> docsContainResourceMap,
682
            String resourceMapId) throws XPathExpressionException, IOException {
683
        List<SolrDoc> totalUpdatedSolrDocs = new ArrayList<SolrDoc>();
684
        if (docsContainResourceMap != null && !docsContainResourceMap.isEmpty()) {
685
            for (SolrDoc doc : docsContainResourceMap) {
686
                List<SolrDoc> updatedSolrDocs = new ArrayList<SolrDoc>();
687
                List<String> resourceMapIdStrs = doc
688
                        .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
689
                List<String> dataIdStrs = doc
690
                        .getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
691
                List<String> metadataIdStrs = doc
692
                        .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
693
                if ((dataIdStrs == null || dataIdStrs.isEmpty())
694
                        && (metadataIdStrs == null || metadataIdStrs.isEmpty())) {
695
                    // only has resourceMap field, doesn't have either documentBy or documents fields.
696
                    // so we only remove the resource map field.
697
                    doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, resourceMapId);
698
                    updatedSolrDocs.add(doc);
699
                } else if ((dataIdStrs != null && !dataIdStrs.isEmpty())
700
                        && (metadataIdStrs == null || metadataIdStrs.isEmpty())) {
701
                    //The solr doc is for a metadata object since the solr doc documents data files
702
                    updatedSolrDocs = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
703
                            dataIdStrs, SolrElementField.FIELD_DOCUMENTS);
704
                } else if ((dataIdStrs == null || dataIdStrs.isEmpty())
705
                        && (metadataIdStrs != null && !metadataIdStrs.isEmpty())) {
706
                    //The solr doc is for a data object since it documentedBy elements.
707
                    updatedSolrDocs = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
708
                            metadataIdStrs, SolrElementField.FIELD_ISDOCUMENTEDBY);
709
                } else if ((dataIdStrs != null && !dataIdStrs.isEmpty())
710
                        && (metadataIdStrs != null && !metadataIdStrs.isEmpty())){
711
                    // both metadata and data for one object
712
                    List<SolrDoc> solrDocsRemovedDocuments = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
713
                            dataIdStrs, SolrElementField.FIELD_DOCUMENTS);
714
                    List<SolrDoc> solrDocsRemovedDocumentBy = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
715
                            metadataIdStrs, SolrElementField.FIELD_ISDOCUMENTEDBY);
716
                    updatedSolrDocs = mergeUpdatedSolrDocs(solrDocsRemovedDocumentBy, solrDocsRemovedDocuments);
717
                }
718
                //move them to the final result
719
                if(updatedSolrDocs != null) {
720
                    for(SolrDoc updatedDoc: updatedSolrDocs) {
721
                        totalUpdatedSolrDocs.add(updatedDoc);
722
                    }
723
                }
724
                
725
            }
726

    
727
        }
728
        return totalUpdatedSolrDocs;
729
    }
730
    
731
    /*
732
     * Process the list of ids of the documentBy/documents in a slor doc.
733
     */
734
    private List<SolrDoc> removeAggregatedItems(String targetResourceMapId, SolrDoc doc,
735
            List<String> resourceMapIdsInDoc, List<String> aggregatedItemsInDoc, String fieldNameRemoved) {
736
        List<SolrDoc> updatedSolrDocs = new ArrayList<SolrDoc>();
737
        if (doc != null && resourceMapIdsInDoc != null && aggregatedItemsInDoc != null
738
                && fieldNameRemoved != null) {
739
            if (resourceMapIdsInDoc.size() == 1) {
740
                //only has one resource map. remove the resource map. also remove the documentBy
741
                doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, targetResourceMapId);
742
                doc.removeAllFields(fieldNameRemoved);
743
                updatedSolrDocs.add(doc);
744
            } else if (resourceMapIdsInDoc.size() > 1) {
745
                //we have multiple resource maps. We should match them.                     
746
                Map<String, String> ids = matchResourceMapsAndItems(doc.getIdentifier(),
747
                        targetResourceMapId, resourceMapIdsInDoc, aggregatedItemsInDoc, fieldNameRemoved);
748
                if (ids != null) {
749
                    for (String id : ids.keySet()) {
750
                        doc.removeFieldsWithValue(fieldNameRemoved, id);
751
                    }
752
                }
753
                doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
754
                        targetResourceMapId);
755
                updatedSolrDocs.add(doc);
756
                /*if (aggregatedItemsInDoc.size() > 1) {
757
                    
758

    
759
                } else {
760
                    //multiple resource map aggregate same metadata and data. Just remove the resource map
761
                    doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
762
                            targetResourceMapId);
763
                    updatedSolrDocs.add(doc);
764
                }*/
765
            }
766
        }
767
        return updatedSolrDocs;
768
    }
769

    
770
    /*
771
     * Return a map of mapping aggregation id map the target resourceMapId.
772
     * This will look the aggregation information in another side - If the targetId
773
     * is a metadata object, we will look the data objects which it describes; If 
774
     * the targetId is a data object, we will look the metadata object which documents it.
775
     */
776
    private Map<String, String> matchResourceMapsAndItems(String targetId,
777
            String targetResourceMapId, List<String> originalResourceMaps, List<String> aggregatedItems, String fieldName) {
778
        Map<String, String> map = new HashMap<String, String>();
779
        if (targetId != null && targetResourceMapId != null && aggregatedItems != null
780
                && fieldName != null) {
781
            String newFieldName = null;
782
            if (fieldName.equals(SolrElementField.FIELD_ISDOCUMENTEDBY)) {
783
                newFieldName = SolrElementField.FIELD_DOCUMENTS;
784
            } else if (fieldName.equals(SolrElementField.FIELD_DOCUMENTS)) {
785
                newFieldName = SolrElementField.FIELD_ISDOCUMENTEDBY;
786
            }
787
            if (newFieldName != null) {
788
                for (String item : aggregatedItems) {
789
                    SolrDoc doc = null;
790
                    try {
791
                        doc = getDocumentById(item);
792
                        List<String> fieldValues = doc.getAllFieldValues(newFieldName);
793
                        List<String> resourceMapIds = doc
794
                                .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
795
                        if ((fieldValues != null && fieldValues.contains(targetId))
796
                                && (resourceMapIds != null && resourceMapIds
797
                                        .contains(targetResourceMapId))) {
798
                            //okay, we found the target aggregation item id and the resource map id
799
                            //in this solr doc. However, we need check if another resource map with different
800
                            //id but specify the same relationship. If we have the id(s), we should not
801
                            // remove the documents( or documentBy) element since we need to preserve the 
802
                            // relationship for the remain resource map. 
803
                            boolean hasDuplicateIds = false;
804
                            if(originalResourceMaps != null) {
805
                               for(String id :resourceMapIds) {
806
                                    if (originalResourceMaps.contains(id) && !id.equals(targetResourceMapId)) {
807
                                        hasDuplicateIds = true;
808
                                        break;
809
                                    }
810
                                }
811
                            }
812
                            if(!hasDuplicateIds) {
813
                                map.put(item, targetResourceMapId);
814
                            }
815
                            
816
                        }
817
                    } catch (Exception e) {
818
                        log.warn("SolrIndex.matchResourceMapsAndItems - can't get the solrdoc for the id "
819
                                + item + " since " + e.getMessage());
820
                    }
821
                }
822
            }
823
        }
824
        return map;
825
    }
826

    
827
    /*
828
     * Get the solr index doc from the index server for the given id.
829
     */
830
    private SolrDoc getDocumentById(String id) throws NotImplemented, NotFound, UnsupportedType, 
831
                SolrServerException, ParserConfigurationException, SAXException, XPathExpressionException, IOException {
832
        SolrDoc doc = ResourceMapSubprocessor.getSolrDoc(id);
833
        return doc;
834
    }
835
    
836
    /*
837
     * Merge two list of updated solr docs. removedDocumentBy has the correct information about documentBy element.
838
     * removedDocuments has the correct information about the documents element.
839
     * So we go through the two list and found the two docs having the same identifier.
840
     * Get the list of the documents value from the one in the removedDoucments (1). 
841
     * Remove all values of documents from the one in the removedDocumentBy. 
842
     * Then copy the list of documents value from (1) to to the one in the removedDocumentBy.
843
     */
844
    private List<SolrDoc> mergeUpdatedSolrDocs(List<SolrDoc>removedDocumentBy, List<SolrDoc>removedDocuments) {
845
        List<SolrDoc> mergedDocuments = new ArrayList<SolrDoc>();
846
        if(removedDocumentBy == null || removedDocumentBy.isEmpty()) {
847
            mergedDocuments = removedDocuments;
848
        } else if (removedDocuments == null || removedDocuments.isEmpty()) {
849
            mergedDocuments = removedDocumentBy;
850
        } else {
851
            int sizeOfDocBy = removedDocumentBy.size();
852
            int sizeOfDocs = removedDocuments.size();
853
            for(int i=sizeOfDocBy-1; i>= 0; i--) {
854
                SolrDoc docInRemovedDocBy = removedDocumentBy.get(i);
855
                for(int j= sizeOfDocs-1; j>=0; j--) {
856
                    SolrDoc docInRemovedDocs = removedDocuments.get(j);
857
                    if(docInRemovedDocBy.getIdentifier().equals(docInRemovedDocs.getIdentifier())) {
858
                        //find the same doc in both list. let's merge them.
859
                        //first get all the documents element from the docWithDocs(it has the correct information about the documents element)
860
                        List<String> idsInDocuments = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
861
                        docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS);//clear out any documents element in docInRemovedDocBy
862
                        //add the Documents element from the docInRemovedDocs if it has any.
863
                        // The docInRemovedDocs has the correct information about the documentBy. Now it copied the correct information of the documents element.
864
                        // So docInRemovedDocs has both correct information about the documentBy and documents elements.
865
                        if(idsInDocuments != null) {
866
                            for(String id : idsInDocuments) {
867
                                if(id != null && !id.trim().equals("")) {
868
                                    docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id));
869
                                }
870
                                
871
                            }
872
                        }
873
                        //intersect the resource map ids.
874
                        List<String> resourceMapIdsInWithDocs = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
875
                        List<String> resourceMapIdsInWithDocBy = docInRemovedDocBy.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
876
                        docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_RESOURCEMAP);
877
                        Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs, resourceMapIdsInWithDocBy);
878
                        if(resourceMapIds != null) {
879
                            for(Object idObj : resourceMapIds) {
880
                                String id = (String)idObj;
881
                                docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_RESOURCEMAP, id));
882
                            }
883
                        }
884
                        //we don't need do anything about the documentBy elements since the docInRemovedDocBy has the correct information.
885
                        mergedDocuments.add(docInRemovedDocBy);
886
                        //delete the two documents from the list
887
                        removedDocumentBy.remove(i);
888
                        removedDocuments.remove(j);
889
                        break;
890
                    }
891
                    
892
                }
893
            }
894
            // when we get there, if the two lists are empty, this will be a perfect merge. However, if something are left. we 
895
            //just put them in.
896
            for(SolrDoc doc: removedDocumentBy) {
897
                mergedDocuments.add(doc);
898
            }
899
            for(SolrDoc doc: removedDocuments) {
900
                mergedDocuments.add(doc);
901
            }
902
        }
903
        return mergedDocuments;
904
    }
905
    
906

    
907
    /*
908
     * Remove a pid which is part of resource map.
909
     */
910
    private void removeFromDataPackage(String pid) throws Exception  {
911
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
912
        removeFromIndex(pid);
913
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
914

    
915
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
916
        for (String documentsValue : documents) {
917
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
918
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
919
            removeFromIndex(documentsValue);
920
            insertToIndex(solrDoc);
921
        }
922

    
923
        List<String> documentedBy = indexedDoc
924
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
925
        for (String documentedByValue : documentedBy) {
926
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
927
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, pid);
928
            //docsToUpdate.add(solrDoc);
929
            removeFromIndex(documentedByValue);
930
            insertToIndex(solrDoc);
931
        }
932

    
933
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
934
        //httpService.sendUpdate(solrIndexUri, addCommand);
935
    }
936

    
937
    /*
938
     * Remove a pid from the solr index
939
     */
940
    private synchronized void removeFromIndex(String identifier) throws Exception {
941
    	
942
    	
943
    	Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
944

    
945
        for (IDocumentDeleteSubprocessor deleteSubprocessor : deleteSubprocessors) {
946
            docs.putAll(deleteSubprocessor.processDocForDelete(identifier, docs));
947
        }
948
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
949
        List<String> idsToIndex = new ArrayList<String>();
950
        for (String idToUpdate : docs.keySet()) {
951
            if (docs.get(idToUpdate) != null) {
952
                docsToUpdate.add(docs.get(idToUpdate));
953
            } else {
954
                idsToIndex.add(idToUpdate);
955
            }
956
        }
957

    
958
        // update the docs we have
959
        for (SolrDoc docToUpdate : docsToUpdate) {
960
        	insertToIndex(docToUpdate);
961
        }
962
        
963
        // delete this one
964
        deleteDocFromIndex(identifier);
965

    
966
        // index the rest
967
        for (String idToIndex : idsToIndex) {
968
        	Identifier pid = new Identifier();
969
        	pid.setValue(idToIndex);
970
            SystemMetadata sysMeta = DistributedMapsFactory.getSystemMetadata(idToIndex);
971
            if (SolrDoc.visibleInIndex(sysMeta)) {
972
                String objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
973
                insert(pid, sysMeta, objectPath);
974
            }
975
        }
976
    		
977
    }
978
    
979
    private void deleteDocFromIndex(String pid) throws Exception {
980
    	if (pid != null && !pid.trim().equals("")) {
981
            /*IndexEvent event = new IndexEvent();
982
            event.setDate(Calendar.getInstance().getTime());
983
            Identifier identifier = new Identifier();
984
            identifier.setValue(pid);
985
            event.setIdentifier(identifier);*/
986
            try {
987
                solrServer.deleteById(pid);
988
                solrServer.commit();
989
                /*event.setType(IndexEvent.SUCCESSDELETE);
990
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
991
                try {
992
                    EventlogFactory.createIndexEventLog().write(event);
993
                } catch (Exception e) {
994
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+e.getMessage());
995
                }*/
996
            } catch (SolrServerException e) {
997
                /*event.setAction(Event.DELETE);
998
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
999
                try {
1000
                    EventlogFactory.createIndexEventLog().write(event);
1001
                } catch (Exception ee) {
1002
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
1003
                }*/
1004
                throw e;
1005
                
1006
            } catch (IOException e) {
1007
                /*event.setAction(Event.DELETE);
1008
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
1009
                try {
1010
                    EventlogFactory.createIndexEventLog().write(event);
1011
                } catch (Exception ee) {
1012
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
1013
                }*/
1014
                throw e;
1015
            }
1016
            
1017
        }
1018
    
1019
    }
1020

    
1021
    /**
1022
     * Get the solrServer
1023
     * @return
1024
     */
1025
    public SolrServer getSolrServer() {
1026
        return solrServer;
1027
    }
1028

    
1029
    /**
1030
     * Set the solrServer. 
1031
     * @param solrServer
1032
     */
1033
    public void setSolrServer(SolrServer solrServer) {
1034
        this.solrServer = solrServer;
1035
    }
1036
    
1037
    /**
1038
     * Get all indexed ids in the solr server. 
1039
     * @return an empty list if there is no index.
1040
     * @throws SolrServerException
1041
     */
1042
    public List<String> getSolrIds() throws SolrServerException {
1043
        List<String> list = new ArrayList<String>();
1044
        SolrQuery query = new SolrQuery(IDQUERY); 
1045
        query.setRows(Integer.MAX_VALUE); 
1046
        query.setFields(ID); 
1047
        QueryResponse response = solrServer.query(query); 
1048
        SolrDocumentList docs = response.getResults();
1049
        if(docs != null) {
1050
            for(SolrDocument doc :docs) {
1051
                String identifier = (String)doc.getFieldValue(ID);
1052
                //System.out.println("======================== "+identifier);
1053
                list.add(identifier);
1054
            }
1055
        }
1056
        return list;
1057
    }
1058
    
1059
    private void writeEventLog(SystemMetadata systemMetadata, Identifier pid, String error) {
1060
        IndexEvent event = new IndexEvent();
1061
        event.setIdentifier(pid);
1062
        event.setDate(Calendar.getInstance().getTime());
1063
        String action = null;
1064
        if (systemMetadata == null ) {
1065
            action = Event.CREATE.xmlValue();
1066
            event.setAction(Event.CREATE);
1067
        }
1068
        else if(systemMetadata.getArchived() != null && systemMetadata.getArchived()) {
1069
            action = Event.DELETE.xmlValue();
1070
            event.setAction(Event.DELETE);
1071
        } else {
1072
            action = Event.CREATE.xmlValue();
1073
            event.setAction(Event.CREATE);
1074
        }
1075
        event.setDescription("Failed to "+action+"the solr index for the id "+pid.getValue()+" since "+error);
1076
        try {
1077
            EventlogFactory.createIndexEventLog().write(event);
1078
        } catch (Exception ee) {
1079
            log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
1080
        }
1081
    }
1082
}
(6-6/7)