Project

General

Profile

1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20

    
21
import java.io.ByteArrayInputStream;
22
import java.io.FileInputStream;
23
import java.io.FileNotFoundException;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.net.MalformedURLException;
27
import java.util.ArrayList;
28
import java.util.Calendar;
29
import java.util.Collection;
30
import java.util.Date;
31
import java.util.HashMap;
32
import java.util.Iterator;
33
import java.util.List;
34
import java.util.Map;
35
import java.util.Set;
36

    
37
import javax.xml.parsers.DocumentBuilder;
38
import javax.xml.parsers.DocumentBuilderFactory;
39
import javax.xml.parsers.ParserConfigurationException;
40
import javax.xml.xpath.XPath;
41
import javax.xml.xpath.XPathExpressionException;
42
import javax.xml.xpath.XPathFactory;
43

    
44
import org.apache.commons.codec.EncoderException;
45
import org.apache.commons.collections.CollectionUtils;
46
import org.apache.commons.io.output.ByteArrayOutputStream;
47
import org.apache.commons.lang.StringUtils;
48
import org.apache.commons.logging.Log;
49
import org.apache.commons.logging.LogFactory;
50
import org.apache.solr.client.solrj.SolrQuery;
51
import org.apache.solr.client.solrj.SolrServer;
52
import org.apache.solr.client.solrj.SolrServerException;
53
import org.apache.solr.client.solrj.response.QueryResponse;
54
import org.apache.solr.client.solrj.response.UpdateResponse;
55
import org.apache.solr.common.SolrDocument;
56
import org.apache.solr.common.SolrDocumentList;
57
import org.apache.solr.common.SolrInputDocument;
58
import org.apache.solr.schema.IndexSchema;
59
import org.dataone.cn.indexer.XMLNamespaceConfig;
60
import org.dataone.cn.indexer.convert.SolrDateConverter;
61
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor;
62
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
63
import org.dataone.cn.indexer.parser.SolrField;
64
import org.dataone.cn.indexer.solrhttp.SolrDoc;
65
import org.dataone.cn.indexer.solrhttp.SolrElementField;
66
import org.dataone.service.exceptions.NotFound;
67
import org.dataone.service.exceptions.NotImplemented;
68
import org.dataone.service.exceptions.ServiceFailure;
69
import org.dataone.service.exceptions.UnsupportedType;
70
import org.dataone.service.types.v1.Event;
71
import org.dataone.service.types.v1.Identifier;
72
import org.dataone.service.types.v2.SystemMetadata;
73
import org.dataone.service.util.DateTimeMarshaller;
74
import org.dataone.service.util.TypeMarshaller;
75
import org.dspace.foresite.OREParserException;
76
import org.jibx.runtime.JiBXException;
77
import org.w3c.dom.Document;
78
import org.xml.sax.SAXException;
79

    
80
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
81
import edu.ucsb.nceas.metacat.common.query.SolrQueryServiceController;
82
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
83
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
84

    
85
/**
86
 * Inserts, updates and removes index entries on a SOLR server.
87
 * @author tao
88
 *
89
 */
90
public class SolrIndex {
91
            
92
    public static final String ID = "id";
93
    private static final String IDQUERY = ID+":*";
94
    private List<IDocumentSubprocessor> subprocessors = null;
95
    private SolrServer solrServer = null;
96
    private XMLNamespaceConfig xmlNamespaceConfig = null;
97
    private List<SolrField> sysmetaSolrFields = null;
98

    
99
    private static DocumentBuilderFactory documentBuilderFactory = null;
100
    private static DocumentBuilder builder = null;
101

    
102
    private static XPathFactory xpathFactory = null;
103
    private static XPath xpath = null;
104
    Log log = LogFactory.getLog(SolrIndex.class);
105
    
106
    static {
107
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
108
        documentBuilderFactory.setNamespaceAware(true);
109
        try {
110
            builder = documentBuilderFactory.newDocumentBuilder();
111
        } catch (ParserConfigurationException e) {
112
            e.printStackTrace();
113
        }
114
        xpathFactory = XPathFactory.newInstance();
115
        xpath = xpathFactory.newXPath();
116
    }
117
    
118
    /**
     * Creates the index handler and prepares the XPath expressions of the
     * system metadata fields.
     * @param xmlNamespaceConfig  the namespace context installed on the shared XPath object
     * @param sysmetaSolrFields  the system metadata field definitions to initialize
     * @throws XPathExpressionException
     * @throws ParserConfigurationException
     * @throws IOException
     * @throws SAXException
     */
    public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, List<SolrField> sysmetaSolrFields)
            throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        this.sysmetaSolrFields = sysmetaSolrFields;
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        this.init();
    }
129
    
130
    private void init() throws ParserConfigurationException, XPathExpressionException {
131
        xpath.setNamespaceContext(xmlNamespaceConfig);
132
        initExpressions();
133
    }
134

    
135
    private void initExpressions() throws XPathExpressionException {
136
        for (SolrField field : sysmetaSolrFields) {
137
            field.initExpression(xpath);
138
        }
139

    
140
    }
141
    
142
    
143
    /**
144
     * Get the list of the Subprocessors in this index.
145
     * @return the list of the Subprocessors.
146
     */
147
    public List<IDocumentSubprocessor> getSubprocessors() {
148
        return subprocessors;
149
    }
150

    
151
    /**
152
     * Set the list of Subprocessors.
153
     * @param subprocessorList  the list will be set.
154
     */
155
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
156
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
157
        	if (subprocessor instanceof AbstractDocumentSubprocessor) {
158
        		((AbstractDocumentSubprocessor)subprocessor).initExpression(xpath);
159
        	}
160
        }
161
        this.subprocessors = subprocessorList;
162
    }
163
    
164
    /**
165
     * Generate the index for the given information
166
     * @param id
167
     * @param systemMetadata
168
     * @param dataStream
169
     * @return
170
     * @throws IOException
171
     * @throws SAXException
172
     * @throws ParserConfigurationException
173
     * @throws XPathExpressionException
174
     * @throws JiBXException 
175
     * @throws SolrServerException 
176
     * @throws EncoderException
177
     * @throws UnsupportedType 
178
     * @throws NotFound 
179
     * @throws NotImplemented 
180
     */
181
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, String objectPath)
182
                    throws IOException, SAXException, ParserConfigurationException,
183
                    XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
184

    
185
        // Load the System Metadata document
186
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
187
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
188
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
189
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
190
        if (sysMetaDoc == null) {
191
            log.error("Could not load System metadata for ID: " + id);
192
            return null;
193
        }
194

    
195
        // Extract the field values from the System Metadata
196
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
197
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
198
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
199
        docs.put(id, indexDocument);
200
        
201
        // get the format id for this object
202
        String formatId = indexDocument.getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT);
203

    
204
        // Determine if subprocessors are available for this ID
205
        if (subprocessors != null) {
206
	        // for each subprocessor loaded from the spring config
207
	        for (IDocumentSubprocessor subprocessor : subprocessors) {
208
	            // Does this subprocessor apply?
209
	            if (subprocessor.canProcess(formatId)) {
210
	                // if so, then extract the additional information from the
211
	                // document.
212
	                try {
213
	                    // docObject = the resource map document or science
214
	                    // metadata document.
215
	                    // note that resource map processing touches all objects
216
	                    // referenced by the resource map.
217
	                	FileInputStream dataStream = new FileInputStream(objectPath);
218
	                    if (!dataStream.getFD().valid()) {
219
	                    	log.error("Could not load OBJECT file for ID,Path=" + id + ", "
220
                                    + objectPath);
221
	                        //throw new Exception("Could not load OBJECT for ID " + id );
222
	                    } else {
223
	                        docs = subprocessor.processDocument(id, docs, dataStream);
224
	                    }
225
	                } catch (Exception e) {
226
	                    log.error(e.getMessage(), e);
227
	                    throw new SolrServerException(e.getMessage());
228
	                }
229
	            }
230
	        }
231
       }
232

    
233
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
234
       // merge is only for resource map. We need more work here.
235
       for (SolrDoc mergeDoc : docs.values()) {
236
           if (!mergeDoc.isMerged()) {
237
                 mergeWithIndexedDocument(mergeDoc);
238
           }
239
       }
240

    
241
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
242
               
243
       return docs;
244
    }
245
    
246
    /**
247
     * Merge updates with existing solr documents
248
     * 
249
     * This method appears to re-set the data package field data into the
250
     * document about to be updated in the solr index. Since packaging
251
     * information is derived from the package document (resource map), this
252
     * information is not present when processing a document contained in a data
253
     * package. This method replaces those values from the existing solr index
254
     * record for the document being processed. -- sroseboo, 1-18-12
255
     * 
256
     * @param indexDocument
257
     * @return
258
     * @throws IOException
259
     * @throws EncoderException
260
     * @throws XPathExpressionException
261
     * @throws SAXException 
262
     * @throws ParserConfigurationException 
263
     * @throws SolrServerException 
264
     * @throws UnsupportedType 
265
     * @throws NotFound 
266
     * @throws NotImplemented 
267
     */
268
    // TODO:combine merge function with resourcemap merge function
269

    
270
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
271
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
272
        List<String> ids = new ArrayList<String>();
273
        ids.add(indexDocument.getIdentifier());
274
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
275
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
276
                : indexedDocuments.get(0);
277
        
278
        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
279

    
280
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
281
            return indexDocument;
282
        } else {
283
            for (SolrElementField field : indexedDocument.getFieldList()) {
284
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
285
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
286
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
287
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
288
                    indexDocument.addField(field);
289
                } else if (!indexSchema.isCopyFieldTarget(indexSchema.getField(field.getName())) && !indexDocument.hasField(field.getName())) {
290
                    indexDocument.addField(field);
291
                }
292
            }
293

    
294
            indexDocument.setMerged(true);
295
            return indexDocument;
296
        }
297
    }
298
    
299
    /*
300
     * Generate a Document from the InputStream
301
     */
302
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
303
        Document doc = null;
304

    
305
        try {
306
            doc = builder.parse(smdStream);
307
        } catch (IOException e) {
308
            log.error(e.getMessage(), e);
309
        }
310

    
311
        return doc;
312
    }
313
    
314
    /*
315
     * Index the fields of the system metadata
316
     */
317
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
318

    
319
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
320
        // solrFields is the list of fields defined in the application context
321
       
322
        for (SolrField field : sysmetaSolrFields) {
323
            try {
324
                // the field.getFields method can return a single value or
325
                // multiple values for multi-valued fields
326
                // or can return multiple SOLR document fields.
327
                fieldList.addAll(field.getFields(doc, identifier));
328
            } catch (Exception e) {
329
                e.printStackTrace();
330
            }
331
        }
332
        return fieldList;
333

    
334
    }
335
    
336
    /**
337
     * Check the parameters of the insert or update methods.
338
     * @param pid
339
     * @param systemMetadata
340
     * @param data
341
     * @throws SolrServerException
342
     */
343
    private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException {
344
        if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) {
345
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
346
        }
347
        if(systemMetadata == null) {
348
            throw new SolrServerException("The system metadata of the indexed document "+pid.getValue()+ " should not be null.");
349
        }
350
        if(objectPath == null) {
351
            throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null.");
352
        }
353
    }
354
    
355
    /**
356
     * Insert the indexes for a document.
357
     * @param pid  the id of this document
358
     * @param systemMetadata  the system metadata associated with the data object
359
     * @param data  the path to the object file itself
360
     * @throws SolrServerException 
361
     * @throws JiBXException 
362
     * @throws EncoderException 
363
     * @throws UnsupportedType 
364
     * @throws NotFound 
365
     * @throws NotImplemented 
366
     */
367
    private synchronized void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath) 
368
                    throws IOException, SAXException, ParserConfigurationException,
369
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType {
370
        checkParams(pid, systemMetadata, objectPath);
371
        Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, objectPath);
372
        
373
        //transform the Map to the SolrInputDocument which can be used by the solr server
374
        if(docs != null) {
375
            Set<String> ids = docs.keySet();
376
            for(String id : ids) {
377
                if(id != null) {
378
                    SolrDoc doc = docs.get(id);
379
                    insertToIndex(doc);
380
                }
381
                
382
            }
383
        }
384
    }
385
    
386
    /**
387
     * Adds the given fields to the solr index for the given pid, preserving the index values
388
     * that previously existed
389
     * @param pid
390
     * @param fields
391
     */
392
    public void insertFields(Identifier pid, Map<String, List<Object>> fields) {
393
    	
394
    	try {
395
			// copy the original values already indexed for this document	
396
	    	SolrQuery query = new SolrQuery("id:\"" + pid.getValue() + "\"");
397
	    	QueryResponse res = solrServer.query(query);
398
	    	SolrDoc doc = new SolrDoc();
399
	    	
400
	    	// include existing values if they exist
401
	        IndexSchema indexSchema = SolrQueryServiceController.getInstance().getSchema();
402

    
403
	        if (res.getResults().size() > 0) {
404
		        SolrDocument orig = res.getResults().get(0);
405
		    	for (String fieldName: orig.getFieldNames()) {
406
		        	//  don't transfer the copyTo fields, otherwise there are errors
407
		        	if (indexSchema.isCopyFieldTarget(indexSchema.getField(fieldName))) {
408
		        		continue;
409
		        	}
410
		        	for (Object value: orig.getFieldValues(fieldName)) {
411
		        		String stringValue = value.toString();
412
		        		// special handling for dates in ISO 8601
413
		        		if (value instanceof Date) {
414
		        			stringValue = DateTimeMarshaller.serializeDateToUTC((Date)value);
415
		        			SolrDateConverter converter = new SolrDateConverter();
416
		        			stringValue = converter.convert(stringValue);
417
		        		}
418
						SolrElementField field = new SolrElementField(fieldName, stringValue);
419
						log.debug("Adding field: " + fieldName);
420
						doc.addField(field);
421
		        	}
422
		        }
423
	        }
424
	    	
425
	        // add the additional fields we are trying to include in the index
426
	        for (String fieldName: fields.keySet()) {
427
	    		List<Object> values = fields.get(fieldName);
428
	    		for (Object value: values) {
429
	    			if (!doc.hasFieldWithValue(fieldName, value.toString())) {
430
	    				if (indexSchema.getField(fieldName).multiValued()) {
431
	    					doc.addField(new SolrElementField(fieldName, value.toString()));
432
	    				} else {
433
	    	    	    	doc.updateOrAddField(fieldName, value.toString());
434
	    				}
435
	    			}
436
	    		}
437
	    	}
438
	        
439
	        // make sure there is an id in the solrdoc so it is added to the index
440
	        if (!doc.hasField(ID)) {
441
	        	doc.updateOrAddField(ID, pid.getValue());
442
	        }
443
	        
444
	        // insert the whole thing
445
	        insertToIndex(doc);
446
    	} catch (Exception e) {
447
    		String error = "SolrIndex.insetFields - could not update the solr index: " + e.getMessage();
448
            writeEventLog(null, pid, error);
449
            log.error(error, e);
450
    	}
451

    
452
    }
453
    
454
    /*
455
     * Insert a SolrDoc to the solr server.
456
     */
457
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
458
        if(doc != null ) {
459
            SolrInputDocument solrDoc = new SolrInputDocument();
460
            List<SolrElementField> list = doc.getFieldList();
461
            if(list != null) {
462
                //solrDoc.addField(METACATPIDFIELD, pid);
463
                Iterator<SolrElementField> iterator = list.iterator();
464
                while (iterator.hasNext()) {
465
                    SolrElementField field = iterator.next();
466
                    if(field != null) {
467
                        String value = field.getValue();
468
                        String name = field.getName();
469
                        //System.out.println("add name/value pair - "+name+"/"+value);
470
                        solrDoc.addField(name, value);
471
                    }
472
                }
473
            }
474
            if(!solrDoc.isEmpty()) {
475
                /*IndexEvent event = new IndexEvent();
476
                event.setDate(Calendar.getInstance().getTime());
477
                Identifier pid = new Identifier();
478
                pid.setValue(doc.getIdentifier());
479
                event.setIdentifier(pid);*/
480
                try {
481
                    UpdateResponse response = solrServer.add(solrDoc);
482
                    solrServer.commit();
483
                    /*event.setType(IndexEvent.SUCCESSINSERT);
484
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
485
                    try {
486
                        EventlogFactory.createIndexEventLog().write(event);
487
                    } catch (Exception e) {
488
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+e.getMessage());
489
                    }*/
490
                } catch (SolrServerException e) {
491
                    /*event.setAction(Event.CREATE);
492
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
493
                    try {
494
                        EventlogFactory.createIndexEventLog().write(event);
495
                    } catch (Exception ee) {
496
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
497
                    }*/
498
                    throw e;
499
                } catch (IOException e) {
500
                    /*event.setAction(Event.CREATE);
501
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
502
                    try {
503
                        EventlogFactory.createIndexEventLog().write(event);
504
                    } catch (Exception ee) {
505
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
506
                    }*/
507
                    throw e;
508
                    
509
                }
510
                //System.out.println("=================the response is:\n"+response.toString());
511
            }
512
        }
513
    }
514
    
515
    /**
516
     * Update the solr index. This method handles the three scenarios:
517
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
518
     *    remove the index for the previous version(s) and generate new index for the doc.
519
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
520
     *    index for the doc.
521
     */
522
    public void update(Identifier pid, SystemMetadata systemMetadata) {
523
        if(systemMetadata==null || pid==null) {
524
            log.error("SolrIndex.update - the systemMetadata or pid is null. So nothing will be indexed.");
525
            return;
526
        }
527
        String objectPath = null;
528
        try {
529
            if(!systemMetadata.getArchived()) {
530
                objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
531
            }
532
            update(pid, systemMetadata, objectPath);
533
            EventlogFactory.createIndexEventLog().remove(pid);
534
        } catch (Exception e) {
535
            String error = "SolrIndex.update - could not update the solr index since " + e.getMessage();
536
            writeEventLog(systemMetadata, pid, error);
537
            log.error(error, e);
538
        }
539
    }
540
   
541
    
542
    /**
543
     * Update the solr index. This method handles the three scenarios:
544
     * 1. Remove an existing doc - if the the system metadata shows the value of the archive is true,
545
     *    remove the index for the previous version(s) and generate new index for the doc.
546
     * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the
547
     *    index for the doc.
548
     * @param pid
549
     * @param systemMetadata
550
     * @param data
551
     * @throws SolrServerException
552
     * @throws ServiceFailure
553
     * @throws XPathExpressionException
554
     * @throws NotImplemented
555
     * @throws NotFound
556
     * @throws UnsupportedType
557
     * @throws IOException
558
     * @throws SAXException
559
     * @throws ParserConfigurationException
560
     * @throws OREParserException
561
     * @throws JiBXException
562
     * @throws EncoderException
563
     */
564
    void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException, 
565
                                ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, 
566
                                IOException, SAXException, ParserConfigurationException, OREParserException, JiBXException, EncoderException {
567
        //checkParams(pid, systemMetadata, objectPath);
568
        if(systemMetadata==null || pid==null) {
569
            log.error("SolrIndex.update - the systemMetadata or pid is null. So nothing will be indexed.");
570
            return;
571
        }
572
        boolean isArchive = systemMetadata.getArchived() != null && systemMetadata.getArchived();
573
        if(isArchive ) {
574
            //delete the index for the archived objects
575
            remove(pid.getValue(), systemMetadata);
576
            log.info("SolrIndex.update============================= archive the idex for the identifier "+pid);
577
        } else {
578
            //generate index for either add or update.
579
            insert(pid, systemMetadata, objectPath);
580
            log.info("SolrIndex.update============================= insert index for the identifier "+pid);
581
        }
582
    }
583
    
584
   
585

    
586
    /*
587
     * Is the pid a resource map
588
     */
589
    private boolean isDataPackage(String pid, SystemMetadata sysmeta) throws FileNotFoundException, ServiceFailure {
590
        boolean isDataPackage = false;
591
        //SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
592
        if(sysmeta != null) {
593
            isDataPackage = IndexGeneratorTimerTask.isResourceMap(sysmeta.getFormatId());
594
        }
595
        return isDataPackage;
596
    }
597

    
598
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
599
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
600
        if (dataPackageIndexDoc != null) {
601
            String resourceMapId = dataPackageIndexDoc
602
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
603
            return StringUtils.isNotEmpty(resourceMapId);
604
        } else {
605
            return false;
606
        }
607
    }
608
    /**
609
     * Remove the indexed associated with specified pid.
610
     * @param pid  the pid which the indexes are associated with
611
     * @throws IOException
612
     * @throws SolrServerException
613
     * @throws ParserConfigurationException 
614
     * @throws SAXException 
615
     * @throws UnsupportedType 
616
     * @throws NotFound 
617
     * @throws NotImplemented 
618
     * @throws XPathExpressionException 
619
     * @throws ServiceFailure 
620
     * @throws OREParserException 
621
     */
622
    private void remove(String pid, SystemMetadata sysmeta) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
623
        if (isDataPackage(pid, sysmeta)) {
624
            removeDataPackage(pid);
625
        } else if (isPartOfDataPackage(pid)) {
626
            removeFromDataPackage(pid);
627
        } else {
628
            removeFromIndex(pid);
629
        }
630
    }
631
    
632
    /*
633
     * Remove the resource map from the solr index. It doesn't only remove the index for itself and also
634
     * remove the relationship for the related metadata and data objects.
635
     */
636
    private void removeDataPackage(String pid) throws  XPathExpressionException, IOException, 
637
            SolrServerException, UnsupportedType, NotFound, ParserConfigurationException, SAXException {
638
        removeFromIndex(pid);
639
        List<SolrDoc> docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid);
640
        if (docsToUpdate != null && !docsToUpdate.isEmpty()) {
641
            //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
642
            //httpService.sendUpdate(solrIndexUri, addCommand);
643
            for(SolrDoc doc : docsToUpdate) {
644
                removeFromIndex(doc.getIdentifier());
645
                insertToIndex(doc);
646
            }
647
        }
648

    
649
    }
650

    
651
    /*
652
     * Get the list of the solr doc which need to be updated because the removal of the resource map
653
     */
654
    private List<SolrDoc> getUpdatedSolrDocsByRemovingResourceMap(String resourceMapId)
655
            throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, SAXException, MalformedURLException, IOException, XPathExpressionException {
656
        List<SolrDoc> updatedSolrDocs = null;
657
        if (resourceMapId != null && !resourceMapId.trim().equals("")) {
658
            /*List<SolrDoc> docsContainResourceMap = httpService.getDocumentsByResourceMap(
659
                    solrQueryUri, resourceMapId);*/
660
            List<SolrDoc> docsContainResourceMap = ResourceMapSubprocessor.getDocumentsByResourceMap(resourceMapId);
661
            updatedSolrDocs = removeResourceMapRelationship(docsContainResourceMap,
662
                    resourceMapId);
663
        }
664
        return updatedSolrDocs;
665
    }
666

    
667
    /*
668
     * Get the list of the solr doc which need to be updated because the removal of the resource map
669
     */
670
    private List<SolrDoc> removeResourceMapRelationship(List<SolrDoc> docsContainResourceMap,
671
            String resourceMapId) throws XPathExpressionException, IOException {
672
        List<SolrDoc> totalUpdatedSolrDocs = new ArrayList<SolrDoc>();
673
        if (docsContainResourceMap != null && !docsContainResourceMap.isEmpty()) {
674
            for (SolrDoc doc : docsContainResourceMap) {
675
                List<SolrDoc> updatedSolrDocs = new ArrayList<SolrDoc>();
676
                List<String> resourceMapIdStrs = doc
677
                        .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
678
                List<String> dataIdStrs = doc
679
                        .getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
680
                List<String> metadataIdStrs = doc
681
                        .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
682
                if ((dataIdStrs == null || dataIdStrs.isEmpty())
683
                        && (metadataIdStrs == null || metadataIdStrs.isEmpty())) {
684
                    // only has resourceMap field, doesn't have either documentBy or documents fields.
685
                    // so we only remove the resource map field.
686
                    doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, resourceMapId);
687
                    updatedSolrDocs.add(doc);
688
                } else if ((dataIdStrs != null && !dataIdStrs.isEmpty())
689
                        && (metadataIdStrs == null || metadataIdStrs.isEmpty())) {
690
                    //The solr doc is for a metadata object since the solr doc documents data files
691
                    updatedSolrDocs = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
692
                            dataIdStrs, SolrElementField.FIELD_DOCUMENTS);
693
                } else if ((dataIdStrs == null || dataIdStrs.isEmpty())
694
                        && (metadataIdStrs != null && !metadataIdStrs.isEmpty())) {
695
                    //The solr doc is for a data object since it documentedBy elements.
696
                    updatedSolrDocs = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
697
                            metadataIdStrs, SolrElementField.FIELD_ISDOCUMENTEDBY);
698
                } else if ((dataIdStrs != null && !dataIdStrs.isEmpty())
699
                        && (metadataIdStrs != null && !metadataIdStrs.isEmpty())){
700
                    // both metadata and data for one object
701
                    List<SolrDoc> solrDocsRemovedDocuments = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
702
                            dataIdStrs, SolrElementField.FIELD_DOCUMENTS);
703
                    List<SolrDoc> solrDocsRemovedDocumentBy = removeAggregatedItems(resourceMapId, doc, resourceMapIdStrs,
704
                            metadataIdStrs, SolrElementField.FIELD_ISDOCUMENTEDBY);
705
                    updatedSolrDocs = mergeUpdatedSolrDocs(solrDocsRemovedDocumentBy, solrDocsRemovedDocuments);
706
                }
707
                //move them to the final result
708
                if(updatedSolrDocs != null) {
709
                    for(SolrDoc updatedDoc: updatedSolrDocs) {
710
                        totalUpdatedSolrDocs.add(updatedDoc);
711
                    }
712
                }
713
                
714
            }
715

    
716
        }
717
        return totalUpdatedSolrDocs;
718
    }
719
    
720
    /*
721
     * Process the list of ids of the documentBy/documents in a slor doc.
722
     */
723
    private List<SolrDoc> removeAggregatedItems(String targetResourceMapId, SolrDoc doc,
724
            List<String> resourceMapIdsInDoc, List<String> aggregatedItemsInDoc, String fieldNameRemoved) {
725
        List<SolrDoc> updatedSolrDocs = new ArrayList<SolrDoc>();
726
        if (doc != null && resourceMapIdsInDoc != null && aggregatedItemsInDoc != null
727
                && fieldNameRemoved != null) {
728
            if (resourceMapIdsInDoc.size() == 1) {
729
                //only has one resource map. remove the resource map. also remove the documentBy
730
                doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, targetResourceMapId);
731
                doc.removeAllFields(fieldNameRemoved);
732
                updatedSolrDocs.add(doc);
733
            } else if (resourceMapIdsInDoc.size() > 1) {
734
                //we have multiple resource maps. We should match them.                     
735
                Map<String, String> ids = matchResourceMapsAndItems(doc.getIdentifier(),
736
                        targetResourceMapId, resourceMapIdsInDoc, aggregatedItemsInDoc, fieldNameRemoved);
737
                if (ids != null) {
738
                    for (String id : ids.keySet()) {
739
                        doc.removeFieldsWithValue(fieldNameRemoved, id);
740
                    }
741
                }
742
                doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
743
                        targetResourceMapId);
744
                updatedSolrDocs.add(doc);
745
                /*if (aggregatedItemsInDoc.size() > 1) {
746
                    
747

    
748
                } else {
749
                    //multiple resource map aggregate same metadata and data. Just remove the resource map
750
                    doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
751
                            targetResourceMapId);
752
                    updatedSolrDocs.add(doc);
753
                }*/
754
            }
755
        }
756
        return updatedSolrDocs;
757
    }
758

    
759
    /*
760
     * Return a map of mapping aggregation id map the target resourceMapId.
761
     * This will look the aggregation information in another side - If the targetId
762
     * is a metadata object, we will look the data objects which it describes; If 
763
     * the targetId is a data object, we will look the metadata object which documents it.
764
     */
765
    private Map<String, String> matchResourceMapsAndItems(String targetId,
766
            String targetResourceMapId, List<String> originalResourceMaps, List<String> aggregatedItems, String fieldName) {
767
        Map<String, String> map = new HashMap<String, String>();
768
        if (targetId != null && targetResourceMapId != null && aggregatedItems != null
769
                && fieldName != null) {
770
            String newFieldName = null;
771
            if (fieldName.equals(SolrElementField.FIELD_ISDOCUMENTEDBY)) {
772
                newFieldName = SolrElementField.FIELD_DOCUMENTS;
773
            } else if (fieldName.equals(SolrElementField.FIELD_DOCUMENTS)) {
774
                newFieldName = SolrElementField.FIELD_ISDOCUMENTEDBY;
775
            }
776
            if (newFieldName != null) {
777
                for (String item : aggregatedItems) {
778
                    SolrDoc doc = null;
779
                    try {
780
                        doc = getDocumentById(item);
781
                        List<String> fieldValues = doc.getAllFieldValues(newFieldName);
782
                        List<String> resourceMapIds = doc
783
                                .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
784
                        if ((fieldValues != null && fieldValues.contains(targetId))
785
                                && (resourceMapIds != null && resourceMapIds
786
                                        .contains(targetResourceMapId))) {
787
                            //okay, we found the target aggregation item id and the resource map id
788
                            //in this solr doc. However, we need check if another resource map with different
789
                            //id but specify the same relationship. If we have the id(s), we should not
790
                            // remove the documents( or documentBy) element since we need to preserve the 
791
                            // relationship for the remain resource map. 
792
                            boolean hasDuplicateIds = false;
793
                            if(originalResourceMaps != null) {
794
                               for(String id :resourceMapIds) {
795
                                    if (originalResourceMaps.contains(id) && !id.equals(targetResourceMapId)) {
796
                                        hasDuplicateIds = true;
797
                                        break;
798
                                    }
799
                                }
800
                            }
801
                            if(!hasDuplicateIds) {
802
                                map.put(item, targetResourceMapId);
803
                            }
804
                            
805
                        }
806
                    } catch (Exception e) {
807
                        log.warn("SolrIndex.matchResourceMapsAndItems - can't get the solrdoc for the id "
808
                                + item + " since " + e.getMessage());
809
                    }
810
                }
811
            }
812
        }
813
        return map;
814
    }
815

    
816
    /*
817
     * Get the solr index doc from the index server for the given id.
818
     */
819
    private SolrDoc getDocumentById(String id) throws NotImplemented, NotFound, UnsupportedType, 
820
                SolrServerException, ParserConfigurationException, SAXException, XPathExpressionException, IOException {
821
        SolrDoc doc = ResourceMapSubprocessor.getSolrDoc(id);
822
        return doc;
823
    }
824
    
825
    /*
826
     * Merge two list of updated solr docs. removedDocumentBy has the correct information about documentBy element.
827
     * removedDocuments has the correct information about the documents element.
828
     * So we go through the two list and found the two docs having the same identifier.
829
     * Get the list of the documents value from the one in the removedDoucments (1). 
830
     * Remove all values of documents from the one in the removedDocumentBy. 
831
     * Then copy the list of documents value from (1) to to the one in the removedDocumentBy.
832
     */
833
    private List<SolrDoc> mergeUpdatedSolrDocs(List<SolrDoc>removedDocumentBy, List<SolrDoc>removedDocuments) {
834
        List<SolrDoc> mergedDocuments = new ArrayList<SolrDoc>();
835
        if(removedDocumentBy == null || removedDocumentBy.isEmpty()) {
836
            mergedDocuments = removedDocuments;
837
        } else if (removedDocuments == null || removedDocuments.isEmpty()) {
838
            mergedDocuments = removedDocumentBy;
839
        } else {
840
            int sizeOfDocBy = removedDocumentBy.size();
841
            int sizeOfDocs = removedDocuments.size();
842
            for(int i=sizeOfDocBy-1; i>= 0; i--) {
843
                SolrDoc docInRemovedDocBy = removedDocumentBy.get(i);
844
                for(int j= sizeOfDocs-1; j>=0; j--) {
845
                    SolrDoc docInRemovedDocs = removedDocuments.get(j);
846
                    if(docInRemovedDocBy.getIdentifier().equals(docInRemovedDocs.getIdentifier())) {
847
                        //find the same doc in both list. let's merge them.
848
                        //first get all the documents element from the docWithDocs(it has the correct information about the documents element)
849
                        List<String> idsInDocuments = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
850
                        docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS);//clear out any documents element in docInRemovedDocBy
851
                        //add the Documents element from the docInRemovedDocs if it has any.
852
                        // The docInRemovedDocs has the correct information about the documentBy. Now it copied the correct information of the documents element.
853
                        // So docInRemovedDocs has both correct information about the documentBy and documents elements.
854
                        if(idsInDocuments != null) {
855
                            for(String id : idsInDocuments) {
856
                                if(id != null && !id.trim().equals("")) {
857
                                    docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id));
858
                                }
859
                                
860
                            }
861
                        }
862
                        //intersect the resource map ids.
863
                        List<String> resourceMapIdsInWithDocs = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
864
                        List<String> resourceMapIdsInWithDocBy = docInRemovedDocBy.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP);
865
                        docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_RESOURCEMAP);
866
                        Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs, resourceMapIdsInWithDocBy);
867
                        if(resourceMapIds != null) {
868
                            for(Object idObj : resourceMapIds) {
869
                                String id = (String)idObj;
870
                                docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_RESOURCEMAP, id));
871
                            }
872
                        }
873
                        //we don't need do anything about the documentBy elements since the docInRemovedDocBy has the correct information.
874
                        mergedDocuments.add(docInRemovedDocBy);
875
                        //delete the two documents from the list
876
                        removedDocumentBy.remove(i);
877
                        removedDocuments.remove(j);
878
                        break;
879
                    }
880
                    
881
                }
882
            }
883
            // when we get there, if the two lists are empty, this will be a perfect merge. However, if something are left. we 
884
            //just put them in.
885
            for(SolrDoc doc: removedDocumentBy) {
886
                mergedDocuments.add(doc);
887
            }
888
            for(SolrDoc doc: removedDocuments) {
889
                mergedDocuments.add(doc);
890
            }
891
        }
892
        return mergedDocuments;
893
    }
894
    
895

    
896
    /*
897
     * Remove a pid which is part of resource map.
898
     */
899
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
900
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
901
        removeFromIndex(pid);
902
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
903

    
904
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
905
        for (String documentsValue : documents) {
906
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
907
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
908
            removeFromIndex(documentsValue);
909
            insertToIndex(solrDoc);
910
        }
911

    
912
        List<String> documentedBy = indexedDoc
913
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
914
        for (String documentedByValue : documentedBy) {
915
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
916
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, pid);
917
            //docsToUpdate.add(solrDoc);
918
            removeFromIndex(documentedByValue);
919
            insertToIndex(solrDoc);
920
        }
921

    
922
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
923
        //httpService.sendUpdate(solrIndexUri, addCommand);
924
    }
925

    
926
    /*
927
     * Remove a pid from the solr index
928
     */
929
    private synchronized void removeFromIndex(String pid) throws SolrServerException, IOException {
930
        if(pid != null && !pid.trim().equals("")) {
931
            /*IndexEvent event = new IndexEvent();
932
            event.setDate(Calendar.getInstance().getTime());
933
            Identifier identifier = new Identifier();
934
            identifier.setValue(pid);
935
            event.setIdentifier(identifier);*/
936
            try {
937
                solrServer.deleteById(pid);
938
                solrServer.commit();
939
                /*event.setType(IndexEvent.SUCCESSDELETE);
940
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
941
                try {
942
                    EventlogFactory.createIndexEventLog().write(event);
943
                } catch (Exception e) {
944
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+e.getMessage());
945
                }*/
946
            } catch (SolrServerException e) {
947
                /*event.setAction(Event.DELETE);
948
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
949
                try {
950
                    EventlogFactory.createIndexEventLog().write(event);
951
                } catch (Exception ee) {
952
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
953
                }*/
954
                throw e;
955
                
956
            } catch (IOException e) {
957
                /*event.setAction(Event.DELETE);
958
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
959
                try {
960
                    EventlogFactory.createIndexEventLog().write(event);
961
                } catch (Exception ee) {
962
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
963
                }*/
964
                throw e;
965
            }
966
            
967
        }
968
    }
969

    
970
    /**
971
     * Get the solrServer
972
     * @return
973
     */
974
    public SolrServer getSolrServer() {
975
        return solrServer;
976
    }
977

    
978
    /**
979
     * Set the solrServer. 
980
     * @param solrServer
981
     */
982
    public void setSolrServer(SolrServer solrServer) {
983
        this.solrServer = solrServer;
984
    }
985
    
986
    /**
987
     * Get all indexed ids in the solr server. 
988
     * @return an empty list if there is no index.
989
     * @throws SolrServerException
990
     */
991
    public List<String> getSolrIds() throws SolrServerException {
992
        List<String> list = new ArrayList<String>();
993
        SolrQuery query = new SolrQuery(IDQUERY); 
994
        query.setRows(Integer.MAX_VALUE); 
995
        query.setFields(ID); 
996
        QueryResponse response = solrServer.query(query); 
997
        SolrDocumentList docs = response.getResults();
998
        if(docs != null) {
999
            for(SolrDocument doc :docs) {
1000
                String identifier = (String)doc.getFieldValue(ID);
1001
                //System.out.println("======================== "+identifier);
1002
                list.add(identifier);
1003
            }
1004
        }
1005
        return list;
1006
    }
1007
    
1008
    private void writeEventLog(SystemMetadata systemMetadata, Identifier pid, String error) {
1009
        IndexEvent event = new IndexEvent();
1010
        event.setIdentifier(pid);
1011
        event.setDate(Calendar.getInstance().getTime());
1012
        String action = null;
1013
        if (systemMetadata == null ) {
1014
            action = Event.CREATE.xmlValue();
1015
            event.setAction(Event.CREATE);
1016
        }
1017
        else if(systemMetadata.getArchived()) {
1018
            action = Event.DELETE.xmlValue();
1019
            event.setAction(Event.DELETE);
1020
        } else {
1021
            action = Event.CREATE.xmlValue();
1022
            event.setAction(Event.CREATE);
1023
        }
1024
        event.setDescription("Failed to "+action+"the solr index for the id "+pid.getValue()+" since "+error);
1025
        try {
1026
            EventlogFactory.createIndexEventLog().write(event);
1027
        } catch (Exception ee) {
1028
            log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
1029
        }
1030
    }
1031
}
(5-5/6)