Project

General

Profile

1 7542 tao
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that inserts, updates and removes index entries
4
 *             on a SOLR server
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28
29 7555 tao
import java.io.ByteArrayInputStream;
30 7786 tao
import java.io.FileNotFoundException;
31 7546 tao
import java.io.IOException;
32
import java.io.InputStream;
33
import java.util.ArrayList;
34 7800 tao
import java.util.Calendar;
35 7546 tao
import java.util.HashMap;
36
import java.util.Iterator;
37 7542 tao
import java.util.List;
38 7546 tao
import java.util.Map;
39
import java.util.Set;
40 7542 tao
41
import javax.xml.parsers.DocumentBuilder;
42
import javax.xml.parsers.DocumentBuilderFactory;
43 7546 tao
import javax.xml.parsers.ParserConfigurationException;
44 7542 tao
import javax.xml.xpath.XPath;
45 7546 tao
import javax.xml.xpath.XPathExpressionException;
46 7542 tao
import javax.xml.xpath.XPathFactory;
47
48 7546 tao
import org.apache.commons.codec.EncoderException;
49
import org.apache.commons.io.output.ByteArrayOutputStream;
50 7786 tao
import org.apache.commons.lang.StringUtils;
51 7546 tao
import org.apache.commons.logging.Log;
52
import org.apache.commons.logging.LogFactory;
53 7604 tao
import org.apache.solr.client.solrj.SolrQuery;
54 7542 tao
import org.apache.solr.client.solrj.SolrServer;
55 7546 tao
import org.apache.solr.client.solrj.SolrServerException;
56 7604 tao
import org.apache.solr.client.solrj.response.QueryResponse;
57 7547 tao
import org.apache.solr.client.solrj.response.UpdateResponse;
58 7604 tao
import org.apache.solr.common.SolrDocument;
59
import org.apache.solr.common.SolrDocumentList;
60 7546 tao
import org.apache.solr.common.SolrInputDocument;
61 7542 tao
import org.dataone.cn.indexer.XMLNamespaceConfig;
62
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
63 7546 tao
import org.dataone.cn.indexer.parser.SolrField;
64 7786 tao
import org.dataone.cn.indexer.resourcemap.ResourceEntry;
65
import org.dataone.cn.indexer.resourcemap.ResourceMap;
66 7546 tao
import org.dataone.cn.indexer.solrhttp.SolrDoc;
67
import org.dataone.cn.indexer.solrhttp.SolrElementField;
68 7733 tao
import org.dataone.service.exceptions.NotFound;
69
import org.dataone.service.exceptions.NotImplemented;
70 7786 tao
import org.dataone.service.exceptions.ServiceFailure;
71 7733 tao
import org.dataone.service.exceptions.UnsupportedType;
72 7815 leinfelder
import org.dataone.service.types.v1.Event;
73 7577 tao
import org.dataone.service.types.v1.Identifier;
74 7555 tao
import org.dataone.service.types.v1.SystemMetadata;
75
import org.dataone.service.util.TypeMarshaller;
76 8023 tao
import org.dspace.foresite.OREParserException;
77 7555 tao
import org.jibx.runtime.JiBXException;
78 7546 tao
import org.w3c.dom.Document;
79
import org.xml.sax.SAXException;
80 7542 tao
81 7828 leinfelder
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
82 7800 tao
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
83 7711 tao
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
84
85 7542 tao
/**
86
 * A class that inserts, updates and removes index entries on a SOLR server.
87
 * @author tao
88
 *
89
 */
90
public class SolrIndex {
91 7591 leinfelder
92 7604 tao
    public static final String ID = "id";
93
    private static final String IDQUERY = ID+":*";
94 7542 tao
    private List<IDocumentSubprocessor> subprocessors = null;
95
    private SolrServer solrServer = null;
96
    private XMLNamespaceConfig xmlNamespaceConfig = null;
97 7546 tao
    private List<SolrField> sysmetaSolrFields = null;
98 7542 tao
99
    private static DocumentBuilderFactory documentBuilderFactory = null;
100
    private static DocumentBuilder builder = null;
101
102
    private static XPathFactory xpathFactory = null;
103
    private static XPath xpath = null;
104 7546 tao
    Log log = LogFactory.getLog(SolrIndex.class);
105 7542 tao
106 7546 tao
    static {
107
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
108
        documentBuilderFactory.setNamespaceAware(true);
109
        try {
110
            builder = documentBuilderFactory.newDocumentBuilder();
111
        } catch (ParserConfigurationException e) {
112
            e.printStackTrace();
113
        }
114
        xpathFactory = XPathFactory.newInstance();
115
        xpath = xpathFactory.newXPath();
116
    }
117
118 7542 tao
    /**
     * Constructor. Stores the given configuration and initializes the XPath
     * expressions of the system metadata fields.
     * @param sysmetaSolrFields  the solr fields which will be extracted from the system metadata
     * @param xmlNamespaceConfig  the namespace context set on the shared XPath object
     * @throws XPathExpressionException
     * @throws ParserConfigurationException
     * @throws IOException
     * @throws SAXException
     */
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
            throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        this.sysmetaSolrFields = sysmetaSolrFields;
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        init();
    }
129
130 7546 tao
    private void init() throws ParserConfigurationException, XPathExpressionException {
131
        xpath.setNamespaceContext(xmlNamespaceConfig);
132
        initExpressions();
133
    }
134
135
    private void initExpressions() throws XPathExpressionException {
136
        for (SolrField field : sysmetaSolrFields) {
137
            field.initExpression(xpath);
138
        }
139
140
    }
141 7542 tao
142 7546 tao
143 7542 tao
    /**
144
     * Get the list of the Subprocessors in this index.
145
     * @return the list of the Subprocessors.
146
     */
147
    public List<IDocumentSubprocessor> getSubprocessors() {
148
        return subprocessors;
149
    }
150
151
    /**
152
     * Set the list of Subprocessors.
153
     * @param subprocessorList  the list will be set.
154
     */
155
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
156 7546 tao
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
157 7542 tao
            subprocessor.initExpression(xpath);
158 7546 tao
        }
159 7542 tao
        this.subprocessors = subprocessorList;
160
    }
161 7546 tao
162
    /**
163
     * Generate the index for the given information
164
     * @param id
165 7555 tao
     * @param systemMetadata
166 7546 tao
     * @param dataStream
167
     * @return
168
     * @throws IOException
169
     * @throws SAXException
170
     * @throws ParserConfigurationException
171
     * @throws XPathExpressionException
172 7555 tao
     * @throws JiBXException
173 7711 tao
     * @throws SolrServerException
174 7546 tao
     * @throws EncoderException
175 7733 tao
     * @throws UnsupportedType
176
     * @throws NotFound
177
     * @throws NotImplemented
178 7546 tao
     */
179 7555 tao
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
180 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
181 7733 tao
                    XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
182 7546 tao
183
        // Load the System Metadata document
184 7555 tao
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
185
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
186
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
187
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
188 7546 tao
        if (sysMetaDoc == null) {
189
            log.error("Could not load System metadata for ID: " + id);
190
            return null;
191
        }
192
193
        // Extract the field values from the System Metadata
194
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
195
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
196
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
197
        docs.put(id, indexDocument);
198
199
        // Determine if subprocessors are available for this ID
200
        if (subprocessors != null) {
201
                    // for each subprocessor loaded from the spring config
202
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
203
                        // Does this subprocessor apply?
204
                        if (subprocessor.canProcess(sysMetaDoc)) {
205
                            // if so, then extract the additional information from the
206
                            // document.
207
                            try {
208
                                // docObject = the resource map document or science
209
                                // metadata document.
210
                                // note that resource map processing touches all objects
211
                                // referenced by the resource map.
212
                                Document docObject = generateXmlDocument(dataStream);
213
                                if (docObject == null) {
214 7852 tao
                                    throw new Exception("Could not load OBJECT for ID " + id );
215 7546 tao
                                } else {
216
                                    docs = subprocessor.processDocument(id, docs, docObject);
217
                                }
218
                            } catch (Exception e) {
219
                                log.error(e.getStackTrace().toString());
220 7852 tao
                                throw new SolrServerException(e.getMessage());
221 7546 tao
                            }
222
                        }
223
                    }
224
       }
225
226
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
227
       // merge is only for resource map. We need more work here.
228
       for (SolrDoc mergeDoc : docs.values()) {
229
           if (!mergeDoc.isMerged()) {
230 7711 tao
                 mergeWithIndexedDocument(mergeDoc);
231 7546 tao
           }
232
       }
233
234
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
235
236
       return docs;
237
    }
238
239 7711 tao
    /**
240
     * Merge updates with existing solr documents
241
     *
242
     * This method appears to re-set the data package field data into the
243
     * document about to be updated in the solr index. Since packaging
244
     * information is derived from the package document (resource map), this
245
     * information is not present when processing a document contained in a data
246
     * package. This method replaces those values from the existing solr index
247
     * record for the document being processed. -- sroseboo, 1-18-12
248
     *
249
     * @param indexDocument
250
     * @return
251
     * @throws IOException
252
     * @throws EncoderException
253
     * @throws XPathExpressionException
254
     * @throws SAXException
255
     * @throws ParserConfigurationException
256
     * @throws SolrServerException
257 7733 tao
     * @throws UnsupportedType
258
     * @throws NotFound
259
     * @throws NotImplemented
260 7711 tao
     */
261
    // TODO:combine merge function with resourcemap merge function
262
263
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
264 7733 tao
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
265 7711 tao
        List<String> ids = new ArrayList<String>();
266
        ids.add(indexDocument.getIdentifier());
267
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
268
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
269
                : indexedDocuments.get(0);
270
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
271
            return indexDocument;
272
        } else {
273
            for (SolrElementField field : indexedDocument.getFieldList()) {
274
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
275
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
276
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
277
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
278
                    indexDocument.addField(field);
279
                }
280
            }
281
282
            indexDocument.setMerged(true);
283
            return indexDocument;
284
        }
285
    }
286
287 7546 tao
    /*
288
     * Generate a Document from the InputStream
289
     */
290
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
291
        Document doc = null;
292
293
        try {
294
            doc = builder.parse(smdStream);
295
        } catch (IOException e) {
296
            log.error(e.getMessage(), e);
297
        }
298
299
        return doc;
300
    }
301
302
    /*
303
     * Index the fields of the system metadata
304
     */
305
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
306
307
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
308
        // solrFields is the list of fields defined in the application context
309
310
        for (SolrField field : sysmetaSolrFields) {
311
            try {
312
                // the field.getFields method can return a single value or
313
                // multiple values for multi-valued fields
314
                // or can return multiple SOLR document fields.
315
                fieldList.addAll(field.getFields(doc, identifier));
316
            } catch (Exception e) {
317
                e.printStackTrace();
318
            }
319
        }
320
        return fieldList;
321
322
    }
323
324
    /**
325 7577 tao
     * Check the parameters of the insert or update methods.
326
     * @param pid
327
     * @param systemMetadata
328
     * @param data
329
     * @throws SolrServerException
330
     */
331
    private void checkParams(String pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
332
        if(pid == null || pid.trim().equals("")) {
333
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
334
        }
335
        if(systemMetadata == null) {
336
            throw new SolrServerException("The system metadata of the indexed document should not be null.");
337
        }
338
        if(data == null) {
339
            throw new SolrServerException("The indexed document itself should not be null.");
340
        }
341
    }
342
343
    /**
344 7627 tao
     * Insert the indexes for a document.
345 7546 tao
     * @param pid  the id of this document
346
     * @param systemMetadata  the system metadata associated with the data object
347
     * @param data  the data object itself
348
     * @throws SolrServerException
349 7555 tao
     * @throws JiBXException
350 7711 tao
     * @throws EncoderException
351 7733 tao
     * @throws UnsupportedType
352
     * @throws NotFound
353
     * @throws NotImplemented
354 7546 tao
     */
355 7682 tao
    private synchronized void insert(String pid, SystemMetadata systemMetadata, InputStream data)
356 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
357 7733 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType {
358 7577 tao
        checkParams(pid, systemMetadata, data);
359 7546 tao
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
360
361
        //transform the Map to the SolrInputDocument which can be used by the solr server
362
        if(docs != null) {
363
            Set<String> ids = docs.keySet();
364
            for(String id : ids) {
365
                if(id != null) {
366
                    SolrDoc doc = docs.get(id);
367 7786 tao
                    insertToIndex(doc);
368
                }
369
370
            }
371
        }
372
    }
373
374
    /*
375
     * Insert a SolrDoc to the solr server.
376
     */
377
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
378
        if(doc != null ) {
379
            SolrInputDocument solrDoc = new SolrInputDocument();
380
            List<SolrElementField> list = doc.getFieldList();
381
            if(list != null) {
382
                //solrDoc.addField(METACATPIDFIELD, pid);
383
                Iterator<SolrElementField> iterator = list.iterator();
384
                while (iterator.hasNext()) {
385
                    SolrElementField field = iterator.next();
386
                    if(field != null) {
387
                        String value = field.getValue();
388
                        String name = field.getName();
389
                        //System.out.println("add name/value pair - "+name+"/"+value);
390
                        solrDoc.addField(name, value);
391 7546 tao
                    }
392
                }
393
            }
394 7786 tao
            if(!solrDoc.isEmpty()) {
395 7856 tao
                /*IndexEvent event = new IndexEvent();
396 7800 tao
                event.setDate(Calendar.getInstance().getTime());
397
                Identifier pid = new Identifier();
398
                pid.setValue(doc.getIdentifier());
399 7856 tao
                event.setIdentifier(pid);*/
400 7800 tao
                try {
401 7801 tao
                    UpdateResponse response = solrServer.add(solrDoc);
402
                    solrServer.commit();
403 7805 tao
                    /*event.setType(IndexEvent.SUCCESSINSERT);
404 7801 tao
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
405
                    try {
406
                        EventlogFactory.createIndexEventLog().write(event);
407
                    } catch (Exception e) {
408 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+e.getMessage());
409 7805 tao
                    }*/
410 7801 tao
                } catch (SolrServerException e) {
411 7856 tao
                    /*event.setAction(Event.CREATE);
412 7801 tao
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
413
                    try {
414
                        EventlogFactory.createIndexEventLog().write(event);
415
                    } catch (Exception ee) {
416 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
417 7856 tao
                    }*/
418 7801 tao
                    throw e;
419
                } catch (IOException e) {
420 7856 tao
                    /*event.setAction(Event.CREATE);
421 7801 tao
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
422
                    try {
423
                        EventlogFactory.createIndexEventLog().write(event);
424
                    } catch (Exception ee) {
425 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
426 7856 tao
                    }*/
427 7801 tao
                    throw e;
428
429 7800 tao
                }
430 7786 tao
                //System.out.println("=================the response is:\n"+response.toString());
431
            }
432 7546 tao
        }
433
    }
434 7577 tao
435
    /**
436 7627 tao
     * Update the solr index. This method handles the three scenarios:
437
     * 1. Archive (or delete) - if the the system metadata shows the value of the archive is true,
438
     *    remove the index for the document and its previous versions if it has.
439
     * 2. Update an existing doc - if the the system metadata shows the value of the archive is false and it has an obsoletes,
440
     *    remove the index for the previous version(s) and generate new index for the doc.
441
     * 3. Add a new doc - if the system metadata shows the value of the archive is false and it hasn't an obsoletes, generate the
442
     *    index for the doc.
443
     * @param pid  the id of the document
444 7603 tao
     * @param obsoleteIds  the chain of the obsoletes by this id
445 7577 tao
     * @param systemMetadata  the system metadata associated with the data object
446
     * @param data  the data object itself
447
     * @throws SolrServerException
448
     * @throws JiBXException
449 7711 tao
     * @throws EncoderException
450 7733 tao
     * @throws UnsupportedType
451
     * @throws NotFound
452
     * @throws NotImplemented
453 7786 tao
     * @throws ServiceFailure
454 8023 tao
     * @throws OREParserException
455 7577 tao
     */
456 7627 tao
    public void update(String pid, List<String> obsoleteIds, SystemMetadata systemMetadata, InputStream data)
457 7577 tao
                    throws IOException, SAXException, ParserConfigurationException,
458 8023 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType, ServiceFailure, OREParserException {
459 7627 tao
        checkParams(pid, systemMetadata, data);
460
        boolean isArchive = systemMetadata.getArchived();
461 7877 tao
        if(isArchive || systemMetadata.getObsoletedBy() != null) {
462 7627 tao
            //archive(delete)
463
            Identifier obsolete = systemMetadata.getObsoletes();
464
            if(obsolete != null) {
465
                removeObsoletesChain(obsolete.getValue(), obsoleteIds);
466
            }
467
            remove(pid);
468 7683 tao
            log.info("============================= archive the idex for the identifier "+pid);
469 7627 tao
        } else {
470
            Identifier obsolete = systemMetadata.getObsoletes();
471
            if(obsolete != null) {
472
                removeObsoletesChain(obsolete.getValue(), obsoleteIds);
473
            }
474
            //generate index for either add or update.
475
            insert(pid, systemMetadata, data);
476 7683 tao
            log.info("============================= insert index for the identifier "+pid);
477 7577 tao
        }
478
    }
479 7603 tao
480 7627 tao
481 8023 tao
    private void removeObsoletesChain(String obsoleteId, List<String> obsoleteIdChain) throws SolrServerException, IOException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
482 7627 tao
        if(obsoleteId != null && !obsoleteId.trim().equals("")) {
483
            if(obsoleteIdChain == null || obsoleteIdChain.isEmpty()) {
484
                throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsoletes chain can't be null or empty since the system metadata already has the obsoletes element.");
485
            }
486
            if(!obsoleteIdChain.contains(obsoleteId)) {
487
                throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsoletes elment in the system metadata is not in the obsoleteId chain");
488
            }
489
            remove(obsoleteIdChain);
490
        } else {
491
            throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsolete id should be null.");
492
        }
493
    }
494
495 7603 tao
    /**
496
     * Remove all the indexes associated with the pids in the list.
497
     * @param pidList
498
     * @throws IOException
499
     * @throws SolrServerException
500 7786 tao
     * @throws ParserConfigurationException
501
     * @throws SAXException
502
     * @throws UnsupportedType
503
     * @throws NotFound
504
     * @throws NotImplemented
505
     * @throws XPathExpressionException
506
     * @throws ServiceFailure
507 8023 tao
     * @throws OREParserException
508 7603 tao
     */
509 8023 tao
    private void remove(List<String> pidList) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
510 7603 tao
        if(pidList != null) {
511
            for(String id : pidList) {
512
                remove(id);
513
            }
514
        }
515
    }
516 7547 tao
517
    /**
518
     * Remove the indexed associated with specified pid.
519
     * @param pid  the pid which the indexes are associated with
520
     * @throws IOException
521
     * @throws SolrServerException
522 7786 tao
     * @throws ParserConfigurationException
523
     * @throws SAXException
524
     * @throws UnsupportedType
525
     * @throws NotFound
526
     * @throws NotImplemented
527
     * @throws XPathExpressionException
528
     * @throws ServiceFailure
529 8023 tao
     * @throws OREParserException
530 7547 tao
     */
531 8023 tao
    public void remove(String pid) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
532 7786 tao
        if (isDataPackage(pid)) {
533
            removeDataPackage(pid);
534
        } else if (isPartOfDataPackage(pid)) {
535
            removeFromDataPackage(pid);
536
        } else {
537
            removeFromIndex(pid);
538
        }
539
    }
540
541
542
543
544
    /*
545
     * Remove a resource map pid
546
     */
547 8023 tao
    private void removeDataPackage(String pid) throws ServiceFailure, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, OREParserException  {
548 7786 tao
        Document resourceMapDoc = generateXmlDocument(DistributedMapsFactory.getDataObject(pid));
549 8023 tao
        //ResourceMap resourceMap = new ResourceMap(resourceMapDoc);
550 8069 leinfelder
        ResourceMap resourceMap = new ResourceMap(resourceMapDoc);
551 7786 tao
        List<String> documentIds = resourceMap.getAllDocumentIDs();
552
        List<SolrDoc> indexDocuments =ResourceMapSubprocessor.getSolrDocs(documentIds);
553
        removeFromIndex(pid);
554
        //List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
555
        // for each document in data package:
556
        for (SolrDoc indexDoc : indexDocuments) {
557
558
            if (indexDoc.getIdentifier().equals(pid)) {
559
                continue; // skipping the resource map, no need update
560
                          // it.
561
                          // will
562
                          // be removed.
563
            }
564
565
            // Remove resourceMap reference
566
            indexDoc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
567
                    resourceMap.getIdentifier());
568
569
            // // Remove documents/documentedby values for this resource
570
            // map
571
            for (ResourceEntry entry : resourceMap.getMappedReferences()) {
572
                if (indexDoc.getIdentifier().equals(entry.getIdentifier())) {
573
                    for (String documentedBy : entry.getDocumentedBy()) {
574
                        // Using removeOneFieldWithValue in-case same
575
                        // documents
576
                        // are in more than one data package. just
577
                        // remove
578
                        // one
579
                        // instance of data package info.
580
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY,
581
                                documentedBy);
582
                    }
583
                    for (String documents : entry.getDocuments()) {
584
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_DOCUMENTS,
585
                                documents);
586
                    }
587
                    break;
588
                }
589
            }
590
            removeFromIndex(indexDoc.getIdentifier());
591
            insertToIndex(indexDoc);
592
            //docsToUpdate.add(indexDoc);
593
        }
594
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
595
        //httpService.sendUpdate(solrIndexUri, addCommand);
596
    }
597
598
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
599
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
600
        removeFromIndex(pid);
601
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
602
603
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
604
        for (String documentsValue : documents) {
605
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
606
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
607
            removeFromIndex(documentsValue);
608
            insertToIndex(solrDoc);
609
        }
610
611
        List<String> documentedBy = indexedDoc
612
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
613
        for (String documentedByValue : documentedBy) {
614
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
615
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, documentedByValue);
616
            //docsToUpdate.add(solrDoc);
617
            removeFromIndex(documentedByValue);
618
            insertToIndex(solrDoc);
619
        }
620
621
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
622
        //httpService.sendUpdate(solrIndexUri, addCommand);
623
    }
624
625
    /*
626
     * Remove a pid from the solr index
627
     */
628
    private void removeFromIndex(String pid) throws SolrServerException, IOException {
629 7627 tao
        if(pid != null && !pid.trim().equals("")) {
630 7856 tao
            /*IndexEvent event = new IndexEvent();
631 7800 tao
            event.setDate(Calendar.getInstance().getTime());
632
            Identifier identifier = new Identifier();
633
            identifier.setValue(pid);
634 7856 tao
            event.setIdentifier(identifier);*/
635 7800 tao
            try {
636 7801 tao
                solrServer.deleteById(pid);
637
                solrServer.commit();
638 7805 tao
                /*event.setType(IndexEvent.SUCCESSDELETE);
639 7801 tao
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
640
                try {
641
                    EventlogFactory.createIndexEventLog().write(event);
642
                } catch (Exception e) {
643 7802 tao
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+e.getMessage());
644 7805 tao
                }*/
645 7801 tao
            } catch (SolrServerException e) {
646 7856 tao
                /*event.setAction(Event.DELETE);
647 7801 tao
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
648
                try {
649
                    EventlogFactory.createIndexEventLog().write(event);
650
                } catch (Exception ee) {
651 7802 tao
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
652 7856 tao
                }*/
653 7801 tao
                throw e;
654
655
            } catch (IOException e) {
656 7856 tao
                /*event.setAction(Event.DELETE);
657 7801 tao
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
658
                try {
659
                    EventlogFactory.createIndexEventLog().write(event);
660
                } catch (Exception ee) {
661 7802 tao
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
662 7856 tao
                }*/
663 7801 tao
                throw e;
664 7800 tao
            }
665 7801 tao
666 7627 tao
        }
667 7547 tao
    }
668 7569 tao
669 7786 tao
    /*
670
     * Is the pid a resource map
671
     */
672
    private boolean isDataPackage(String pid) throws FileNotFoundException, ServiceFailure {
673
        boolean isDataPackage = false;
674
        SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
675
        if(sysmeta != null) {
676
            isDataPackage = IndexGenerator.isResourceMap(sysmeta.getFormatId());
677
        }
678
        return isDataPackage;
679
    }
680
681
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
682
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
683
        if (dataPackageIndexDoc != null) {
684
            String resourceMapId = dataPackageIndexDoc
685
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
686
            return StringUtils.isNotEmpty(resourceMapId);
687
        } else {
688
            return false;
689
        }
690
    }
691
692 7569 tao
    /**
693
     * Get the solrServer
694
     * @return
695
     */
696 7604 tao
    public SolrServer getSolrServer() {
697 7569 tao
        return solrServer;
698
    }
699
700
    /**
701 7604 tao
     * Set the solrServer.
702 7569 tao
     * @param solrServer
703
     */
704 7604 tao
    public void setSolrServer(SolrServer solrServer) {
705 7569 tao
        this.solrServer = solrServer;
706
    }
707 7604 tao
708
    /**
709 7606 tao
     * Get all indexed ids in the solr server.
710
     * @return an empty list if there is no index.
711 7604 tao
     * @throws SolrServerException
712
     */
713
    public List<String> getSolrIds() throws SolrServerException {
714
        List<String> list = new ArrayList<String>();
715
        SolrQuery query = new SolrQuery(IDQUERY);
716
        query.setRows(Integer.MAX_VALUE);
717
        query.setFields(ID);
718
        QueryResponse response = solrServer.query(query);
719
        SolrDocumentList docs = response.getResults();
720
        if(docs != null) {
721
            for(SolrDocument doc :docs) {
722
                String identifier = (String)doc.getFieldValue(ID);
723
                //System.out.println("======================== "+identifier);
724
                list.add(identifier);
725
            }
726
        }
727
        return list;
728
    }
729 7542 tao
}