Project

General

Profile

1 7542 tao
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that inserts, updates and removes the index
4
 *             entries of documents on a SOLR server
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28
29 7555 tao
import java.io.ByteArrayInputStream;
30 7786 tao
import java.io.FileNotFoundException;
31 7546 tao
import java.io.IOException;
32
import java.io.InputStream;
33
import java.util.ArrayList;
34 7800 tao
import java.util.Calendar;
35 7546 tao
import java.util.HashMap;
36
import java.util.Iterator;
37 7542 tao
import java.util.List;
38 7546 tao
import java.util.Map;
39
import java.util.Set;
40 7542 tao
41
import javax.xml.parsers.DocumentBuilder;
42
import javax.xml.parsers.DocumentBuilderFactory;
43 7546 tao
import javax.xml.parsers.ParserConfigurationException;
44 7542 tao
import javax.xml.xpath.XPath;
45 7546 tao
import javax.xml.xpath.XPathExpressionException;
46 7542 tao
import javax.xml.xpath.XPathFactory;
47
48 7546 tao
import org.apache.commons.codec.EncoderException;
49
import org.apache.commons.io.output.ByteArrayOutputStream;
50 7786 tao
import org.apache.commons.lang.StringUtils;
51 7546 tao
import org.apache.commons.logging.Log;
52
import org.apache.commons.logging.LogFactory;
53 7604 tao
import org.apache.solr.client.solrj.SolrQuery;
54 7542 tao
import org.apache.solr.client.solrj.SolrServer;
55 7546 tao
import org.apache.solr.client.solrj.SolrServerException;
56 7604 tao
import org.apache.solr.client.solrj.response.QueryResponse;
57 7547 tao
import org.apache.solr.client.solrj.response.UpdateResponse;
58 7604 tao
import org.apache.solr.common.SolrDocument;
59
import org.apache.solr.common.SolrDocumentList;
60 7546 tao
import org.apache.solr.common.SolrInputDocument;
61 7542 tao
import org.dataone.cn.indexer.XMLNamespaceConfig;
62
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
63 7546 tao
import org.dataone.cn.indexer.parser.SolrField;
64 7786 tao
import org.dataone.cn.indexer.resourcemap.ResourceEntry;
65
import org.dataone.cn.indexer.resourcemap.ResourceMap;
66 8023 tao
import org.dataone.cn.indexer.resourcemap.ResourceMapFactory;
67 7546 tao
import org.dataone.cn.indexer.solrhttp.SolrDoc;
68
import org.dataone.cn.indexer.solrhttp.SolrElementField;
69 7733 tao
import org.dataone.service.exceptions.NotFound;
70
import org.dataone.service.exceptions.NotImplemented;
71 7786 tao
import org.dataone.service.exceptions.ServiceFailure;
72 7733 tao
import org.dataone.service.exceptions.UnsupportedType;
73 7815 leinfelder
import org.dataone.service.types.v1.Event;
74 7577 tao
import org.dataone.service.types.v1.Identifier;
75 7555 tao
import org.dataone.service.types.v1.SystemMetadata;
76
import org.dataone.service.util.TypeMarshaller;
77 8023 tao
import org.dspace.foresite.OREParserException;
78 7555 tao
import org.jibx.runtime.JiBXException;
79 7546 tao
import org.w3c.dom.Document;
80
import org.xml.sax.SAXException;
81 7542 tao
82 7828 leinfelder
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
83 7800 tao
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
84 7711 tao
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
85
86 7542 tao
/**
87
 * A class that inserts, updates and removes indexes on a SOLR server.
88
 * @author tao
89
 *
90
 */
91
public class SolrIndex {
92 7591 leinfelder
93 7604 tao
    public static final String ID = "id";
94
    private static final String IDQUERY = ID+":*";
95 7542 tao
    private List<IDocumentSubprocessor> subprocessors = null;
96
    private SolrServer solrServer = null;
97
    private XMLNamespaceConfig xmlNamespaceConfig = null;
98 7546 tao
    private List<SolrField> sysmetaSolrFields = null;
99 7542 tao
100
    private static DocumentBuilderFactory documentBuilderFactory = null;
101
    private static DocumentBuilder builder = null;
102
103
    private static XPathFactory xpathFactory = null;
104
    private static XPath xpath = null;
105 7546 tao
    Log log = LogFactory.getLog(SolrIndex.class);
106 7542 tao
107 7546 tao
    static {
108
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
109
        documentBuilderFactory.setNamespaceAware(true);
110
        try {
111
            builder = documentBuilderFactory.newDocumentBuilder();
112
        } catch (ParserConfigurationException e) {
113
            e.printStackTrace();
114
        }
115
        xpathFactory = XPathFactory.newInstance();
116
        xpath = xpathFactory.newXPath();
117
    }
118
119 7542 tao
    /**
     * Constructor. Stores the given configuration and then initializes the
     * XPath expressions of the system metadata fields.
     * @param sysmetaSolrFields  the solr fields extracted from the system metadata
     * @param xmlNamespaceConfig  the namespace context installed on the XPath object
     * @throws XPathExpressionException
     * @throws ParserConfigurationException
     * @throws IOException
     * @throws SAXException
     */
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
        this.sysmetaSolrFields = sysmetaSolrFields;
        this.xmlNamespaceConfig = xmlNamespaceConfig;
        this.init();
    }
130
131 7546 tao
    private void init() throws ParserConfigurationException, XPathExpressionException {
132
        xpath.setNamespaceContext(xmlNamespaceConfig);
133
        initExpressions();
134
    }
135
136
    private void initExpressions() throws XPathExpressionException {
137
        for (SolrField field : sysmetaSolrFields) {
138
            field.initExpression(xpath);
139
        }
140
141
    }
142 7542 tao
143 7546 tao
144 7542 tao
    /**
145
     * Get the list of the Subprocessors in this index.
146
     * @return the list of the Subprocessors.
147
     */
148
    public List<IDocumentSubprocessor> getSubprocessors() {
149
        return subprocessors;
150
    }
151
152
    /**
153
     * Set the list of Subprocessors.
154
     * @param subprocessorList  the list will be set.
155
     */
156
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
157 7546 tao
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
158 7542 tao
            subprocessor.initExpression(xpath);
159 7546 tao
        }
160 7542 tao
        this.subprocessors = subprocessorList;
161
    }
162 7546 tao
163
    /**
164
     * Generate the index for the given information
165
     * @param id
166 7555 tao
     * @param systemMetadata
167 7546 tao
     * @param dataStream
168
     * @return
169
     * @throws IOException
170
     * @throws SAXException
171
     * @throws ParserConfigurationException
172
     * @throws XPathExpressionException
173 7555 tao
     * @throws JiBXException
174 7711 tao
     * @throws SolrServerException
175 7546 tao
     * @throws EncoderException
176 7733 tao
     * @throws UnsupportedType
177
     * @throws NotFound
178
     * @throws NotImplemented
179 7546 tao
     */
180 7555 tao
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
181 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
182 7733 tao
                    XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
183 7546 tao
184
        // Load the System Metadata document
185 7555 tao
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
186
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
187
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
188
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
189 7546 tao
        if (sysMetaDoc == null) {
190
            log.error("Could not load System metadata for ID: " + id);
191
            return null;
192
        }
193
194
        // Extract the field values from the System Metadata
195
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
196
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
197
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
198
        docs.put(id, indexDocument);
199
200
        // Determine if subprocessors are available for this ID
201
        if (subprocessors != null) {
202
                    // for each subprocessor loaded from the spring config
203
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
204
                        // Does this subprocessor apply?
205
                        if (subprocessor.canProcess(sysMetaDoc)) {
206
                            // if so, then extract the additional information from the
207
                            // document.
208
                            try {
209
                                // docObject = the resource map document or science
210
                                // metadata document.
211
                                // note that resource map processing touches all objects
212
                                // referenced by the resource map.
213
                                Document docObject = generateXmlDocument(dataStream);
214
                                if (docObject == null) {
215 7852 tao
                                    throw new Exception("Could not load OBJECT for ID " + id );
216 7546 tao
                                } else {
217
                                    docs = subprocessor.processDocument(id, docs, docObject);
218
                                }
219
                            } catch (Exception e) {
220
                                log.error(e.getStackTrace().toString());
221 7852 tao
                                throw new SolrServerException(e.getMessage());
222 7546 tao
                            }
223
                        }
224
                    }
225
       }
226
227
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
228
       // merge is only for resource map. We need more work here.
229
       for (SolrDoc mergeDoc : docs.values()) {
230
           if (!mergeDoc.isMerged()) {
231 7711 tao
                 mergeWithIndexedDocument(mergeDoc);
232 7546 tao
           }
233
       }
234
235
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
236
237
       return docs;
238
    }
239
240 7711 tao
    /**
241
     * Merge updates with existing solr documents
242
     *
243
     * This method appears to re-set the data package field data into the
244
     * document about to be updated in the solr index. Since packaging
245
     * information is derived from the package document (resource map), this
246
     * information is not present when processing a document contained in a data
247
     * package. This method replaces those values from the existing solr index
248
     * record for the document being processed. -- sroseboo, 1-18-12
249
     *
250
     * @param indexDocument
251
     * @return
252
     * @throws IOException
253
     * @throws EncoderException
254
     * @throws XPathExpressionException
255
     * @throws SAXException
256
     * @throws ParserConfigurationException
257
     * @throws SolrServerException
258 7733 tao
     * @throws UnsupportedType
259
     * @throws NotFound
260
     * @throws NotImplemented
261 7711 tao
     */
262
    // TODO:combine merge function with resourcemap merge function
263
264
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
265 7733 tao
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
266 7711 tao
        List<String> ids = new ArrayList<String>();
267
        ids.add(indexDocument.getIdentifier());
268
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
269
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
270
                : indexedDocuments.get(0);
271
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
272
            return indexDocument;
273
        } else {
274
            for (SolrElementField field : indexedDocument.getFieldList()) {
275
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
276
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
277
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
278
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
279
                    indexDocument.addField(field);
280
                }
281
            }
282
283
            indexDocument.setMerged(true);
284
            return indexDocument;
285
        }
286
    }
287
288 7546 tao
    /*
289
     * Generate a Document from the InputStream
290
     */
291
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
292
        Document doc = null;
293
294
        try {
295
            doc = builder.parse(smdStream);
296
        } catch (IOException e) {
297
            log.error(e.getMessage(), e);
298
        }
299
300
        return doc;
301
    }
302
303
    /*
304
     * Index the fields of the system metadata
305
     */
306
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
307
308
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
309
        // solrFields is the list of fields defined in the application context
310
311
        for (SolrField field : sysmetaSolrFields) {
312
            try {
313
                // the field.getFields method can return a single value or
314
                // multiple values for multi-valued fields
315
                // or can return multiple SOLR document fields.
316
                fieldList.addAll(field.getFields(doc, identifier));
317
            } catch (Exception e) {
318
                e.printStackTrace();
319
            }
320
        }
321
        return fieldList;
322
323
    }
324
325
    /**
326 7577 tao
     * Check the parameters of the insert or update methods.
327
     * @param pid
328
     * @param systemMetadata
329
     * @param data
330
     * @throws SolrServerException
331
     */
332
    private void checkParams(String pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
333
        if(pid == null || pid.trim().equals("")) {
334
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
335
        }
336
        if(systemMetadata == null) {
337
            throw new SolrServerException("The system metadata of the indexed document should not be null.");
338
        }
339
        if(data == null) {
340
            throw new SolrServerException("The indexed document itself should not be null.");
341
        }
342
    }
343
344
    /**
345 7627 tao
     * Insert the indexes for a document.
346 7546 tao
     * @param pid  the id of this document
347
     * @param systemMetadata  the system metadata associated with the data object
348
     * @param data  the data object itself
349
     * @throws SolrServerException
350 7555 tao
     * @throws JiBXException
351 7711 tao
     * @throws EncoderException
352 7733 tao
     * @throws UnsupportedType
353
     * @throws NotFound
354
     * @throws NotImplemented
355 7546 tao
     */
356 7682 tao
    private synchronized void insert(String pid, SystemMetadata systemMetadata, InputStream data)
357 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
358 7733 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType {
359 7577 tao
        checkParams(pid, systemMetadata, data);
360 7546 tao
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
361
362
        //transform the Map to the SolrInputDocument which can be used by the solr server
363
        if(docs != null) {
364
            Set<String> ids = docs.keySet();
365
            for(String id : ids) {
366
                if(id != null) {
367
                    SolrDoc doc = docs.get(id);
368 7786 tao
                    insertToIndex(doc);
369
                }
370
371
            }
372
        }
373
    }
374
375
    /*
376
     * Insert a SolrDoc to the solr server.
377
     */
378
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
379
        if(doc != null ) {
380
            SolrInputDocument solrDoc = new SolrInputDocument();
381
            List<SolrElementField> list = doc.getFieldList();
382
            if(list != null) {
383
                //solrDoc.addField(METACATPIDFIELD, pid);
384
                Iterator<SolrElementField> iterator = list.iterator();
385
                while (iterator.hasNext()) {
386
                    SolrElementField field = iterator.next();
387
                    if(field != null) {
388
                        String value = field.getValue();
389
                        String name = field.getName();
390
                        //System.out.println("add name/value pair - "+name+"/"+value);
391
                        solrDoc.addField(name, value);
392 7546 tao
                    }
393
                }
394
            }
395 7786 tao
            if(!solrDoc.isEmpty()) {
396 7856 tao
                /*IndexEvent event = new IndexEvent();
397 7800 tao
                event.setDate(Calendar.getInstance().getTime());
398
                Identifier pid = new Identifier();
399
                pid.setValue(doc.getIdentifier());
400 7856 tao
                event.setIdentifier(pid);*/
401 7800 tao
                try {
402 7801 tao
                    UpdateResponse response = solrServer.add(solrDoc);
403
                    solrServer.commit();
404 7805 tao
                    /*event.setType(IndexEvent.SUCCESSINSERT);
405 7801 tao
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
406
                    try {
407
                        EventlogFactory.createIndexEventLog().write(event);
408
                    } catch (Exception e) {
409 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+e.getMessage());
410 7805 tao
                    }*/
411 7801 tao
                } catch (SolrServerException e) {
412 7856 tao
                    /*event.setAction(Event.CREATE);
413 7801 tao
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
414
                    try {
415
                        EventlogFactory.createIndexEventLog().write(event);
416
                    } catch (Exception ee) {
417 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
418 7856 tao
                    }*/
419 7801 tao
                    throw e;
420
                } catch (IOException e) {
421 7856 tao
                    /*event.setAction(Event.CREATE);
422 7801 tao
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
423
                    try {
424
                        EventlogFactory.createIndexEventLog().write(event);
425
                    } catch (Exception ee) {
426 7802 tao
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
427 7856 tao
                    }*/
428 7801 tao
                    throw e;
429
430 7800 tao
                }
431 7786 tao
                //System.out.println("=================the response is:\n"+response.toString());
432
            }
433 7546 tao
        }
434
    }
435 7577 tao
436
    /**
437 7627 tao
     * Update the solr index. This method handles the three scenarios:
438
     * 1. Archive (or delete) - if the the system metadata shows the value of the archive is true,
439
     *    remove the index for the document and its previous versions if it has.
440
     * 2. Update an existing doc - if the the system metadata shows the value of the archive is false and it has an obsoletes,
441
     *    remove the index for the previous version(s) and generate new index for the doc.
442
     * 3. Add a new doc - if the system metadata shows the value of the archive is false and it hasn't an obsoletes, generate the
443
     *    index for the doc.
444
     * @param pid  the id of the document
445 7603 tao
     * @param obsoleteIds  the chain of the obsoletes by this id
446 7577 tao
     * @param systemMetadata  the system metadata associated with the data object
447
     * @param data  the data object itself
448
     * @throws SolrServerException
449
     * @throws JiBXException
450 7711 tao
     * @throws EncoderException
451 7733 tao
     * @throws UnsupportedType
452
     * @throws NotFound
453
     * @throws NotImplemented
454 7786 tao
     * @throws ServiceFailure
455 8023 tao
     * @throws OREParserException
456 7577 tao
     */
457 7627 tao
    public void update(String pid, List<String> obsoleteIds, SystemMetadata systemMetadata, InputStream data)
458 7577 tao
                    throws IOException, SAXException, ParserConfigurationException,
459 8023 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType, ServiceFailure, OREParserException {
460 7627 tao
        checkParams(pid, systemMetadata, data);
461
        boolean isArchive = systemMetadata.getArchived();
462 7877 tao
        if(isArchive || systemMetadata.getObsoletedBy() != null) {
463 7627 tao
            //archive(delete)
464
            Identifier obsolete = systemMetadata.getObsoletes();
465
            if(obsolete != null) {
466
                removeObsoletesChain(obsolete.getValue(), obsoleteIds);
467
            }
468
            remove(pid);
469 7683 tao
            log.info("============================= archive the idex for the identifier "+pid);
470 7627 tao
        } else {
471
            Identifier obsolete = systemMetadata.getObsoletes();
472
            if(obsolete != null) {
473
                removeObsoletesChain(obsolete.getValue(), obsoleteIds);
474
            }
475
            //generate index for either add or update.
476
            insert(pid, systemMetadata, data);
477 7683 tao
            log.info("============================= insert index for the identifier "+pid);
478 7577 tao
        }
479
    }
480 7603 tao
481 7627 tao
482 8023 tao
    private void removeObsoletesChain(String obsoleteId, List<String> obsoleteIdChain) throws SolrServerException, IOException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
483 7627 tao
        if(obsoleteId != null && !obsoleteId.trim().equals("")) {
484
            if(obsoleteIdChain == null || obsoleteIdChain.isEmpty()) {
485
                throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsoletes chain can't be null or empty since the system metadata already has the obsoletes element.");
486
            }
487
            if(!obsoleteIdChain.contains(obsoleteId)) {
488
                throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsoletes elment in the system metadata is not in the obsoleteId chain");
489
            }
490
            remove(obsoleteIdChain);
491
        } else {
492
            throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsolete id should be null.");
493
        }
494
    }
495
496 7603 tao
    /**
497
     * Remove all the indexes associated with the pids in the list.
498
     * @param pidList
499
     * @throws IOException
500
     * @throws SolrServerException
501 7786 tao
     * @throws ParserConfigurationException
502
     * @throws SAXException
503
     * @throws UnsupportedType
504
     * @throws NotFound
505
     * @throws NotImplemented
506
     * @throws XPathExpressionException
507
     * @throws ServiceFailure
508 8023 tao
     * @throws OREParserException
509 7603 tao
     */
510 8023 tao
    private void remove(List<String> pidList) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
511 7603 tao
        if(pidList != null) {
512
            for(String id : pidList) {
513
                remove(id);
514
            }
515
        }
516
    }
517 7547 tao
518
    /**
519
     * Remove the indexed associated with specified pid.
520
     * @param pid  the pid which the indexes are associated with
521
     * @throws IOException
522
     * @throws SolrServerException
523 7786 tao
     * @throws ParserConfigurationException
524
     * @throws SAXException
525
     * @throws UnsupportedType
526
     * @throws NotFound
527
     * @throws NotImplemented
528
     * @throws XPathExpressionException
529
     * @throws ServiceFailure
530 8023 tao
     * @throws OREParserException
531 7547 tao
     */
532 8023 tao
    public void remove(String pid) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
533 7786 tao
        if (isDataPackage(pid)) {
534
            removeDataPackage(pid);
535
        } else if (isPartOfDataPackage(pid)) {
536
            removeFromDataPackage(pid);
537
        } else {
538
            removeFromIndex(pid);
539
        }
540
    }
541
542
543
544
545
    /*
546
     * Remove a resource map pid
547
     */
548 8023 tao
    private void removeDataPackage(String pid) throws ServiceFailure, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, OREParserException  {
549 7786 tao
        Document resourceMapDoc = generateXmlDocument(DistributedMapsFactory.getDataObject(pid));
550 8023 tao
        //ResourceMap resourceMap = new ResourceMap(resourceMapDoc);
551
        ResourceMap resourceMap = ResourceMapFactory.buildResourceMap(resourceMapDoc);
552 7786 tao
        List<String> documentIds = resourceMap.getAllDocumentIDs();
553
        List<SolrDoc> indexDocuments =ResourceMapSubprocessor.getSolrDocs(documentIds);
554
        removeFromIndex(pid);
555
        //List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
556
        // for each document in data package:
557
        for (SolrDoc indexDoc : indexDocuments) {
558
559
            if (indexDoc.getIdentifier().equals(pid)) {
560
                continue; // skipping the resource map, no need update
561
                          // it.
562
                          // will
563
                          // be removed.
564
            }
565
566
            // Remove resourceMap reference
567
            indexDoc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
568
                    resourceMap.getIdentifier());
569
570
            // // Remove documents/documentedby values for this resource
571
            // map
572
            for (ResourceEntry entry : resourceMap.getMappedReferences()) {
573
                if (indexDoc.getIdentifier().equals(entry.getIdentifier())) {
574
                    for (String documentedBy : entry.getDocumentedBy()) {
575
                        // Using removeOneFieldWithValue in-case same
576
                        // documents
577
                        // are in more than one data package. just
578
                        // remove
579
                        // one
580
                        // instance of data package info.
581
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY,
582
                                documentedBy);
583
                    }
584
                    for (String documents : entry.getDocuments()) {
585
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_DOCUMENTS,
586
                                documents);
587
                    }
588
                    break;
589
                }
590
            }
591
            removeFromIndex(indexDoc.getIdentifier());
592
            insertToIndex(indexDoc);
593
            //docsToUpdate.add(indexDoc);
594
        }
595
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
596
        //httpService.sendUpdate(solrIndexUri, addCommand);
597
    }
598
599
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
600
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
601
        removeFromIndex(pid);
602
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
603
604
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
605
        for (String documentsValue : documents) {
606
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
607
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
608
            removeFromIndex(documentsValue);
609
            insertToIndex(solrDoc);
610
        }
611
612
        List<String> documentedBy = indexedDoc
613
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
614
        for (String documentedByValue : documentedBy) {
615
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
616
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, documentedByValue);
617
            //docsToUpdate.add(solrDoc);
618
            removeFromIndex(documentedByValue);
619
            insertToIndex(solrDoc);
620
        }
621
622
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
623
        //httpService.sendUpdate(solrIndexUri, addCommand);
624
    }
625
626
    /*
627
     * Remove a pid from the solr index
628
     */
629
    private void removeFromIndex(String pid) throws SolrServerException, IOException {
630 7627 tao
        if(pid != null && !pid.trim().equals("")) {
631 7856 tao
            /*IndexEvent event = new IndexEvent();
632 7800 tao
            event.setDate(Calendar.getInstance().getTime());
633
            Identifier identifier = new Identifier();
634
            identifier.setValue(pid);
635 7856 tao
            event.setIdentifier(identifier);*/
636 7800 tao
            try {
637 7801 tao
                solrServer.deleteById(pid);
638
                solrServer.commit();
639 7805 tao
                /*event.setType(IndexEvent.SUCCESSDELETE);
640 7801 tao
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
641
                try {
642
                    EventlogFactory.createIndexEventLog().write(event);
643
                } catch (Exception e) {
644 7802 tao
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+e.getMessage());
645 7805 tao
                }*/
646 7801 tao
            } catch (SolrServerException e) {
647 7856 tao
                /*event.setAction(Event.DELETE);
648 7801 tao
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
649
                try {
650
                    EventlogFactory.createIndexEventLog().write(event);
651
                } catch (Exception ee) {
652 7802 tao
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
653 7856 tao
                }*/
654 7801 tao
                throw e;
655
656
            } catch (IOException e) {
657 7856 tao
                /*event.setAction(Event.DELETE);
658 7801 tao
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
659
                try {
660
                    EventlogFactory.createIndexEventLog().write(event);
661
                } catch (Exception ee) {
662 7802 tao
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
663 7856 tao
                }*/
664 7801 tao
                throw e;
665 7800 tao
            }
666 7801 tao
667 7627 tao
        }
668 7547 tao
    }
669 7569 tao
670 7786 tao
    /*
671
     * Is the pid a resource map
672
     */
673
    private boolean isDataPackage(String pid) throws FileNotFoundException, ServiceFailure {
674
        boolean isDataPackage = false;
675
        SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
676
        if(sysmeta != null) {
677
            isDataPackage = IndexGenerator.isResourceMap(sysmeta.getFormatId());
678
        }
679
        return isDataPackage;
680
    }
681
682
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
683
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
684
        if (dataPackageIndexDoc != null) {
685
            String resourceMapId = dataPackageIndexDoc
686
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
687
            return StringUtils.isNotEmpty(resourceMapId);
688
        } else {
689
            return false;
690
        }
691
    }
692
693 7569 tao
    /**
694
     * Get the solrServer
695
     * @return
696
     */
697 7604 tao
    public SolrServer getSolrServer() {
698 7569 tao
        return solrServer;
699
    }
700
701
    /**
702 7604 tao
     * Set the solrServer.
703 7569 tao
     * @param solrServer
704
     */
705 7604 tao
    public void setSolrServer(SolrServer solrServer) {
706 7569 tao
        this.solrServer = solrServer;
707
    }
708 7604 tao
709
    /**
710 7606 tao
     * Get all indexed ids in the solr server.
711
     * @return an empty list if there is no index.
712 7604 tao
     * @throws SolrServerException
713
     */
714
    public List<String> getSolrIds() throws SolrServerException {
715
        List<String> list = new ArrayList<String>();
716
        SolrQuery query = new SolrQuery(IDQUERY);
717
        query.setRows(Integer.MAX_VALUE);
718
        query.setFields(ID);
719
        QueryResponse response = solrServer.query(query);
720
        SolrDocumentList docs = response.getResults();
721
        if(docs != null) {
722
            for(SolrDocument doc :docs) {
723
                String identifier = (String)doc.getFieldValue(ID);
724
                //System.out.println("======================== "+identifier);
725
                list.add(identifier);
726
            }
727
        }
728
        return list;
729
    }
730 7542 tao
}