Project

General

Profile

1 7542 tao
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that gets Accession Number, check for uniqueness
4
 *             and register it into db
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28
29 7555 tao
import java.io.ByteArrayInputStream;
30 7786 tao
import java.io.FileNotFoundException;
31 7546 tao
import java.io.IOException;
32
import java.io.InputStream;
33
import java.util.ArrayList;
34 7800 tao
import java.util.Calendar;
35 7546 tao
import java.util.HashMap;
36
import java.util.Iterator;
37 7542 tao
import java.util.List;
38 7546 tao
import java.util.Map;
39
import java.util.Set;
40 7542 tao
41
import javax.xml.parsers.DocumentBuilder;
42
import javax.xml.parsers.DocumentBuilderFactory;
43 7546 tao
import javax.xml.parsers.ParserConfigurationException;
44 7542 tao
import javax.xml.xpath.XPath;
45 7546 tao
import javax.xml.xpath.XPathExpressionException;
46 7542 tao
import javax.xml.xpath.XPathFactory;
47
48 7546 tao
import org.apache.commons.codec.EncoderException;
49
import org.apache.commons.io.output.ByteArrayOutputStream;
50 7786 tao
import org.apache.commons.lang.StringUtils;
51 7546 tao
import org.apache.commons.logging.Log;
52
import org.apache.commons.logging.LogFactory;
53 7604 tao
import org.apache.solr.client.solrj.SolrQuery;
54 7542 tao
import org.apache.solr.client.solrj.SolrServer;
55 7546 tao
import org.apache.solr.client.solrj.SolrServerException;
56 7604 tao
import org.apache.solr.client.solrj.response.QueryResponse;
57 7547 tao
import org.apache.solr.client.solrj.response.UpdateResponse;
58 7604 tao
import org.apache.solr.common.SolrDocument;
59
import org.apache.solr.common.SolrDocumentList;
60 7546 tao
import org.apache.solr.common.SolrInputDocument;
61 7604 tao
import org.apache.solr.common.util.NamedList;
62 7786 tao
63 7542 tao
import org.dataone.cn.indexer.XMLNamespaceConfig;
64
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
65 7546 tao
import org.dataone.cn.indexer.parser.SolrField;
66 7786 tao
import org.dataone.cn.indexer.resourcemap.ResourceEntry;
67
import org.dataone.cn.indexer.resourcemap.ResourceMap;
68 7546 tao
import org.dataone.cn.indexer.solrhttp.SolrDoc;
69 7786 tao
import org.dataone.cn.indexer.solrhttp.SolrElementAdd;
70 7546 tao
import org.dataone.cn.indexer.solrhttp.SolrElementField;
71 7733 tao
import org.dataone.service.exceptions.NotFound;
72
import org.dataone.service.exceptions.NotImplemented;
73 7786 tao
import org.dataone.service.exceptions.ServiceFailure;
74 7733 tao
import org.dataone.service.exceptions.UnsupportedType;
75 7577 tao
import org.dataone.service.types.v1.Identifier;
76 7555 tao
import org.dataone.service.types.v1.SystemMetadata;
77
import org.dataone.service.util.TypeMarshaller;
78
import org.jibx.runtime.JiBXException;
79 7546 tao
import org.w3c.dom.Document;
80 7604 tao
import org.w3c.dom.NameList;
81 7546 tao
import org.xml.sax.SAXException;
82 7542 tao
83 7800 tao
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
84
import edu.ucsb.nceas.metacat.index.event.IndexEvent;
85 7711 tao
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
86
87 7542 tao
/**
88
 * A class that inserts, updates and removes index entries on a SOLR server.
89
 * @author tao
90
 *
91
 */
92
public class SolrIndex {
93 7591 leinfelder
94 7604 tao
    public static final String ID = "id";
95
    private static final String IDQUERY = ID+":*";
96 7542 tao
    private List<IDocumentSubprocessor> subprocessors = null;
97
    private SolrServer solrServer = null;
98
    private XMLNamespaceConfig xmlNamespaceConfig = null;
99 7546 tao
    private List<SolrField> sysmetaSolrFields = null;
100 7542 tao
101
    private static DocumentBuilderFactory documentBuilderFactory = null;
102
    private static DocumentBuilder builder = null;
103
104
    private static XPathFactory xpathFactory = null;
105
    private static XPath xpath = null;
106 7546 tao
    Log log = LogFactory.getLog(SolrIndex.class);
107 7542 tao
108 7546 tao
    static {
109
        documentBuilderFactory = DocumentBuilderFactory.newInstance();
110
        documentBuilderFactory.setNamespaceAware(true);
111
        try {
112
            builder = documentBuilderFactory.newDocumentBuilder();
113
        } catch (ParserConfigurationException e) {
114
            e.printStackTrace();
115
        }
116
        xpathFactory = XPathFactory.newInstance();
117
        xpath = xpathFactory.newXPath();
118
    }
119
120 7542 tao
    /**
121
     * Constructor
122 7548 tao
     * @throws SAXException
123
     * @throws IOException
124 7542 tao
     */
125 7547 tao
    public SolrIndex(List<SolrField> sysmetaSolrFields, XMLNamespaceConfig xmlNamespaceConfig)
126 7548 tao
                    throws XPathExpressionException, ParserConfigurationException, IOException, SAXException {
127
         this.xmlNamespaceConfig = xmlNamespaceConfig;
128
         this.sysmetaSolrFields = sysmetaSolrFields;
129
         init();
130 7542 tao
    }
131
132 7546 tao
    private void init() throws ParserConfigurationException, XPathExpressionException {
133
        xpath.setNamespaceContext(xmlNamespaceConfig);
134
        initExpressions();
135
    }
136
137
    private void initExpressions() throws XPathExpressionException {
138
        for (SolrField field : sysmetaSolrFields) {
139
            field.initExpression(xpath);
140
        }
141
142
    }
143 7542 tao
144 7546 tao
145 7542 tao
    /**
146
     * Get the list of the Subprocessors in this index.
147
     * @return the list of the Subprocessors.
148
     */
149
    public List<IDocumentSubprocessor> getSubprocessors() {
150
        return subprocessors;
151
    }
152
153
    /**
154
     * Set the list of Subprocessors.
155
     * @param subprocessorList  the list will be set.
156
     */
157
    public void setSubprocessors(List<IDocumentSubprocessor> subprocessorList) {
158 7546 tao
        for (IDocumentSubprocessor subprocessor : subprocessorList) {
159 7542 tao
            subprocessor.initExpression(xpath);
160 7546 tao
        }
161 7542 tao
        this.subprocessors = subprocessorList;
162
    }
163 7546 tao
164
    /**
165
     * Generate the index for the given information
166
     * @param id
167 7555 tao
     * @param systemMetadata
168 7546 tao
     * @param dataStream
169
     * @return
170
     * @throws IOException
171
     * @throws SAXException
172
     * @throws ParserConfigurationException
173
     * @throws XPathExpressionException
174 7555 tao
     * @throws JiBXException
175 7711 tao
     * @throws SolrServerException
176 7546 tao
     * @throws EncoderException
177 7733 tao
     * @throws UnsupportedType
178
     * @throws NotFound
179
     * @throws NotImplemented
180 7546 tao
     */
181 7555 tao
    private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
182 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
183 7733 tao
                    XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{
184 7546 tao
185
        // Load the System Metadata document
186 7555 tao
        ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
187
        TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
188
        ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
189
        Document sysMetaDoc = generateXmlDocument(systemMetadataStream);
190 7546 tao
        if (sysMetaDoc == null) {
191
            log.error("Could not load System metadata for ID: " + id);
192
            return null;
193
        }
194
195
        // Extract the field values from the System Metadata
196
        List<SolrElementField> sysSolrFields = processSysmetaFields(sysMetaDoc, id);
197
        SolrDoc indexDocument = new SolrDoc(sysSolrFields);
198
        Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
199
        docs.put(id, indexDocument);
200
201
        // Determine if subprocessors are available for this ID
202
        if (subprocessors != null) {
203
                    // for each subprocessor loaded from the spring config
204
                    for (IDocumentSubprocessor subprocessor : subprocessors) {
205
                        // Does this subprocessor apply?
206
                        if (subprocessor.canProcess(sysMetaDoc)) {
207
                            // if so, then extract the additional information from the
208
                            // document.
209
                            try {
210
                                // docObject = the resource map document or science
211
                                // metadata document.
212
                                // note that resource map processing touches all objects
213
                                // referenced by the resource map.
214
                                Document docObject = generateXmlDocument(dataStream);
215
                                if (docObject == null) {
216
                                    log.error("Could not load OBJECT for ID " + id );
217
                                } else {
218
                                    docs = subprocessor.processDocument(id, docs, docObject);
219
                                }
220
                            } catch (Exception e) {
221
                                log.error(e.getStackTrace().toString());
222
                            }
223
                        }
224
                    }
225
       }
226
227
       // TODO: in the XPathDocumentParser class in d1_cn_index_process module,
228
       // merge is only for resource map. We need more work here.
229
       for (SolrDoc mergeDoc : docs.values()) {
230
           if (!mergeDoc.isMerged()) {
231 7711 tao
                 mergeWithIndexedDocument(mergeDoc);
232 7546 tao
           }
233
       }
234
235
       //SolrElementAdd addCommand = getAddCommand(new ArrayList<SolrDoc>(docs.values()));
236
237
       return docs;
238
    }
239
240 7711 tao
    /**
241
     * Merge updates with existing solr documents
242
     *
243
     * This method appears to re-set the data package field data into the
244
     * document about to be updated in the solr index. Since packaging
245
     * information is derived from the package document (resource map), this
246
     * information is not present when processing a document contained in a data
247
     * package. This method replaces those values from the existing solr index
248
     * record for the document being processed. -- sroseboo, 1-18-12
249
     *
250
     * @param indexDocument
251
     * @return
252
     * @throws IOException
253
     * @throws EncoderException
254
     * @throws XPathExpressionException
255
     * @throws SAXException
256
     * @throws ParserConfigurationException
257
     * @throws SolrServerException
258 7733 tao
     * @throws UnsupportedType
259
     * @throws NotFound
260
     * @throws NotImplemented
261 7711 tao
     */
262
    // TODO:combine merge function with resourcemap merge function
263
264
    private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException,
265 7733 tao
            EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType {
266 7711 tao
        List<String> ids = new ArrayList<String>();
267
        ids.add(indexDocument.getIdentifier());
268
        List<SolrDoc> indexedDocuments = ResourceMapSubprocessor.getSolrDocs(ids);
269
        SolrDoc indexedDocument = indexedDocuments == null || indexedDocuments.size() <= 0 ? null
270
                : indexedDocuments.get(0);
271
        if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) {
272
            return indexDocument;
273
        } else {
274
            for (SolrElementField field : indexedDocument.getFieldList()) {
275
                if ((field.getName().equals(SolrElementField.FIELD_ISDOCUMENTEDBY)
276
                        || field.getName().equals(SolrElementField.FIELD_DOCUMENTS) || field
277
                        .getName().equals(SolrElementField.FIELD_RESOURCEMAP))
278
                        && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) {
279
                    indexDocument.addField(field);
280
                }
281
            }
282
283
            indexDocument.setMerged(true);
284
            return indexDocument;
285
        }
286
    }
287
288 7546 tao
    /*
289
     * Generate a Document from the InputStream
290
     */
291
    private Document generateXmlDocument(InputStream smdStream) throws SAXException {
292
        Document doc = null;
293
294
        try {
295
            doc = builder.parse(smdStream);
296
        } catch (IOException e) {
297
            log.error(e.getMessage(), e);
298
        }
299
300
        return doc;
301
    }
302
303
    /*
304
     * Index the fields of the system metadata
305
     */
306
    private List<SolrElementField> processSysmetaFields(Document doc, String identifier) {
307
308
        List<SolrElementField> fieldList = new ArrayList<SolrElementField>();
309
        // solrFields is the list of fields defined in the application context
310
311
        for (SolrField field : sysmetaSolrFields) {
312
            try {
313
                // the field.getFields method can return a single value or
314
                // multiple values for multi-valued fields
315
                // or can return multiple SOLR document fields.
316
                fieldList.addAll(field.getFields(doc, identifier));
317
            } catch (Exception e) {
318
                e.printStackTrace();
319
            }
320
        }
321
        return fieldList;
322
323
    }
324
325
    /**
326 7577 tao
     * Check the parameters of the insert or update methods.
327
     * @param pid
328
     * @param systemMetadata
329
     * @param data
330
     * @throws SolrServerException
331
     */
332
    private void checkParams(String pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
333
        if(pid == null || pid.trim().equals("")) {
334
            throw new SolrServerException("The identifier of the indexed document should not be null or blank.");
335
        }
336
        if(systemMetadata == null) {
337
            throw new SolrServerException("The system metadata of the indexed document should not be null.");
338
        }
339
        if(data == null) {
340
            throw new SolrServerException("The indexed document itself should not be null.");
341
        }
342
    }
343
344
    /**
345 7627 tao
     * Insert the indexes for a document.
346 7546 tao
     * @param pid  the id of this document
347
     * @param systemMetadata  the system metadata associated with the data object
348
     * @param data  the data object itself
349
     * @throws SolrServerException
350 7555 tao
     * @throws JiBXException
351 7711 tao
     * @throws EncoderException
352 7733 tao
     * @throws UnsupportedType
353
     * @throws NotFound
354
     * @throws NotImplemented
355 7546 tao
     */
356 7682 tao
    private synchronized void insert(String pid, SystemMetadata systemMetadata, InputStream data)
357 7546 tao
                    throws IOException, SAXException, ParserConfigurationException,
358 7733 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType {
359 7577 tao
        checkParams(pid, systemMetadata, data);
360 7546 tao
        Map<String, SolrDoc> docs = process(pid, systemMetadata, data);
361
362
        //transform the Map to the SolrInputDocument which can be used by the solr server
363
        if(docs != null) {
364
            Set<String> ids = docs.keySet();
365
            for(String id : ids) {
366
                if(id != null) {
367
                    SolrDoc doc = docs.get(id);
368 7786 tao
                    insertToIndex(doc);
369
                }
370
371
            }
372
        }
373
    }
374
375
    /*
376
     * Insert a SolrDoc to the solr server.
377
     */
378
    private synchronized void insertToIndex(SolrDoc doc) throws SolrServerException, IOException {
379
        if(doc != null ) {
380
            SolrInputDocument solrDoc = new SolrInputDocument();
381
            List<SolrElementField> list = doc.getFieldList();
382
            if(list != null) {
383
                //solrDoc.addField(METACATPIDFIELD, pid);
384
                Iterator<SolrElementField> iterator = list.iterator();
385
                while (iterator.hasNext()) {
386
                    SolrElementField field = iterator.next();
387
                    if(field != null) {
388
                        String value = field.getValue();
389
                        String name = field.getName();
390
                        //System.out.println("add name/value pair - "+name+"/"+value);
391
                        solrDoc.addField(name, value);
392 7546 tao
                    }
393
                }
394
            }
395 7786 tao
            if(!solrDoc.isEmpty()) {
396 7800 tao
                IndexEvent event = new IndexEvent();
397
                event.setDate(Calendar.getInstance().getTime());
398
                Identifier pid = new Identifier();
399
                pid.setValue(doc.getIdentifier());
400
                event.setPid(pid);
401
                try {
402 7801 tao
                    UpdateResponse response = solrServer.add(solrDoc);
403
                    solrServer.commit();
404
                    event.setType(IndexEvent.SUCCESSINSERT);
405
                    event.setDescription("Successfully insert the solr index for the id "+pid.getValue());
406
                    try {
407
                        EventlogFactory.createIndexEventLog().write(event);
408
                    } catch (Exception e) {
409
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't insert the solr doc to the solr server :"+e.getMessage());
410
                    }
411
                } catch (SolrServerException e) {
412
                    event.setType(IndexEvent.FAILUREINSERT);
413
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
414
                    try {
415
                        EventlogFactory.createIndexEventLog().write(event);
416
                    } catch (Exception ee) {
417
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't insert the solr doc to the solr server :"+ee.getMessage());
418
                    }
419
                    throw e;
420
                } catch (IOException e) {
421
                    event.setType(IndexEvent.FAILUREINSERT);
422
                    event.setDescription("Failed to insert the solr index for the id "+pid.getValue()+" since "+e.getMessage());
423
                    try {
424
                        EventlogFactory.createIndexEventLog().write(event);
425
                    } catch (Exception ee) {
426
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't insert the solr doc to the solr server :"+ee.getMessage());
427
                    }
428
                    throw e;
429
430 7800 tao
                }
431 7786 tao
                //System.out.println("=================the response is:\n"+response.toString());
432
            }
433 7546 tao
        }
434
    }
435 7577 tao
436
    /**
437 7627 tao
     * Update the solr index. This method handles the three scenarios:
438
     * 1. Archive (or delete) - if the the system metadata shows the value of the archive is true,
439
     *    remove the index for the document and its previous versions if it has.
440
     * 2. Update an existing doc - if the the system metadata shows the value of the archive is false and it has an obsoletes,
441
     *    remove the index for the previous version(s) and generate new index for the doc.
442
     * 3. Add a new doc - if the system metadata shows the value of the archive is false and it hasn't an obsoletes, generate the
443
     *    index for the doc.
444
     * @param pid  the id of the document
445 7603 tao
     * @param obsoleteIds  the chain of the obsoletes by this id
446 7577 tao
     * @param systemMetadata  the system metadata associated with the data object
447
     * @param data  the data object itself
448
     * @throws SolrServerException
449
     * @throws JiBXException
450 7711 tao
     * @throws EncoderException
451 7733 tao
     * @throws UnsupportedType
452
     * @throws NotFound
453
     * @throws NotImplemented
454 7786 tao
     * @throws ServiceFailure
455 7577 tao
     */
456 7627 tao
    public void update(String pid, List<String> obsoleteIds, SystemMetadata systemMetadata, InputStream data)
457 7577 tao
                    throws IOException, SAXException, ParserConfigurationException,
458 7786 tao
                    XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType, ServiceFailure {
459 7627 tao
        checkParams(pid, systemMetadata, data);
460
        boolean isArchive = systemMetadata.getArchived();
461
        if(isArchive) {
462
            //archive(delete)
463
            Identifier obsolete = systemMetadata.getObsoletes();
464
            if(obsolete != null) {
465
                removeObsoletesChain(obsolete.getValue(), obsoleteIds);
466
            }
467
            remove(pid);
468 7683 tao
            log.info("============================= archive the idex for the identifier "+pid);
469 7627 tao
        } else {
470
            Identifier obsolete = systemMetadata.getObsoletes();
471
            if(obsolete != null) {
472
                removeObsoletesChain(obsolete.getValue(), obsoleteIds);
473
            }
474
            //generate index for either add or update.
475
            insert(pid, systemMetadata, data);
476 7683 tao
            log.info("============================= insert index for the identifier "+pid);
477 7577 tao
        }
478
    }
479 7603 tao
480 7627 tao
481 7786 tao
    private void removeObsoletesChain(String obsoleteId, List<String> obsoleteIdChain) throws SolrServerException, IOException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException {
482 7627 tao
        if(obsoleteId != null && !obsoleteId.trim().equals("")) {
483
            if(obsoleteIdChain == null || obsoleteIdChain.isEmpty()) {
484
                throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsoletes chain can't be null or empty since the system metadata already has the obsoletes element.");
485
            }
486
            if(!obsoleteIdChain.contains(obsoleteId)) {
487
                throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsoletes elment in the system metadata is not in the obsoleteId chain");
488
            }
489
            remove(obsoleteIdChain);
490
        } else {
491
            throw new SolrServerException("SolrIndex.removeObsoletesChain - The obsolete id should be null.");
492
        }
493
    }
494
495 7603 tao
    /**
496
     * Remove all the indexes associated with the pids in the list.
497
     * @param pidList
498
     * @throws IOException
499
     * @throws SolrServerException
500 7786 tao
     * @throws ParserConfigurationException
501
     * @throws SAXException
502
     * @throws UnsupportedType
503
     * @throws NotFound
504
     * @throws NotImplemented
505
     * @throws XPathExpressionException
506
     * @throws ServiceFailure
507 7603 tao
     */
508 7786 tao
    private void remove(List<String> pidList) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException {
509 7603 tao
        if(pidList != null) {
510
            for(String id : pidList) {
511
                remove(id);
512
            }
513
        }
514
    }
515 7547 tao
516
    /**
517
     * Remove the indexed associated with specified pid.
518
     * @param pid  the pid which the indexes are associated with
519
     * @throws IOException
520
     * @throws SolrServerException
521 7786 tao
     * @throws ParserConfigurationException
522
     * @throws SAXException
523
     * @throws UnsupportedType
524
     * @throws NotFound
525
     * @throws NotImplemented
526
     * @throws XPathExpressionException
527
     * @throws ServiceFailure
528 7547 tao
     */
529 7786 tao
    public void remove(String pid) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException {
530
        if (isDataPackage(pid)) {
531
            removeDataPackage(pid);
532
        } else if (isPartOfDataPackage(pid)) {
533
            removeFromDataPackage(pid);
534
        } else {
535
            removeFromIndex(pid);
536
        }
537
    }
538
539
540
541
542
    /*
543
     * Remove a resource map pid
544
     */
545
    private void removeDataPackage(String pid) throws ServiceFailure, SAXException, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException  {
546
        Document resourceMapDoc = generateXmlDocument(DistributedMapsFactory.getDataObject(pid));
547
        ResourceMap resourceMap = new ResourceMap(resourceMapDoc);
548
        List<String> documentIds = resourceMap.getAllDocumentIDs();
549
        List<SolrDoc> indexDocuments =ResourceMapSubprocessor.getSolrDocs(documentIds);
550
        removeFromIndex(pid);
551
        //List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
552
        // for each document in data package:
553
        for (SolrDoc indexDoc : indexDocuments) {
554
555
            if (indexDoc.getIdentifier().equals(pid)) {
556
                continue; // skipping the resource map, no need update
557
                          // it.
558
                          // will
559
                          // be removed.
560
            }
561
562
            // Remove resourceMap reference
563
            indexDoc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP,
564
                    resourceMap.getIdentifier());
565
566
            // // Remove documents/documentedby values for this resource
567
            // map
568
            for (ResourceEntry entry : resourceMap.getMappedReferences()) {
569
                if (indexDoc.getIdentifier().equals(entry.getIdentifier())) {
570
                    for (String documentedBy : entry.getDocumentedBy()) {
571
                        // Using removeOneFieldWithValue in-case same
572
                        // documents
573
                        // are in more than one data package. just
574
                        // remove
575
                        // one
576
                        // instance of data package info.
577
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY,
578
                                documentedBy);
579
                    }
580
                    for (String documents : entry.getDocuments()) {
581
                        indexDoc.removeOneFieldWithValue(SolrElementField.FIELD_DOCUMENTS,
582
                                documents);
583
                    }
584
                    break;
585
                }
586
            }
587
            removeFromIndex(indexDoc.getIdentifier());
588
            insertToIndex(indexDoc);
589
            //docsToUpdate.add(indexDoc);
590
        }
591
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
592
        //httpService.sendUpdate(solrIndexUri, addCommand);
593
    }
594
595
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
596
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
597
        removeFromIndex(pid);
598
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
599
600
        List<String> documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS);
601
        for (String documentsValue : documents) {
602
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentsValue);
603
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_ISDOCUMENTEDBY, pid);
604
            removeFromIndex(documentsValue);
605
            insertToIndex(solrDoc);
606
        }
607
608
        List<String> documentedBy = indexedDoc
609
                .getAllFieldValues(SolrElementField.FIELD_ISDOCUMENTEDBY);
610
        for (String documentedByValue : documentedBy) {
611
            SolrDoc solrDoc = ResourceMapSubprocessor.getSolrDoc(documentedByValue);
612
            solrDoc.removeFieldsWithValue(SolrElementField.FIELD_DOCUMENTS, documentedByValue);
613
            //docsToUpdate.add(solrDoc);
614
            removeFromIndex(documentedByValue);
615
            insertToIndex(solrDoc);
616
        }
617
618
        //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate);
619
        //httpService.sendUpdate(solrIndexUri, addCommand);
620
    }
621
622
    /*
623
     * Remove a pid from the solr index
624
     */
625
    private void removeFromIndex(String pid) throws SolrServerException, IOException {
626 7627 tao
        if(pid != null && !pid.trim().equals("")) {
627 7800 tao
            IndexEvent event = new IndexEvent();
628
            event.setDate(Calendar.getInstance().getTime());
629
            Identifier identifier = new Identifier();
630
            identifier.setValue(pid);
631
            event.setPid(identifier);
632
            try {
633 7801 tao
                solrServer.deleteById(pid);
634
                solrServer.commit();
635
                event.setType(IndexEvent.SUCCESSDELETE);
636
                event.setDescription("Successfully remove the solr index for the id "+identifier.getValue());
637
                try {
638
                    EventlogFactory.createIndexEventLog().write(event);
639
                } catch (Exception e) {
640
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't insert the solr doc to the solr server :"+e.getMessage());
641
                }
642
            } catch (SolrServerException e) {
643
                event.setType(IndexEvent.FAILUREDELETE);
644
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
645
                try {
646
                    EventlogFactory.createIndexEventLog().write(event);
647
                } catch (Exception ee) {
648
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't insert the solr doc to the solr server :"+ee.getMessage());
649
                }
650
                throw e;
651
652
            } catch (IOException e) {
653
                event.setType(IndexEvent.FAILUREDELETE);
654
                event.setDescription("Failurely remove the solr index for the id "+identifier.getValue()+" since "+e.getMessage());
655
                try {
656
                    EventlogFactory.createIndexEventLog().write(event);
657
                } catch (Exception ee) {
658
                    log.error("SolrIndex.removeFromIndex - IndexEventLog can't insert the solr doc to the solr server :"+ee.getMessage());
659
                }
660
                throw e;
661 7800 tao
            }
662 7801 tao
663 7627 tao
        }
664 7547 tao
    }
665 7569 tao
666 7786 tao
    /*
667
     * Is the pid a resource map
668
     */
669
    private boolean isDataPackage(String pid) throws FileNotFoundException, ServiceFailure {
670
        boolean isDataPackage = false;
671
        SystemMetadata sysmeta = DistributedMapsFactory.getSystemMetadata(pid);
672
        if(sysmeta != null) {
673
            isDataPackage = IndexGenerator.isResourceMap(sysmeta.getFormatId());
674
        }
675
        return isDataPackage;
676
    }
677
678
    private boolean isPartOfDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
679
        SolrDoc dataPackageIndexDoc = ResourceMapSubprocessor.getSolrDoc(pid);
680
        if (dataPackageIndexDoc != null) {
681
            String resourceMapId = dataPackageIndexDoc
682
                    .getFirstFieldValue(SolrElementField.FIELD_RESOURCEMAP);
683
            return StringUtils.isNotEmpty(resourceMapId);
684
        } else {
685
            return false;
686
        }
687
    }
688
689 7569 tao
    /**
690
     * Get the solrServer
691
     * @return
692
     */
693 7604 tao
    public SolrServer getSolrServer() {
694 7569 tao
        return solrServer;
695
    }
696
697
    /**
698 7604 tao
     * Set the solrServer.
699 7569 tao
     * @param solrServer
700
     */
701 7604 tao
    public void setSolrServer(SolrServer solrServer) {
702 7569 tao
        this.solrServer = solrServer;
703
    }
704 7604 tao
705
    /**
706 7606 tao
     * Get all indexed ids in the solr server.
707
     * @return an empty list if there is no index.
708 7604 tao
     * @throws SolrServerException
709
     */
710
    public List<String> getSolrIds() throws SolrServerException {
711
        List<String> list = new ArrayList<String>();
712
        SolrQuery query = new SolrQuery(IDQUERY);
713
        query.setRows(Integer.MAX_VALUE);
714
        query.setFields(ID);
715
        QueryResponse response = solrServer.query(query);
716
        SolrDocumentList docs = response.getResults();
717
        if(docs != null) {
718
            for(SolrDocument doc :docs) {
719
                String identifier = (String)doc.getFieldValue(ID);
720
                //System.out.println("======================== "+identifier);
721
                list.add(identifier);
722
            }
723
        }
724
        return list;
725
    }
726 7542 tao
}