Project

General

Profile

« Previous | Next » 

Revision 9060

use new IDocumentDeleteSubprocessors to handle clean-up of annotation index fields when annotations are removed.

View differences:

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/SolrIndex.java
59 59
import org.dataone.cn.indexer.XMLNamespaceConfig;
60 60
import org.dataone.cn.indexer.convert.SolrDateConverter;
61 61
import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor;
62
import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor;
62 63
import org.dataone.cn.indexer.parser.IDocumentSubprocessor;
63 64
import org.dataone.cn.indexer.parser.SolrField;
64 65
import org.dataone.cn.indexer.solrhttp.SolrDoc;
......
92 93
    public static final String ID = "id";
93 94
    private static final String IDQUERY = ID+":*";
94 95
    private List<IDocumentSubprocessor> subprocessors = null;
96
    private List<IDocumentDeleteSubprocessor> deleteSubprocessors = null;
97

  
95 98
    private SolrServer solrServer = null;
96 99
    private XMLNamespaceConfig xmlNamespaceConfig = null;
97 100
    private List<SolrField> sysmetaSolrFields = null;
......
161 164
        this.subprocessors = subprocessorList;
162 165
    }
163 166
    
164
    /**
167
    public List<IDocumentDeleteSubprocessor> getDeleteSubprocessors() {
168
		return deleteSubprocessors;
169
	}
170

  
171
	public void setDeleteSubprocessors(
172
			List<IDocumentDeleteSubprocessor> deleteSubprocessors) {
173
		this.deleteSubprocessors = deleteSubprocessors;
174
	}
175

  
176
	/**
165 177
     * Generate the index for the given information
166 178
     * @param id
167 179
     * @param systemMetadata
......
561 573
     * @throws JiBXException
562 574
     * @throws EncoderException
563 575
     */
564
    void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException, 
565
                                ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, 
566
                                IOException, SAXException, ParserConfigurationException, OREParserException, JiBXException, EncoderException {
576
    void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws Exception {
567 577
        //checkParams(pid, systemMetadata, objectPath);
568 578
        if(systemMetadata==null || pid==null) {
569 579
            log.error("SolrIndex.update - the systemMetadata or pid is null. So nothing will be indexed.");
......
619 629
     * @throws ServiceFailure 
620 630
     * @throws OREParserException 
621 631
     */
622
    private void remove(String pid, SystemMetadata sysmeta) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
632
    private void remove(String pid, SystemMetadata sysmeta) throws Exception {
623 633
        if (isDataPackage(pid, sysmeta)) {
624 634
            removeDataPackage(pid);
625 635
        } else if (isPartOfDataPackage(pid)) {
......
633 643
     * Remove the resource map from the solr index. It not only removes the index for itself but also
634 644
     * removes the relationship for the related metadata and data objects.
635 645
     */
636
    private void removeDataPackage(String pid) throws  XPathExpressionException, IOException, 
637
            SolrServerException, UnsupportedType, NotFound, ParserConfigurationException, SAXException {
646
    private void removeDataPackage(String pid) throws Exception {
638 647
        removeFromIndex(pid);
639 648
        List<SolrDoc> docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid);
640 649
        if (docsToUpdate != null && !docsToUpdate.isEmpty()) {
......
896 905
    /*
897 906
     * Remove a pid which is part of resource map.
898 907
     */
899
    private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException  {
908
    private void removeFromDataPackage(String pid) throws Exception  {
900 909
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid);
901 910
        removeFromIndex(pid);
902 911
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
......
926 935
    /*
927 936
     * Remove a pid from the solr index
928 937
     */
929
    private synchronized void removeFromIndex(String pid) throws SolrServerException, IOException {
930
        if(pid != null && !pid.trim().equals("")) {
938
    private synchronized void removeFromIndex(String identifier) throws Exception {
939
    	
940
    	
941
    	Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
942

  
943
        for (IDocumentDeleteSubprocessor deleteSubprocessor : deleteSubprocessors) {
944
            docs.putAll(deleteSubprocessor.processDocForDelete(identifier, docs));
945
        }
946
        List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>();
947
        List<String> idsToIndex = new ArrayList<String>();
948
        for (String idToUpdate : docs.keySet()) {
949
            if (docs.get(idToUpdate) != null) {
950
                docsToUpdate.add(docs.get(idToUpdate));
951
            } else {
952
                idsToIndex.add(idToUpdate);
953
            }
954
        }
955

  
956
        // update the docs we have
957
        for (SolrDoc docToUpdate : docsToUpdate) {
958
        	insertToIndex(docToUpdate);
959
        }
960
        
961
        // delete this one
962
        deleteDocFromIndex(identifier);
963

  
964
        // index the rest
965
        for (String idToIndex : idsToIndex) {
966
        	Identifier pid = new Identifier();
967
        	pid.setValue(idToIndex);
968
            SystemMetadata sysMeta = DistributedMapsFactory.getSystemMetadata(idToIndex);
969
            if (SolrDoc.visibleInIndex(sysMeta)) {
970
                String objectPath = DistributedMapsFactory.getObjectPathMap().get(pid);
971
                insert(pid, sysMeta, objectPath);
972
            }
973
        }
974
    		
975
    }
976
    
977
    private void deleteDocFromIndex(String pid) throws Exception {
978
    	if (pid != null && !pid.trim().equals("")) {
931 979
            /*IndexEvent event = new IndexEvent();
932 980
            event.setDate(Calendar.getInstance().getTime());
933 981
            Identifier identifier = new Identifier();
......
965 1013
            }
966 1014
            
967 1015
        }
1016
    
968 1017
    }
969 1018

  
970 1019
    /**
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/MetacatDocumentDeleteSubprocessor.java
1
/**
2
 * This work was created by participants in the DataONE project, and is
3
 * jointly copyrighted by participating institutions in DataONE. For 
4
 * more information on DataONE, see our web site at http://dataone.org.
5
 *
6
 *   Copyright ${year}
7
 *
8
 * Licensed under the Apache License, Version 2.0 (the "License");
9
 * you may not use this file except in compliance with the License.
10
 * You may obtain a copy of the License at
11
 *
12
 *   http://www.apache.org/licenses/LICENSE-2.0
13
 *
14
 * Unless required by applicable law or agreed to in writing, software
15
 * distributed under the License is distributed on an "AS IS" BASIS,
16
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
 * See the License for the specific language governing permissions and 
18
 * limitations under the License.
19
 * 
20
 * $Id$
21
 */
22

  
23
package edu.ucsb.nceas.metacat.index;
24
import java.io.IOException;
25
import java.util.ArrayList;
26
import java.util.HashSet;
27
import java.util.List;
28
import java.util.Map;
29
import java.util.Set;
30

  
31
import javax.xml.xpath.XPathExpressionException;
32

  
33
import org.apache.commons.codec.EncoderException;
34
import org.apache.commons.lang.StringUtils;
35
import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor;
36
import org.dataone.cn.indexer.solrhttp.SolrDoc;
37
import org.dataone.cn.indexer.solrhttp.SolrElementField;
38

  
39
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor;
40

  
41

  
42
public class MetacatDocumentDeleteSubprocessor implements IDocumentDeleteSubprocessor {
43

  
44

  
45
    private String relationSourceFormatId;
46
    private String relationSourceField;
47
    private List<String> biDirectionalRelationFields;
48
    private List<String> uniDirectionalRelationFields;
49

  
50
    public MetacatDocumentDeleteSubprocessor() {
51
    }
52

  
53
    public Map<String, SolrDoc> processDocForDelete(String identifier, Map<String, SolrDoc> docs)
54
            throws Exception {
55

  
56
        SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(identifier);
57
        if (indexedDoc != null) {
58
            if (hasRelationsBySource(indexedDoc)) {
59
                docs.putAll(removeBiDirectionalRelationsForDoc(identifier, indexedDoc, docs));
60
            }
61
            if (isRelationshipSource(indexedDoc)) {
62
                docs.putAll(removeRelationsBySourceDoc(identifier, indexedDoc, docs));
63
            }
64
        }
65
        return docs;
66
    }
67

  
68
    private Map<String, SolrDoc> removeRelationsBySourceDoc(String relationSourceId,
69
            SolrDoc indexedDoc, Map<String, SolrDoc> docs) throws Exception {
70

  
71
        // gather all docs with relations from self source
72
        List<SolrDoc> relatedDocs = ResourceMapSubprocessor.getDocumentsByQuery(
73
                "q=" + relationSourceField + ":\"" + relationSourceId + "\"");
74

  
75
        Set<String> otherSourceDocs = new HashSet<String>();
76

  
77
        for (SolrDoc relatedDoc : relatedDocs) {
78

  
79
            // gather other relation source docs from modified list
80
            otherSourceDocs.addAll(relatedDoc.getAllFieldValues(relationSourceField));
81

  
82
            // remove relation fields (uni and bi-directional)
83
            // add modified docs to update list
84
            String docId = relatedDoc.getFirstFieldValue(SolrElementField.FIELD_ID);
85
            if (docs.get(docId) != null) {
86
                relatedDoc = docs.get(docId);
87
            }
88
            relatedDoc.removeAllFields(relationSourceField);
89
            for (String relationField : getBiDirectionalRelationFields()) {
90
                relatedDoc.removeAllFields(relationField);
91
            }
92
            for (String relationField : getUniDirectionalRelationFields()) {
93
                relatedDoc.removeAllFields(relationField);
94
            }
95
            docs.put(docId, relatedDoc);
96
        }
97

  
98
        for (String otherRelatedDoc : otherSourceDocs) {
99
            if (!otherRelatedDoc.equals(relationSourceId)) {
100
                docs.put(otherRelatedDoc, null);
101
            }
102
        }
103
        return docs;
104
    }
105

  
106
    private boolean isRelationshipSource(SolrDoc indexedDoc) throws Exception {
107
        String formatId = indexedDoc.getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT);
108
        return relationSourceFormatId.equals(formatId);
109
    }
110

  
111
    private boolean hasRelationsBySource(SolrDoc indexedDoc) throws XPathExpressionException,
112
            IOException, EncoderException {
113
        String relationSourceId = indexedDoc.getFirstFieldValue(relationSourceField);
114
        return StringUtils.isNotEmpty(relationSourceId);
115
    }
116

  
117
    private Map<String, SolrDoc> removeBiDirectionalRelationsForDoc(String identifier,
118
            SolrDoc indexedDoc, Map<String, SolrDoc> docs) throws Exception {
119

  
120
        for (String relationField : getBiDirectionalRelationFields()) {
121
            List<SolrDoc> inverseDocs = ResourceMapSubprocessor.getDocumentsByQuery(
122
                    "q=" + relationField + ":\"" + identifier + "\"");
123
            for (SolrDoc inverseDoc : inverseDocs) {
124
                String inverseDocId = inverseDoc.getFirstFieldValue(SolrElementField.FIELD_ID);
125
                if (docs.get(inverseDocId) != null) {
126
                    inverseDoc = docs.get(inverseDocId);
127
                }
128
                inverseDoc.removeFieldsWithValue(relationField, identifier);
129
                docs.put(inverseDocId, inverseDoc);
130
            }
131

  
132
        }
133
        return docs;
134
    }
135

  
136
    private List<String> getBiDirectionalRelationFields() {
137
        if (biDirectionalRelationFields == null) {
138
            biDirectionalRelationFields = new ArrayList<String>();
139
        }
140
        return biDirectionalRelationFields;
141
    }
142

  
143
    private List<String> getUniDirectionalRelationFields() {
144
        if (uniDirectionalRelationFields == null) {
145
            uniDirectionalRelationFields = new ArrayList<String>();
146
        }
147
        return uniDirectionalRelationFields;
148
    }
149

  
150
    public String getRelationSourceFormatId() {
151
        return relationSourceFormatId;
152
    }
153

  
154
    public void setRelationSourceFormatId(String relationSourceFormatId) {
155
        this.relationSourceFormatId = relationSourceFormatId;
156
    }
157

  
158
    public String getRelationSourceField() {
159
        return relationSourceField;
160
    }
161

  
162
    public void setRelationSourceField(String relationSourceField) {
163
        this.relationSourceField = relationSourceField;
164
    }
165

  
166
    public void setBiDirectionalRelationFields(List<String> biDirectionalRelationFields) {
167
        this.biDirectionalRelationFields = biDirectionalRelationFields;
168
    }
169

  
170
    public void setUniDirectionalRelationFields(List<String> uniDirectionalRelationFields) {
171
        this.uniDirectionalRelationFields = uniDirectionalRelationFields;
172
    }
173
}
0 174

  
metacat-index/src/main/resources/index-processor-context.xml
30 30
     
31 31
    <import resource="application-context-oa.xml" />
32 32
    <import resource="application-context-annotator.xml" />    
33
    
34
    <import resource="application-context-delete-subprocessors.xml" />    
35
    
33 36

  
34 37
 <bean id="dateConverter" class="org.dataone.cn.indexer.convert.SolrDateConverter" />
35 38
 <bean id="fgdcDateConverter" class="org.dataone.cn.indexer.convert.FgdcDateConverter"/>
......
102 105
       
103 106
      </list>
104 107
     </property>
108
     <property name="deleteSubprocessors">
109
      <list>
110
       <ref bean="annotationDeleteSubprocessor" />
111
      </list>
112
     </property>
105 113
    </bean>
106 114
   </list>
107 115
  </constructor-arg>

Also available in: Unified diff