Revision 9060
Added by ben leinfelder about 10 years ago
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/SolrIndex.java | ||
---|---|---|
59 | 59 |
import org.dataone.cn.indexer.XMLNamespaceConfig; |
60 | 60 |
import org.dataone.cn.indexer.convert.SolrDateConverter; |
61 | 61 |
import org.dataone.cn.indexer.parser.BaseXPathDocumentSubprocessor; |
62 |
import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor; |
|
62 | 63 |
import org.dataone.cn.indexer.parser.IDocumentSubprocessor; |
63 | 64 |
import org.dataone.cn.indexer.parser.SolrField; |
64 | 65 |
import org.dataone.cn.indexer.solrhttp.SolrDoc; |
... | ... | |
92 | 93 |
public static final String ID = "id"; |
93 | 94 |
private static final String IDQUERY = ID+":*"; |
94 | 95 |
private List<IDocumentSubprocessor> subprocessors = null; |
96 |
private List<IDocumentDeleteSubprocessor> deleteSubprocessors = null; |
|
97 |
|
|
95 | 98 |
private SolrServer solrServer = null; |
96 | 99 |
private XMLNamespaceConfig xmlNamespaceConfig = null; |
97 | 100 |
private List<SolrField> sysmetaSolrFields = null; |
... | ... | |
161 | 164 |
this.subprocessors = subprocessorList; |
162 | 165 |
} |
163 | 166 |
|
164 |
/** |
|
167 |
public List<IDocumentDeleteSubprocessor> getDeleteSubprocessors() { |
|
168 |
return deleteSubprocessors; |
|
169 |
} |
|
170 |
|
|
171 |
public void setDeleteSubprocessors( |
|
172 |
List<IDocumentDeleteSubprocessor> deleteSubprocessors) { |
|
173 |
this.deleteSubprocessors = deleteSubprocessors; |
|
174 |
} |
|
175 |
|
|
176 |
/** |
|
165 | 177 |
* Generate the index for the given information |
166 | 178 |
* @param id |
167 | 179 |
* @param systemMetadata |
... | ... | |
561 | 573 |
* @throws JiBXException |
562 | 574 |
* @throws EncoderException |
563 | 575 |
*/ |
564 |
void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException, |
|
565 |
ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, |
|
566 |
IOException, SAXException, ParserConfigurationException, OREParserException, JiBXException, EncoderException { |
|
576 |
void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws Exception { |
|
567 | 577 |
//checkParams(pid, systemMetadata, objectPath); |
568 | 578 |
if(systemMetadata==null || pid==null) { |
569 | 579 |
log.error("SolrIndex.update - the systemMetadata or pid is null. So nothing will be indexed."); |
... | ... | |
619 | 629 |
* @throws ServiceFailure |
620 | 630 |
* @throws OREParserException |
621 | 631 |
*/ |
622 |
private void remove(String pid, SystemMetadata sysmeta) throws IOException, SolrServerException, ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SAXException, ParserConfigurationException, OREParserException {
|
|
632 |
private void remove(String pid, SystemMetadata sysmeta) throws Exception { |
|
623 | 633 |
if (isDataPackage(pid, sysmeta)) { |
624 | 634 |
removeDataPackage(pid); |
625 | 635 |
} else if (isPartOfDataPackage(pid)) { |
... | ... | |
633 | 643 |
* Remove the resource map from the solr index. This removes not only the index entry for the resource map itself but also |
634 | 644 |
* the relationships recorded on the related metadata and data objects. |
635 | 645 |
*/ |
636 |
private void removeDataPackage(String pid) throws XPathExpressionException, IOException, |
|
637 |
SolrServerException, UnsupportedType, NotFound, ParserConfigurationException, SAXException { |
|
646 |
private void removeDataPackage(String pid) throws Exception { |
|
638 | 647 |
removeFromIndex(pid); |
639 | 648 |
List<SolrDoc> docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid); |
640 | 649 |
if (docsToUpdate != null && !docsToUpdate.isEmpty()) { |
... | ... | |
896 | 905 |
/* |
897 | 906 |
* Remove a pid which is part of resource map. |
898 | 907 |
*/ |
899 |
private void removeFromDataPackage(String pid) throws XPathExpressionException, NotImplemented, NotFound, UnsupportedType, SolrServerException, IOException, ParserConfigurationException, SAXException {
|
|
908 |
private void removeFromDataPackage(String pid) throws Exception { |
|
900 | 909 |
SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(pid); |
901 | 910 |
removeFromIndex(pid); |
902 | 911 |
List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>(); |
... | ... | |
926 | 935 |
/* |
927 | 936 |
* Remove a pid from the solr index |
928 | 937 |
*/ |
929 |
private synchronized void removeFromIndex(String pid) throws SolrServerException, IOException { |
|
930 |
if(pid != null && !pid.trim().equals("")) { |
|
938 |
private synchronized void removeFromIndex(String identifier) throws Exception { |
|
939 |
|
|
940 |
|
|
941 |
Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>(); |
|
942 |
|
|
943 |
for (IDocumentDeleteSubprocessor deleteSubprocessor : deleteSubprocessors) { |
|
944 |
docs.putAll(deleteSubprocessor.processDocForDelete(identifier, docs)); |
|
945 |
} |
|
946 |
List<SolrDoc> docsToUpdate = new ArrayList<SolrDoc>(); |
|
947 |
List<String> idsToIndex = new ArrayList<String>(); |
|
948 |
for (String idToUpdate : docs.keySet()) { |
|
949 |
if (docs.get(idToUpdate) != null) { |
|
950 |
docsToUpdate.add(docs.get(idToUpdate)); |
|
951 |
} else { |
|
952 |
idsToIndex.add(idToUpdate); |
|
953 |
} |
|
954 |
} |
|
955 |
|
|
956 |
// update the docs we have |
|
957 |
for (SolrDoc docToUpdate : docsToUpdate) { |
|
958 |
insertToIndex(docToUpdate); |
|
959 |
} |
|
960 |
|
|
961 |
// delete this one |
|
962 |
deleteDocFromIndex(identifier); |
|
963 |
|
|
964 |
// index the rest |
|
965 |
for (String idToIndex : idsToIndex) { |
|
966 |
Identifier pid = new Identifier(); |
|
967 |
pid.setValue(idToIndex); |
|
968 |
SystemMetadata sysMeta = DistributedMapsFactory.getSystemMetadata(idToIndex); |
|
969 |
if (SolrDoc.visibleInIndex(sysMeta)) { |
|
970 |
String objectPath = DistributedMapsFactory.getObjectPathMap().get(pid); |
|
971 |
insert(pid, sysMeta, objectPath); |
|
972 |
} |
|
973 |
} |
|
974 |
|
|
975 |
} |
|
976 |
|
|
977 |
private void deleteDocFromIndex(String pid) throws Exception { |
|
978 |
if (pid != null && !pid.trim().equals("")) { |
|
931 | 979 |
/*IndexEvent event = new IndexEvent(); |
932 | 980 |
event.setDate(Calendar.getInstance().getTime()); |
933 | 981 |
Identifier identifier = new Identifier(); |
... | ... | |
965 | 1013 |
} |
966 | 1014 |
|
967 | 1015 |
} |
1016 |
|
|
968 | 1017 |
} |
969 | 1018 |
|
970 | 1019 |
/** |
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/MetacatDocumentDeleteSubprocessor.java | ||
---|---|---|
1 |
/** |
|
2 |
* This work was created by participants in the DataONE project, and is |
|
3 |
* jointly copyrighted by participating institutions in DataONE. For |
|
4 |
* more information on DataONE, see our web site at http://dataone.org. |
|
5 |
* |
|
6 |
* Copyright ${year} |
|
7 |
* |
|
8 |
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
9 |
* you may not use this file except in compliance with the License. |
|
10 |
* You may obtain a copy of the License at |
|
11 |
* |
|
12 |
* http://www.apache.org/licenses/LICENSE-2.0 |
|
13 |
* |
|
14 |
* Unless required by applicable law or agreed to in writing, software |
|
15 |
* distributed under the License is distributed on an "AS IS" BASIS, |
|
16 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
17 |
* See the License for the specific language governing permissions and |
|
18 |
* limitations under the License. |
|
19 |
* |
|
20 |
* $Id$ |
|
21 |
*/ |
|
22 |
|
|
23 |
package edu.ucsb.nceas.metacat.index; |
|
24 |
import java.io.IOException; |
|
25 |
import java.util.ArrayList; |
|
26 |
import java.util.HashSet; |
|
27 |
import java.util.List; |
|
28 |
import java.util.Map; |
|
29 |
import java.util.Set; |
|
30 |
|
|
31 |
import javax.xml.xpath.XPathExpressionException; |
|
32 |
|
|
33 |
import org.apache.commons.codec.EncoderException; |
|
34 |
import org.apache.commons.lang.StringUtils; |
|
35 |
import org.dataone.cn.indexer.parser.IDocumentDeleteSubprocessor; |
|
36 |
import org.dataone.cn.indexer.solrhttp.SolrDoc; |
|
37 |
import org.dataone.cn.indexer.solrhttp.SolrElementField; |
|
38 |
|
|
39 |
import edu.ucsb.nceas.metacat.index.resourcemap.ResourceMapSubprocessor; |
|
40 |
|
|
41 |
|
|
42 |
public class MetacatDocumentDeleteSubprocessor implements IDocumentDeleteSubprocessor { |
|
43 |
|
|
44 |
|
|
45 |
private String relationSourceFormatId; |
|
46 |
private String relationSourceField; |
|
47 |
private List<String> biDirectionalRelationFields; |
|
48 |
private List<String> uniDirectionalRelationFields; |
|
49 |
|
|
50 |
public MetacatDocumentDeleteSubprocessor() { |
|
51 |
} |
|
52 |
|
|
53 |
public Map<String, SolrDoc> processDocForDelete(String identifier, Map<String, SolrDoc> docs) |
|
54 |
throws Exception { |
|
55 |
|
|
56 |
SolrDoc indexedDoc = ResourceMapSubprocessor.getSolrDoc(identifier); |
|
57 |
if (indexedDoc != null) { |
|
58 |
if (hasRelationsBySource(indexedDoc)) { |
|
59 |
docs.putAll(removeBiDirectionalRelationsForDoc(identifier, indexedDoc, docs)); |
|
60 |
} |
|
61 |
if (isRelationshipSource(indexedDoc)) { |
|
62 |
docs.putAll(removeRelationsBySourceDoc(identifier, indexedDoc, docs)); |
|
63 |
} |
|
64 |
} |
|
65 |
return docs; |
|
66 |
} |
|
67 |
|
|
68 |
private Map<String, SolrDoc> removeRelationsBySourceDoc(String relationSourceId, |
|
69 |
SolrDoc indexedDoc, Map<String, SolrDoc> docs) throws Exception { |
|
70 |
|
|
71 |
// gather all docs with relations from self source |
|
72 |
List<SolrDoc> relatedDocs = ResourceMapSubprocessor.getDocumentsByQuery( |
|
73 |
"q=" + relationSourceField + ":\"" + relationSourceId + "\""); |
|
74 |
|
|
75 |
Set<String> otherSourceDocs = new HashSet<String>(); |
|
76 |
|
|
77 |
for (SolrDoc relatedDoc : relatedDocs) { |
|
78 |
|
|
79 |
// gather other relation source docs from modified list |
|
80 |
otherSourceDocs.addAll(relatedDoc.getAllFieldValues(relationSourceField)); |
|
81 |
|
|
82 |
// remove relation fields (uni and bi-directional) |
|
83 |
// add modified docs to update list |
|
84 |
String docId = relatedDoc.getFirstFieldValue(SolrElementField.FIELD_ID); |
|
85 |
if (docs.get(docId) != null) { |
|
86 |
relatedDoc = docs.get(docId); |
|
87 |
} |
|
88 |
relatedDoc.removeAllFields(relationSourceField); |
|
89 |
for (String relationField : getBiDirectionalRelationFields()) { |
|
90 |
relatedDoc.removeAllFields(relationField); |
|
91 |
} |
|
92 |
for (String relationField : getUniDirectionalRelationFields()) { |
|
93 |
relatedDoc.removeAllFields(relationField); |
|
94 |
} |
|
95 |
docs.put(docId, relatedDoc); |
|
96 |
} |
|
97 |
|
|
98 |
for (String otherRelatedDoc : otherSourceDocs) { |
|
99 |
if (!otherRelatedDoc.equals(relationSourceId)) { |
|
100 |
docs.put(otherRelatedDoc, null); |
|
101 |
} |
|
102 |
} |
|
103 |
return docs; |
|
104 |
} |
|
105 |
|
|
106 |
private boolean isRelationshipSource(SolrDoc indexedDoc) throws Exception { |
|
107 |
String formatId = indexedDoc.getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT); |
|
108 |
return relationSourceFormatId.equals(formatId); |
|
109 |
} |
|
110 |
|
|
111 |
private boolean hasRelationsBySource(SolrDoc indexedDoc) throws XPathExpressionException, |
|
112 |
IOException, EncoderException { |
|
113 |
String relationSourceId = indexedDoc.getFirstFieldValue(relationSourceField); |
|
114 |
return StringUtils.isNotEmpty(relationSourceId); |
|
115 |
} |
|
116 |
|
|
117 |
private Map<String, SolrDoc> removeBiDirectionalRelationsForDoc(String identifier, |
|
118 |
SolrDoc indexedDoc, Map<String, SolrDoc> docs) throws Exception { |
|
119 |
|
|
120 |
for (String relationField : getBiDirectionalRelationFields()) { |
|
121 |
List<SolrDoc> inverseDocs = ResourceMapSubprocessor.getDocumentsByQuery( |
|
122 |
"q=" + relationField + ":\"" + identifier + "\""); |
|
123 |
for (SolrDoc inverseDoc : inverseDocs) { |
|
124 |
String inverseDocId = inverseDoc.getFirstFieldValue(SolrElementField.FIELD_ID); |
|
125 |
if (docs.get(inverseDocId) != null) { |
|
126 |
inverseDoc = docs.get(inverseDocId); |
|
127 |
} |
|
128 |
inverseDoc.removeFieldsWithValue(relationField, identifier); |
|
129 |
docs.put(inverseDocId, inverseDoc); |
|
130 |
} |
|
131 |
|
|
132 |
} |
|
133 |
return docs; |
|
134 |
} |
|
135 |
|
|
136 |
private List<String> getBiDirectionalRelationFields() { |
|
137 |
if (biDirectionalRelationFields == null) { |
|
138 |
biDirectionalRelationFields = new ArrayList<String>(); |
|
139 |
} |
|
140 |
return biDirectionalRelationFields; |
|
141 |
} |
|
142 |
|
|
143 |
private List<String> getUniDirectionalRelationFields() { |
|
144 |
if (uniDirectionalRelationFields == null) { |
|
145 |
uniDirectionalRelationFields = new ArrayList<String>(); |
|
146 |
} |
|
147 |
return uniDirectionalRelationFields; |
|
148 |
} |
|
149 |
|
|
150 |
public String getRelationSourceFormatId() { |
|
151 |
return relationSourceFormatId; |
|
152 |
} |
|
153 |
|
|
154 |
public void setRelationSourceFormatId(String relationSourceFormatId) { |
|
155 |
this.relationSourceFormatId = relationSourceFormatId; |
|
156 |
} |
|
157 |
|
|
158 |
public String getRelationSourceField() { |
|
159 |
return relationSourceField; |
|
160 |
} |
|
161 |
|
|
162 |
public void setRelationSourceField(String relationSourceField) { |
|
163 |
this.relationSourceField = relationSourceField; |
|
164 |
} |
|
165 |
|
|
166 |
public void setBiDirectionalRelationFields(List<String> biDirectionalRelationFields) { |
|
167 |
this.biDirectionalRelationFields = biDirectionalRelationFields; |
|
168 |
} |
|
169 |
|
|
170 |
public void setUniDirectionalRelationFields(List<String> uniDirectionalRelationFields) { |
|
171 |
this.uniDirectionalRelationFields = uniDirectionalRelationFields; |
|
172 |
} |
|
173 |
} |
|
0 | 174 |
metacat-index/src/main/resources/index-processor-context.xml | ||
---|---|---|
30 | 30 |
|
31 | 31 |
<import resource="application-context-oa.xml" /> |
32 | 32 |
<import resource="application-context-annotator.xml" /> |
33 |
|
|
34 |
<import resource="application-context-delete-subprocessors.xml" /> |
|
35 |
|
|
33 | 36 |
|
34 | 37 |
<bean id="dateConverter" class="org.dataone.cn.indexer.convert.SolrDateConverter" /> |
35 | 38 |
<bean id="fgdcDateConverter" class="org.dataone.cn.indexer.convert.FgdcDateConverter"/> |
... | ... | |
102 | 105 |
|
103 | 106 |
</list> |
104 | 107 |
</property> |
108 |
<property name="deleteSubprocessors"> |
|
109 |
<list> |
|
110 |
<ref bean="annotationDeleteSubprocessor" /> |
|
111 |
</list> |
|
112 |
</property> |
|
105 | 113 |
</bean> |
106 | 114 |
</list> |
107 | 115 |
</constructor-arg> |
Also available in: Unified diff
use new IDocumentDeleteSubprocessors to handle clean-up of annotation index fields when annotations are removed.