Revision 7710
Added by Jing Tao over 11 years ago
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/resourcemap/ResourceMapSubprocessor.java | ||
---|---|---|
26 | 26 |
*/ |
27 | 27 |
package edu.ucsb.nceas.metacat.index.resourcemap; |
28 | 28 |
|
29 |
import java.io.ByteArrayInputStream; |
|
30 |
import java.io.ByteArrayOutputStream; |
|
29 | 31 |
import java.io.IOException; |
32 |
import java.io.InputStream; |
|
33 |
import java.io.StringWriter; |
|
34 |
import java.io.Writer; |
|
30 | 35 |
import java.util.ArrayList; |
36 |
import java.util.Collections; |
|
37 |
import java.util.Date; |
|
31 | 38 |
import java.util.HashMap; |
32 | 39 |
import java.util.List; |
33 | 40 |
import java.util.Map; |
34 | 41 |
import java.util.Set; |
35 | 42 |
|
43 |
import javax.xml.parsers.DocumentBuilder; |
|
44 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
36 | 45 |
import javax.xml.parsers.ParserConfigurationException; |
46 |
import javax.xml.xpath.XPathConstants; |
|
37 | 47 |
import javax.xml.xpath.XPathExpressionException; |
48 |
import javax.xml.xpath.XPathFactory; |
|
38 | 49 |
|
39 | 50 |
import org.apache.commons.codec.EncoderException; |
40 | 51 |
import org.apache.commons.logging.Log; |
... | ... | |
45 | 56 |
import org.apache.solr.common.SolrDocument; |
46 | 57 |
import org.apache.solr.common.SolrDocumentList; |
47 | 58 |
import org.apache.solr.common.params.SolrParams; |
59 |
import org.apache.solr.core.CoreContainer; |
|
60 |
import org.apache.solr.core.SolrCore; |
|
61 |
import org.apache.solr.request.LocalSolrQueryRequest; |
|
62 |
import org.apache.solr.response.QueryResponseWriter; |
|
63 |
import org.apache.solr.response.SolrQueryResponse; |
|
64 |
import org.apache.solr.schema.DateField; |
|
65 |
import org.apache.solr.schema.IndexSchema; |
|
66 |
import org.apache.solr.schema.SchemaField; |
|
48 | 67 |
import org.apache.solr.servlet.SolrRequestParsers; |
49 | 68 |
import org.dataone.cn.indexer.parser.AbstractDocumentSubprocessor; |
50 | 69 |
import org.dataone.cn.indexer.parser.IDocumentSubprocessor; |
51 | 70 |
import org.dataone.cn.indexer.resourcemap.ResourceMap; |
52 |
import org.dataone.cn.indexer.solrhttp.HTTPService; |
|
53 | 71 |
import org.dataone.cn.indexer.solrhttp.SolrDoc; |
54 | 72 |
import org.dataone.cn.indexer.solrhttp.SolrElementField; |
55 | 73 |
import org.w3c.dom.Document; |
74 |
import org.w3c.dom.Element; |
|
75 |
import org.w3c.dom.NodeList; |
|
56 | 76 |
import org.xml.sax.SAXException; |
57 | 77 |
|
78 |
import edu.ucsb.nceas.metacat.common.SolrQueryResponseTransformer; |
|
79 |
import edu.ucsb.nceas.metacat.common.SolrQueryResponseWriterFactory; |
|
58 | 80 |
import edu.ucsb.nceas.metacat.common.SolrServerFactory; |
59 | 81 |
import edu.ucsb.nceas.metacat.index.SolrIndex; |
60 | 82 |
|
83 |
|
|
61 | 84 |
/** |
62 | 85 |
* A solr index parser for the ResourceMap file. |
63 | 86 |
* The Solr doc of the ResourceMap itself contains only the system metadata information. |
... | ... | |
68 | 91 |
private static final String QUERY ="q=id:"; |
69 | 92 |
private static Log log = LogFactory.getLog(SolrIndex.class); |
70 | 93 |
private static SolrServer solrServer = null; |
94 |
private static SolrCore solrCore = null; |
|
95 |
private static CoreContainer solrCoreContainer = null; |
|
71 | 96 |
static { |
72 | 97 |
try { |
73 | 98 |
solrServer = SolrServerFactory.createSolrServer(); |
99 |
CoreContainer solrCoreContainer = SolrServerFactory.getCoreContainer(); |
|
100 |
String coreName = SolrServerFactory.getCollectionName(); |
|
101 |
solrCore = solrCoreContainer.getCore(coreName); |
|
74 | 102 |
} catch (Exception e) { |
75 | 103 |
log.error("ResourceMapSubprocessor - can't generate the SolrServer since - "+e.getMessage()); |
76 | 104 |
} |
... | ... | |
96 | 124 |
//List<SolrDoc> updateDocuments = getHttpService().getDocuments(getSolrQueryUri(), documentIds); |
97 | 125 |
List<SolrDoc> updateDocuments = getSolrDocs(documentIds); |
98 | 126 |
List<SolrDoc> mergedDocuments = resourceMap.mergeIndexedDocuments(updateDocuments); |
127 |
/*if(mergedDocuments != null) { |
|
128 |
for(SolrDoc doc : mergedDocuments) { |
|
129 |
ByteArrayOutputStream out = new ByteArrayOutputStream(); |
|
130 |
doc.serialize(out, "UTF-8"); |
|
131 |
String result = new String(out.toByteArray(), "UTF-8"); |
|
132 |
System.out.println("after updated document==========================="); |
|
133 |
System.out.println(result); |
|
134 |
} |
|
135 |
}*/ |
|
99 | 136 |
mergedDocuments.add(indexDocument); |
100 | 137 |
return mergedDocuments; |
101 | 138 |
} |
... | ... | |
103 | 140 |
/* |
104 | 141 |
* Get the SolrDoc list for the list of the ids. |
105 | 142 |
*/ |
106 |
private List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException { |
|
143 |
private List<SolrDoc> getSolrDocs(List<String> ids) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException {
|
|
107 | 144 |
List<SolrDoc> list = new ArrayList<SolrDoc>(); |
108 | 145 |
if(ids != null) { |
109 | 146 |
for(String id : ids) { |
... | ... | |
119 | 156 |
/* |
120 | 157 |
* Get the SolrDoc for the specified id |
121 | 158 |
*/ |
122 |
private SolrDoc getSolrDoc(String id) throws SolrServerException { |
|
159 |
private SolrDoc getSolrDoc(String id) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException {
|
|
123 | 160 |
SolrDoc solrDoc = null; |
124 | 161 |
if(solrServer != null) { |
125 | 162 |
String query = QUERY+"\""+id+"\""; |
126 | 163 |
SolrParams solrParams = SolrRequestParsers.parseQueryString(query); |
127 | 164 |
QueryResponse response = solrServer.query(solrParams); |
128 |
solrDoc = transformQueryResponseToSolrDoc(response); |
|
165 |
solrDoc = transformQueryResponseToSolrDoc(solrParams, response); |
|
166 |
|
|
167 |
/*if(solrDoc != null) { |
|
168 |
ByteArrayOutputStream out = new ByteArrayOutputStream(); |
|
169 |
solrDoc.serialize(out, "UTF-8"); |
|
170 |
String result = new String(out.toByteArray(), "UTF-8"); |
|
171 |
System.out.println("need to be updated document ==========================="); |
|
172 |
System.out.println(result); |
|
173 |
}*/ |
|
174 |
|
|
129 | 175 |
} |
130 | 176 |
return solrDoc; |
131 | 177 |
} |
132 | 178 |
|
133 |
/**
|
|
179 |
/* |
|
134 | 180 |
* Transform a Solr QueryResponse to a SolrDoc. The QueryResponse contains a list of |
135 |
* SolrDocuments. This method will transform the first SolrDocuments (Solr lib) to |
|
136 |
* the SolrDoc (d1_cn_index_processor lib). |
|
181 |
* SolrDocuments. This method will transform the first SolrDocuments (in the Solr lib) to
|
|
182 |
* the SolrDoc (in the d1_cn_index_processor lib).
|
|
137 | 183 |
* @param reponse |
138 | 184 |
* @return |
139 | 185 |
*/ |
140 |
public static SolrDoc transformQueryResponseToSolrDoc(QueryResponse reponse) { |
|
141 |
SolrDoc solrDoc = new SolrDoc(); |
|
142 |
if(reponse != null) { |
|
143 |
SolrDocumentList list = reponse.getResults(); |
|
144 |
if(list != null && !list.isEmpty()) { |
|
145 |
SolrDocument document = list.get(0); |
|
146 |
if(document != null) { |
|
147 |
List<SolrElementField> elementFieldList = new ArrayList<SolrElementField>(); |
|
148 |
Set<String> keys = document.keySet(); |
|
149 |
for(String key :keys) { |
|
150 |
Object value = document.get(key); |
|
151 |
SolrElementField solrElement = new SolrElementField(key, value.toString()); |
|
152 |
elementFieldList.add(solrElement); |
|
153 |
} |
|
154 |
solrDoc.setFieldList(elementFieldList); |
|
155 |
} |
|
156 |
|
|
157 |
} |
|
186 |
private SolrDoc transformQueryResponseToSolrDoc(SolrParams solrParams, QueryResponse response) throws SolrServerException, IOException, ParserConfigurationException, SAXException, XPathExpressionException { |
|
187 |
SolrDoc solrDoc = null; |
|
188 |
if(response != null) { |
|
189 |
SolrQueryResponseTransformer transformer = new SolrQueryResponseTransformer(solrCore); |
|
190 |
InputStream input = transformer.transformResults(solrParams, response, SolrQueryResponseWriterFactory.XML); |
|
191 |
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); |
|
192 |
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); |
|
193 |
Document doc = dBuilder.parse(input); |
|
194 |
solrDoc = parseResults(doc); |
|
158 | 195 |
} |
159 | 196 |
return solrDoc; |
160 | 197 |
} |
198 |
|
|
199 |
|
|
200 |
|
|
201 |
/* |
|
202 |
* Parse the query result document. This method only choose the first one from a list. |
|
203 |
*/ |
|
204 |
private SolrDoc parseResults(Document document) throws XPathExpressionException { |
|
205 |
SolrDoc solrDoc = null; |
|
206 |
NodeList nodeList = (NodeList) XPathFactory.newInstance().newXPath() |
|
207 |
.evaluate("/response/result/doc", document, XPathConstants.NODESET); |
|
208 |
if(nodeList != null && nodeList.getLength() >0) { |
|
209 |
Element docElement = (Element) nodeList.item(0); |
|
210 |
solrDoc = parseDoc(docElement); |
|
211 |
} |
|
212 |
return solrDoc; |
|
213 |
} |
|
161 | 214 |
|
215 |
|
|
216 |
/* |
|
217 |
* Parse an element |
|
218 |
*/ |
|
219 |
private SolrDoc parseDoc(Element docElement) { |
|
220 |
List<String> validSolrFieldNames = getValidSchemaField(); |
|
221 |
SolrDoc doc = new SolrDoc(); |
|
222 |
doc.LoadFromElement(docElement, validSolrFieldNames); |
|
223 |
return doc; |
|
224 |
} |
|
225 |
|
|
226 |
|
|
227 |
/** |
|
228 |
* Get the valid schema fields from the solr server. |
|
229 |
* @return |
|
230 |
*/ |
|
231 |
private List<String> getValidSchemaField() { |
|
232 |
List<String> validSolrFieldNames = new ArrayList<String>(); |
|
233 |
IndexSchema schema = solrCore.getSchema(); |
|
234 |
Map<String, SchemaField> fieldMap = schema.getFields(); |
|
235 |
Set<String> fieldNames = fieldMap.keySet(); |
|
236 |
for(String fieldName : fieldNames) { |
|
237 |
SchemaField field = fieldMap.get(fieldName); |
|
238 |
//remove the field which is the target field of a CopyField. |
|
239 |
if(field != null && !schema.isCopyFieldTarget(field)) { |
|
240 |
validSolrFieldNames.add(fieldName); |
|
241 |
} |
|
242 |
} |
|
243 |
//System.out.println("the valid file name is\n"+validSolrFieldNames); |
|
244 |
return validSolrFieldNames; |
|
245 |
} |
|
246 |
|
|
162 | 247 |
} |
Also available in: Unified diff
Add code to transform the query response to SolrDoc.