Revision 8766
Added by ben leinfelder over 10 years ago
metacat-index/src/test/java/edu/ucsb/nceas/metacat/index/SolrIndexIT.java | ||
---|---|---|
4 | 4 |
import static org.junit.Assert.assertTrue; |
5 | 5 |
|
6 | 6 |
import edu.ucsb.nceas.metacat.common.SolrServerFactory; |
7 |
import java.io.File; |
|
8 |
import java.io.FileInputStream; |
|
9 |
import java.io.InputStream; |
|
10 | 7 |
import java.io.StringWriter; |
11 | 8 |
import java.io.Writer; |
12 | 9 |
import java.util.ArrayList; |
... | ... | |
74 | 71 |
|
75 | 72 |
//InputStream systemInputStream = new FileInputStream(new File(SYSTEMMETAFILEPATH)); |
76 | 73 |
SystemMetadata systemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, SYSTEMMETAFILEPATH); |
77 |
InputStream emlInputStream = new FileInputStream(new File(EMLFILEPATH)); |
|
78 | 74 |
//List<String> chain = null; |
79 | 75 |
Identifier pid = new Identifier(); |
80 | 76 |
pid.setValue(id); |
81 |
solrIndex.update(pid, systemMetadata, emlInputStream);
|
|
77 |
solrIndex.update(pid, systemMetadata, EMLFILEPATH);
|
|
82 | 78 |
String result = doQuery(solrIndex.getSolrServer()); |
83 | 79 |
List<String> ids = solrIndex.getSolrIds(); |
84 | 80 |
//assertTrue(ids.size() == 1); |
... | ... | |
100 | 96 |
public void testUpdate() throws Exception { |
101 | 97 |
//InputStream systemInputStream = new FileInputStream(new File(SYSTEMMETAFILEPATH)); |
102 | 98 |
SystemMetadata systemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, SYSTEMMETAUPDATEFILEPATH); |
103 |
InputStream emlInputStream = new FileInputStream(new File(EMLUPDATEFILEPATH)); |
|
104 | 99 |
/*obsoletes.add(id); |
105 | 100 |
obsoletes.add("tao");*/ |
106 | 101 |
Identifier pid = new Identifier(); |
107 | 102 |
pid.setValue(newId); |
108 |
solrIndex.update(pid, systemMetadata, emlInputStream);
|
|
103 |
solrIndex.update(pid, systemMetadata, EMLFILEPATH);
|
|
109 | 104 |
String result = doQuery(solrIndex.getSolrServer()); |
110 | 105 |
assertTrue(result.contains("version1")); |
111 | 106 |
assertTrue(result.contains("version2")); |
... | ... | |
115 | 110 |
SystemMetadata obsoletedSystemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, SYSTEMMETAFILEPATH); |
116 | 111 |
assertTrue(obsoletedSystemMetadata.getIdentifier().getValue().equals(obsoletedPid.getValue())); |
117 | 112 |
obsoletedSystemMetadata.setObsoletedBy(pid); |
118 |
InputStream obsoletedEmlInputStream = new FileInputStream(new File(EMLFILEPATH)); |
|
119 |
solrIndex.update(obsoletedPid, obsoletedSystemMetadata, obsoletedEmlInputStream); |
|
113 |
solrIndex.update(obsoletedPid, obsoletedSystemMetadata, EMLFILEPATH); |
|
120 | 114 |
|
121 | 115 |
// old version should be marked as obsoleted and not returned |
122 | 116 |
result = doQuery(solrIndex.getSolrServer(), "&fq=-obsoletedBy:*"); |
... | ... | |
133 | 127 |
//InputStream systemInputStream = new FileInputStream(new File(SYSTEMMETAFILEPATH)); |
134 | 128 |
//System metadata's archive is true. |
135 | 129 |
SystemMetadata systemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, SYSTEMMETAARCHIVEFILEPATH); |
136 |
InputStream emlInputStream = new FileInputStream(new File(EMLUPDATEFILEPATH)); |
|
137 | 130 |
/*ArrayList<String> obsoletes = new ArrayList<String>(); |
138 | 131 |
obsoletes.add(id); |
139 | 132 |
obsoletes.add("tao");*/ |
140 | 133 |
Identifier pid = new Identifier(); |
141 | 134 |
pid.setValue(newId); |
142 |
solrIndex.update(pid, systemMetadata, emlInputStream);
|
|
135 |
solrIndex.update(pid, systemMetadata, EMLUPDATEFILEPATH);
|
|
143 | 136 |
String result = doQuery(solrIndex.getSolrServer()); |
144 | 137 |
assertTrue(result.contains("version1")); |
145 | 138 |
assertTrue(!result.contains("version2")); |
... | ... | |
153 | 146 |
public void testDynamicFields() throws Exception { |
154 | 147 |
|
155 | 148 |
SystemMetadata systemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, SYSTEMMETAFILEPATH); |
156 |
InputStream emlInputStream = new FileInputStream(new File(EMLFILEPATH)); |
|
157 | 149 |
Identifier pid = new Identifier(); |
158 | 150 |
pid.setValue(id); |
159 |
solrIndex.update(pid, systemMetadata, emlInputStream);
|
|
151 |
solrIndex.update(pid, systemMetadata, EMLFILEPATH);
|
|
160 | 152 |
String result = doQuery(solrIndex.getSolrServer()); |
161 | 153 |
List<String> ids = solrIndex.getSolrIds(); |
162 | 154 |
boolean foundId = false; |
... | ... | |
197 | 189 |
public void testOpenAnnotation() throws Exception { |
198 | 190 |
|
199 | 191 |
SystemMetadata systemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, SYSTEMMETAFILEPATH); |
200 |
InputStream emlInputStream = new FileInputStream(new File(EMLFILEPATH)); |
|
201 | 192 |
Identifier pid = new Identifier(); |
202 | 193 |
pid.setValue(id); |
203 |
solrIndex.update(pid, systemMetadata, emlInputStream);
|
|
194 |
solrIndex.update(pid, systemMetadata, EMLFILEPATH);
|
|
204 | 195 |
String result = doQuery(solrIndex.getSolrServer()); |
205 | 196 |
List<String> ids = solrIndex.getSolrIds(); |
206 | 197 |
boolean foundId = false; |
... | ... | |
214 | 205 |
|
215 | 206 |
// augment with the dynamic field |
216 | 207 |
SystemMetadata annotationSystemMetadata = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, ANNOTATION_SYSTEM_META_FILE_PATH); |
217 |
InputStream annotationInputStream = new FileInputStream(new File(OA_FILE_PATH)); |
|
218 | 208 |
Identifier annotationPid = new Identifier(); |
219 | 209 |
annotationPid.setValue(annotation_id); |
220 |
solrIndex.update(annotationPid, annotationSystemMetadata, annotationInputStream);
|
|
210 |
solrIndex.update(annotationPid, annotationSystemMetadata, OA_FILE_PATH);
|
|
221 | 211 |
String annotationResult = doQuery(solrIndex.getSolrServer(), "&fq=standard_sm:\"http://ecoinformatics.org/oboe/oboe.1.0/oboe-standards.owl#Gram\""); |
222 | 212 |
assertTrue(annotationResult.contains(pid.getValue())); |
223 | 213 |
assertTrue(annotationResult.contains("http://ecoinformatics.org/oboe/oboe.1.0/oboe-standards.owl#Gram")); |
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/SolrIndex.java | ||
---|---|---|
23 | 23 |
import java.io.FileNotFoundException; |
24 | 24 |
import java.io.IOException; |
25 | 25 |
import java.io.InputStream; |
26 |
import java.net.MalformedURLException; |
|
27 | 26 |
import java.util.ArrayList; |
28 | 27 |
import java.util.Calendar; |
29 | 28 |
import java.util.Date; |
... | ... | |
176 | 175 |
* @throws NotFound |
177 | 176 |
* @throws NotImplemented |
178 | 177 |
*/ |
179 |
private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, InputStream dataStream)
|
|
178 |
private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata, String objectPath)
|
|
180 | 179 |
throws IOException, SAXException, ParserConfigurationException, |
181 | 180 |
XPathExpressionException, JiBXException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{ |
182 | 181 |
|
... | ... | |
209 | 208 |
// metadata document. |
210 | 209 |
// note that resource map processing touches all objects |
211 | 210 |
// referenced by the resource map. |
211 |
InputStream dataStream = new FileInputStream(objectPath); |
|
212 | 212 |
Document docObject = generateXmlDocument(dataStream); |
213 | 213 |
if (docObject == null) { |
214 | 214 |
throw new Exception("Could not load OBJECT for ID " + id ); |
... | ... | |
333 | 333 |
* @param data |
334 | 334 |
* @throws SolrServerException |
335 | 335 |
*/ |
336 |
private void checkParams(Identifier pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException {
|
|
336 |
private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException {
|
|
337 | 337 |
if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) { |
338 | 338 |
throw new SolrServerException("The identifier of the indexed document should not be null or blank."); |
339 | 339 |
} |
340 | 340 |
if(systemMetadata == null) { |
341 | 341 |
throw new SolrServerException("The system metadata of the indexed document "+pid.getValue()+ " should not be null."); |
342 | 342 |
} |
343 |
if(data == null) {
|
|
343 |
if(objectPath == null) {
|
|
344 | 344 |
throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null."); |
345 | 345 |
} |
346 | 346 |
} |
... | ... | |
349 | 349 |
* Insert the indexes for a document. |
350 | 350 |
* @param pid the id of this document |
351 | 351 |
* @param systemMetadata the system metadata associated with the data object |
352 |
* @param data the data object itself
|
|
352 |
* @param objectPath the path to the object file itself
|
|
353 | 353 |
* @throws SolrServerException |
354 | 354 |
* @throws JiBXException |
355 | 355 |
* @throws EncoderException |
... | ... | |
357 | 357 |
* @throws NotFound |
358 | 358 |
* @throws NotImplemented |
359 | 359 |
*/ |
360 |
private synchronized void insert(Identifier pid, SystemMetadata systemMetadata, InputStream data)
|
|
360 |
private synchronized void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath)
|
|
361 | 361 |
throws IOException, SAXException, ParserConfigurationException, |
362 | 362 |
XPathExpressionException, SolrServerException, JiBXException, EncoderException, NotImplemented, NotFound, UnsupportedType { |
363 |
checkParams(pid, systemMetadata, data);
|
|
364 |
Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, data);
|
|
363 |
checkParams(pid, systemMetadata, objectPath);
|
|
364 |
Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, objectPath);
|
|
365 | 365 |
|
366 | 366 |
//transform the Map to the SolrInputDocument which can be used by the solr server |
367 | 367 |
if(docs != null) { |
... | ... | |
514 | 514 |
*/ |
515 | 515 |
public void update(Identifier pid, SystemMetadata systemMetadata) { |
516 | 516 |
String objectPath = null; |
517 |
InputStream data = null; |
|
518 | 517 |
try { |
519 | 518 |
objectPath = DistributedMapsFactory.getObjectPathMap().get(pid); |
520 |
data = new FileInputStream(objectPath); |
|
521 |
update(pid, systemMetadata, data); |
|
519 |
update(pid, systemMetadata, objectPath); |
|
522 | 520 |
EventlogFactory.createIndexEventLog().remove(pid); |
523 | 521 |
} catch (Exception e) { |
524 | 522 |
String error = "SolrIndex.update - could not update the solr index since " + e.getMessage(); |
... | ... | |
550 | 548 |
* @throws JiBXException |
551 | 549 |
* @throws EncoderException |
552 | 550 |
*/ |
553 |
void update(Identifier pid, SystemMetadata systemMetadata, InputStream data) throws SolrServerException,
|
|
551 |
void update(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws SolrServerException,
|
|
554 | 552 |
ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, |
555 | 553 |
IOException, SAXException, ParserConfigurationException, OREParserException, JiBXException, EncoderException { |
556 |
checkParams(pid, systemMetadata, data);
|
|
554 |
checkParams(pid, systemMetadata, objectPath);
|
|
557 | 555 |
boolean isArchive = systemMetadata.getArchived() != null && systemMetadata.getArchived(); |
558 | 556 |
if(isArchive ) { |
559 | 557 |
//delete the index for the archived objects |
... | ... | |
561 | 559 |
log.info("SolrIndex.update============================= archive the idex for the identifier "+pid); |
562 | 560 |
} else { |
563 | 561 |
//generate index for either add or update. |
564 |
insert(pid, systemMetadata, data);
|
|
562 |
insert(pid, systemMetadata, objectPath);
|
|
565 | 563 |
log.info("SolrIndex.update============================= insert index for the identifier "+pid); |
566 | 564 |
} |
567 | 565 |
} |
Also available in: Unified diff
Pass around the object file path rather than the data stream, so that multiple subprocessors can index the same object without one of them consuming the stream before the next one gets to it. This is in preparation for extending the assertions stored in OREs. https://projects.ecoinformatics.org/ecoinfo/issues/6548