Revision 7876
Added by Jing Tao over 11 years ago
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGenerator.java | ||
---|---|---|
28 | 28 |
|
29 | 29 |
import java.io.FileInputStream; |
30 | 30 |
import java.io.FileNotFoundException; |
31 |
import java.io.IOException; |
|
31 | 32 |
import java.io.InputStream; |
32 | 33 |
import java.util.ArrayList; |
33 | 34 |
import java.util.Calendar; |
35 |
import java.util.Collections; |
|
34 | 36 |
import java.util.Date; |
35 | 37 |
import java.util.List; |
36 | 38 |
import java.util.TimerTask; |
37 | 39 |
|
40 |
import javax.xml.parsers.ParserConfigurationException; |
|
41 |
import javax.xml.xpath.XPathExpressionException; |
|
42 |
|
|
38 | 43 |
import org.apache.commons.logging.Log; |
39 | 44 |
import org.apache.commons.logging.LogFactory; |
40 | 45 |
import org.apache.solr.client.solrj.SolrServerException; |
... | ... | |
42 | 47 |
import org.dataone.service.exceptions.InvalidRequest; |
43 | 48 |
import org.dataone.service.exceptions.InvalidToken; |
44 | 49 |
import org.dataone.service.exceptions.NotAuthorized; |
50 |
import org.dataone.service.exceptions.NotFound; |
|
45 | 51 |
import org.dataone.service.exceptions.NotImplemented; |
46 | 52 |
import org.dataone.service.exceptions.ServiceFailure; |
53 |
import org.dataone.service.exceptions.UnsupportedType; |
|
47 | 54 |
import org.dataone.service.types.v1.Event; |
48 | 55 |
import org.dataone.service.types.v1.Identifier; |
49 | 56 |
import org.dataone.service.types.v1.ObjectFormatIdentifier; |
50 | 57 |
import org.dataone.service.types.v1.SystemMetadata; |
58 |
import org.xml.sax.SAXException; |
|
51 | 59 |
|
52 | 60 |
import com.hazelcast.core.IMap; |
53 | 61 |
import com.hazelcast.core.ISet; |
... | ... | |
70 | 78 |
|
71 | 79 |
private static final int FIRST =0; |
72 | 80 |
private static final int SECOND =1; |
81 |
private static final int THIRD = 2; |
|
82 |
private static final int FOURTH = 3; |
|
73 | 83 |
public static final int WAITTIME = 10000; |
74 | 84 |
public static final int MAXWAITNUMBER = 180; |
75 | 85 |
private static final String HTTP = "http://"; |
... | ... | |
140 | 150 |
* @throws IllegalAccessException |
141 | 151 |
* @throws InstantiationException |
142 | 152 |
* @throws ClassNotFoundException |
153 |
* @throws ParserConfigurationException |
|
154 |
* @throws SAXException |
|
155 |
* @throws IOException |
|
156 |
* @throws UnsupportedType |
|
157 |
* @throws NotFound |
|
158 |
* @throws XPathExpressionException |
|
143 | 159 |
*/ |
144 | 160 |
public void indexAll() throws InvalidRequest, InvalidToken, |
145 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
|
|
161 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
|
|
146 | 162 |
Date since = null; |
147 | 163 |
Date until = null; |
148 | 164 |
index(since, until); |
... | ... | |
161 | 177 |
* @throws IllegalAccessException |
162 | 178 |
* @throws InstantiationException |
163 | 179 |
* @throws ClassNotFoundException |
180 |
* @throws ParserConfigurationException |
|
181 |
* @throws SAXException |
|
182 |
* @throws IOException |
|
183 |
* @throws UnsupportedType |
|
184 |
* @throws NotFound |
|
185 |
* @throws XPathExpressionException |
|
164 | 186 |
*/ |
165 | 187 |
public void index(Date since) throws InvalidRequest, InvalidToken, |
166 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
|
|
188 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
|
|
167 | 189 |
Date until = null; |
168 | 190 |
index(since, until); |
169 | 191 |
} |
... | ... | |
178 | 200 |
* @throws NotAuthorized |
179 | 201 |
* @throws InvalidToken |
180 | 202 |
* @throws InvalidRequest |
181 |
* @throws FileNotFoundException |
|
182 | 203 |
* @throws IndexEventLogException |
183 | 204 |
* @throws IllegalAccessException |
184 | 205 |
* @throws InstantiationException |
185 | 206 |
* @throws ClassNotFoundException |
207 |
* @throws ParserConfigurationException |
|
208 |
* @throws SAXException |
|
209 |
* @throws IOException |
|
210 |
* @throws UnsupportedType |
|
211 |
* @throws NotFound |
|
212 |
* @throws XPathExpressionException |
|
186 | 213 |
*/ |
187 | 214 |
public void index(Date since, Date until) throws SolrServerException, InvalidRequest, |
188 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
|
|
215 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
|
|
189 | 216 |
Date processedDate = null; |
190 | 217 |
List<String> solrIds = null; |
191 | 218 |
initSystemMetadataMap(); |
... | ... | |
193 | 220 |
List[] metacatIds = getMetacatIds(since, until); |
194 | 221 |
List<String> otherMetacatIds = metacatIds[FIRST]; |
195 | 222 |
List<String> resourceMapIds = metacatIds[SECOND]; |
223 |
List<String> otherDeletedMetacatIds = metacatIds[THIRD]; |
|
224 |
List<String> resourceMapDeletedIds = metacatIds[FOURTH]; |
|
196 | 225 |
|
197 | 226 |
//figure out the processedDate by comparing the last elements of otherMetacatIds, otherDeletedMetacatIds, resourceMapIds and resourceMapDeletedIds. |
227 |
List<Long> maxCollection = new ArrayList<Long>(); |
|
198 | 228 |
Date latestOtherId = null; |
199 | 229 |
if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) { |
200 | 230 |
int size = otherMetacatIds.size(); |
201 | 231 |
String id = otherMetacatIds.get(size-1); |
202 | 232 |
SystemMetadata sysmeta = getSystemMetadata(id); |
203 | 233 |
latestOtherId = sysmeta.getDateSysMetadataModified(); |
234 |
maxCollection.add(new Long(latestOtherId.getTime())); |
|
204 | 235 |
} |
236 |
|
|
237 |
Date latestDeletedOtherIds = null; |
|
238 |
if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) { |
|
239 |
int size = otherDeletedMetacatIds.size(); |
|
240 |
String id = otherDeletedMetacatIds.get(size-1); |
|
241 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
242 |
latestDeletedOtherIds = sysmeta.getDateSysMetadataModified(); |
|
243 |
maxCollection.add(new Long(latestDeletedOtherIds.getTime())); |
|
244 |
} |
|
245 |
|
|
205 | 246 |
Date latestResourceId = null; |
206 | 247 |
if (resourceMapIds != null && !resourceMapIds.isEmpty()) { |
207 | 248 |
int size = resourceMapIds.size(); |
208 | 249 |
String id = resourceMapIds.get(size-1); |
209 | 250 |
SystemMetadata sysmeta = getSystemMetadata(id); |
210 | 251 |
latestResourceId = sysmeta.getDateSysMetadataModified(); |
252 |
maxCollection.add(new Long(latestResourceId.getTime())); |
|
211 | 253 |
} |
212 |
if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) { |
|
254 |
|
|
255 |
Date latestDeletedResourceId = null; |
|
256 |
if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) { |
|
257 |
int size = resourceMapDeletedIds.size(); |
|
258 |
String id = resourceMapDeletedIds.get(size-1); |
|
259 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
260 |
latestDeletedResourceId = sysmeta.getDateSysMetadataModified(); |
|
261 |
maxCollection.add(new Long(latestDeletedResourceId.getTime())); |
|
262 |
} |
|
263 |
|
|
264 |
if(!maxCollection.isEmpty()) { |
|
265 |
Long max = Collections.max(maxCollection); |
|
266 |
processedDate = new Date(max.longValue()); |
|
267 |
} |
|
268 |
/*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) { |
|
213 | 269 |
processedDate = latestOtherId; |
214 | 270 |
} else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime() <= latestResourceId.getTime()) { |
215 | 271 |
processedDate = latestResourceId; |
... | ... | |
217 | 273 |
processedDate = latestResourceId; |
218 | 274 |
} else if (latestOtherId != null && latestResourceId == null) { |
219 | 275 |
processedDate = latestOtherId; |
220 |
} |
|
276 |
}*/
|
|
221 | 277 |
|
278 |
|
|
222 | 279 |
//add the failedPids |
223 | 280 |
List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null); |
224 | 281 |
List<IndexEvent> failedOtherIds = new ArrayList<IndexEvent>(); |
... | ... | |
253 | 310 |
}*/ |
254 | 311 |
|
255 | 312 |
log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds); |
313 |
log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds); |
|
256 | 314 |
log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds); |
315 |
log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds); |
|
257 | 316 |
index(otherMetacatIds); |
317 |
removeIndex(otherDeletedMetacatIds); |
|
258 | 318 |
index(resourceMapIds); |
319 |
removeIndex(resourceMapDeletedIds); |
|
259 | 320 |
|
260 | 321 |
//record the timed index. |
261 | 322 |
if(processedDate != null) { |
... | ... | |
388 | 449 |
} catch (IndexEventLogException e) { |
389 | 450 |
// TODO Auto-generated catch block |
390 | 451 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
452 |
} catch (XPathExpressionException e) { |
|
453 |
// TODO Auto-generated catch block |
|
454 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
455 |
} catch (NotFound e) { |
|
456 |
// TODO Auto-generated catch block |
|
457 |
e.printStackTrace(); |
|
458 |
} catch (UnsupportedType e) { |
|
459 |
// TODO Auto-generated catch block |
|
460 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
461 |
} catch (IOException e) { |
|
462 |
// TODO Auto-generated catch block |
|
463 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
464 |
} catch (SAXException e) { |
|
465 |
// TODO Auto-generated catch block |
|
466 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
467 |
} catch (ParserConfigurationException e) { |
|
468 |
// TODO Auto-generated catch block |
|
469 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
391 | 470 |
} |
392 | 471 |
} |
393 | 472 |
|
... | ... | |
412 | 491 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException { |
413 | 492 |
|
414 | 493 |
List<String> resourceMapIds = new ArrayList(); |
494 |
List<String> resourceMapDeletedIds = new ArrayList(); |
|
415 | 495 |
List<String> otherIds = new ArrayList(); |
416 |
List[] ids = new List[2]; |
|
496 |
List<String> otherDeletedIds = new ArrayList(); |
|
497 |
List[] ids = new List[4]; |
|
417 | 498 |
ids[FIRST]= otherIds; |
418 | 499 |
ids[SECOND] = resourceMapIds; |
500 |
ids[THIRD] = otherDeletedIds; |
|
501 |
ids[FOURTH] = resourceMapDeletedIds; |
|
419 | 502 |
ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet(); |
420 | 503 |
Date otherPreviousDate = null; |
504 |
Date otherDeletedPreviousDate = null; |
|
421 | 505 |
Date resourceMapPreviousDate = null; |
506 |
Date resourceMapDeletedPreviousDate = null; |
|
422 | 507 |
if(metacatIds != null) { |
423 | 508 |
for(Identifier identifier : metacatIds) { |
424 | 509 |
if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) { |
425 | 510 |
SystemMetadata sysmeta = getSystemMetadata(identifier.getValue()); |
426 |
if(sysmeta != null && !sysmeta.getArchived()) {
|
|
511 |
if(sysmeta != null) { |
|
427 | 512 |
ObjectFormatIdentifier formatId =sysmeta.getFormatId(); |
428 | 513 |
//System.out.println("the object format id is "+formatId.getValue()); |
429 | 514 |
//System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces); |
... | ... | |
446 | 531 |
} |
447 | 532 |
if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) { |
448 | 533 |
//for the resource map |
449 |
if(!resourceMapIds.isEmpty()) { |
|
450 |
if(sysDate.getTime() > resourceMapPreviousDate.getTime()) { |
|
451 |
resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
452 |
resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one |
|
534 |
if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) { |
|
535 |
//archived or obsoleted ids |
|
536 |
if(!resourceMapDeletedIds.isEmpty()) { |
|
537 |
if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) { |
|
538 |
resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
539 |
resourceMapDeletedPreviousDate = sysDate;//reset resourceMapDeletedPreviousDate to the bigger one |
|
540 |
} else { |
|
541 |
int size = resourceMapDeletedIds.size();// |
|
542 |
resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list. |
|
543 |
} |
|
453 | 544 |
} else { |
454 |
int size = resourceMapIds.size();//
|
|
455 |
resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
|
|
545 |
resourceMapDeletedIds.add(identifier.getValue());
|
|
546 |
resourceMapDeletedPreviousDate = sysDate;//init resourceMapDeletedPreviousDate
|
|
456 | 547 |
} |
457 | 548 |
} else { |
458 |
resourceMapIds.add(identifier.getValue()); |
|
459 |
resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate |
|
549 |
// current ids |
|
550 |
if(!resourceMapIds.isEmpty()) { |
|
551 |
if(sysDate.getTime() > resourceMapPreviousDate.getTime()) { |
|
552 |
resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
553 |
resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one |
|
554 |
} else { |
|
555 |
int size = resourceMapIds.size();// |
|
556 |
resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list. |
|
557 |
} |
|
558 |
} else { |
|
559 |
resourceMapIds.add(identifier.getValue()); |
|
560 |
resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate |
|
561 |
} |
|
460 | 562 |
} |
461 |
|
|
462 | 563 |
} else if (correctTimeRange) { |
463 |
if(!otherIds.isEmpty()) { |
|
464 |
if(sysDate.getTime() > otherPreviousDate.getTime()) { |
|
465 |
otherIds.add(identifier.getValue()); |
|
466 |
otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one |
|
564 |
if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) { |
|
565 |
//for the archived or obsoleted ids |
|
566 |
if(!otherDeletedIds.isEmpty()) { |
|
567 |
if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) { |
|
568 |
otherDeletedIds.add(identifier.getValue()); |
|
569 |
otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one |
|
570 |
} else { |
|
571 |
int size = otherDeletedIds.size(); |
|
572 |
otherDeletedIds.add(size-1, identifier.getValue()); |
|
573 |
} |
|
467 | 574 |
} else { |
468 |
int size = otherIds.size();
|
|
469 |
otherIds.add(size-1, identifier.getValue());
|
|
575 |
otherDeletedIds.add(identifier.getValue());
|
|
576 |
otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
|
|
470 | 577 |
} |
471 | 578 |
} else { |
472 |
otherIds.add(identifier.getValue()); |
|
473 |
otherPreviousDate = sysDate;//init otherPreviousDate |
|
579 |
//for the current ids |
|
580 |
if(!otherIds.isEmpty()) { |
|
581 |
if(sysDate.getTime() > otherPreviousDate.getTime()) { |
|
582 |
otherIds.add(identifier.getValue()); |
|
583 |
otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one |
|
584 |
} else { |
|
585 |
int size = otherIds.size(); |
|
586 |
otherIds.add(size-1, identifier.getValue()); |
|
587 |
} |
|
588 |
} else { |
|
589 |
otherIds.add(identifier.getValue()); |
|
590 |
otherPreviousDate = sysDate;//init otherPreviousDate |
|
591 |
} |
|
474 | 592 |
} |
475 |
|
|
476 | 593 |
} |
477 | 594 |
|
478 | 595 |
} |
... | ... | |
523 | 640 |
} |
524 | 641 |
|
525 | 642 |
/* |
643 |
* Remove the solr index for the list of ids |
|
644 |
*/ |
|
645 |
private void removeIndex(List<String> ids) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException { |
|
646 |
if(ids!= null) { |
|
647 |
for(String id :ids) { |
|
648 |
removeIndex(id); |
|
649 |
} |
|
650 |
} |
|
651 |
} |
|
652 |
|
|
653 |
/* |
|
526 | 654 |
* Remove the index for the id |
527 | 655 |
*/ |
528 |
private void removeIndex(String id) throws Exception {
|
|
656 |
private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException {
|
|
529 | 657 |
if(id != null) { |
530 | 658 |
solrIndex.remove(id); |
531 | 659 |
} |
Also available in: Unified diff
Add code to handle deleted ids.