Project

General

Profile

Revision 7876

Added by Jing Tao over 7 years ago

Add code to handle deleted ids.

View differences:

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGenerator.java
28 28

  
29 29
import java.io.FileInputStream;
30 30
import java.io.FileNotFoundException;
31
import java.io.IOException;
31 32
import java.io.InputStream;
32 33
import java.util.ArrayList;
33 34
import java.util.Calendar;
35
import java.util.Collections;
34 36
import java.util.Date;
35 37
import java.util.List;
36 38
import java.util.TimerTask;
37 39

  
40
import javax.xml.parsers.ParserConfigurationException;
41
import javax.xml.xpath.XPathExpressionException;
42

  
38 43
import org.apache.commons.logging.Log;
39 44
import org.apache.commons.logging.LogFactory;
40 45
import org.apache.solr.client.solrj.SolrServerException;
......
42 47
import org.dataone.service.exceptions.InvalidRequest;
43 48
import org.dataone.service.exceptions.InvalidToken;
44 49
import org.dataone.service.exceptions.NotAuthorized;
50
import org.dataone.service.exceptions.NotFound;
45 51
import org.dataone.service.exceptions.NotImplemented;
46 52
import org.dataone.service.exceptions.ServiceFailure;
53
import org.dataone.service.exceptions.UnsupportedType;
47 54
import org.dataone.service.types.v1.Event;
48 55
import org.dataone.service.types.v1.Identifier;
49 56
import org.dataone.service.types.v1.ObjectFormatIdentifier;
50 57
import org.dataone.service.types.v1.SystemMetadata;
58
import org.xml.sax.SAXException;
51 59

  
52 60
import com.hazelcast.core.IMap;
53 61
import com.hazelcast.core.ISet;
......
70 78
    
71 79
    private static final int FIRST =0;
72 80
    private static final int SECOND =1;
81
    private static final int THIRD = 2;
82
    private static final int FOURTH = 3;
73 83
    public static final int WAITTIME = 10000;
74 84
    public static final int MAXWAITNUMBER = 180;
75 85
    private static final String HTTP = "http://";
......
140 150
     * @throws IllegalAccessException 
141 151
     * @throws InstantiationException 
142 152
     * @throws ClassNotFoundException 
153
     * @throws ParserConfigurationException 
154
     * @throws SAXException 
155
     * @throws IOException 
156
     * @throws UnsupportedType 
157
     * @throws NotFound 
158
     * @throws XPathExpressionException 
143 159
     */
144 160
    public void indexAll() throws InvalidRequest, InvalidToken,
145
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
161
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
146 162
        Date since = null;
147 163
        Date until = null;
148 164
        index(since, until);
......
161 177
     * @throws IllegalAccessException 
162 178
     * @throws InstantiationException 
163 179
     * @throws ClassNotFoundException 
180
     * @throws ParserConfigurationException 
181
     * @throws SAXException 
182
     * @throws IOException 
183
     * @throws UnsupportedType 
184
     * @throws NotFound 
185
     * @throws XPathExpressionException 
164 186
     */
165 187
    public void index(Date since) throws InvalidRequest, InvalidToken, 
166
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
188
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
167 189
        Date until = null;
168 190
        index(since, until);
169 191
    }
......
178 200
     * @throws NotAuthorized 
179 201
     * @throws InvalidToken 
180 202
     * @throws InvalidRequest 
181
     * @throws FileNotFoundException 
182 203
     * @throws IndexEventLogException 
183 204
     * @throws IllegalAccessException 
184 205
     * @throws InstantiationException 
185 206
     * @throws ClassNotFoundException 
207
     * @throws ParserConfigurationException 
208
     * @throws SAXException 
209
     * @throws IOException 
210
     * @throws UnsupportedType 
211
     * @throws NotFound 
212
     * @throws XPathExpressionException 
186 213
     */
187 214
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
188
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
215
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
189 216
        Date processedDate = null;
190 217
        List<String> solrIds = null;
191 218
        initSystemMetadataMap();
......
193 220
        List[] metacatIds = getMetacatIds(since, until);
194 221
        List<String> otherMetacatIds = metacatIds[FIRST];
195 222
        List<String> resourceMapIds =  metacatIds[SECOND];
223
        List<String> otherDeletedMetacatIds = metacatIds[THIRD];
224
        List<String> resourceMapDeletedIds = metacatIds[FOURTH];
196 225
        
197 226
        //figure out the processedDate by comparing the last element of otherMetacatIds and resourceMapIds.
227
        List<Long> maxCollection = new ArrayList<Long>();
198 228
        Date latestOtherId = null;
199 229
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
200 230
            int size = otherMetacatIds.size();
201 231
            String id = otherMetacatIds.get(size-1);
202 232
            SystemMetadata sysmeta = getSystemMetadata(id);
203 233
            latestOtherId = sysmeta.getDateSysMetadataModified();
234
            maxCollection.add(new Long(latestOtherId.getTime()));
204 235
        }
236
        
237
        Date latestDeletedOtherIds = null;
238
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
239
            int size = otherDeletedMetacatIds.size();
240
            String id = otherDeletedMetacatIds.get(size-1);
241
            SystemMetadata sysmeta = getSystemMetadata(id);
242
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
243
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
244
        }
245
        
205 246
        Date latestResourceId = null;
206 247
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
207 248
            int size = resourceMapIds.size();
208 249
            String id = resourceMapIds.get(size-1);
209 250
            SystemMetadata sysmeta = getSystemMetadata(id);
210 251
            latestResourceId = sysmeta.getDateSysMetadataModified();
252
            maxCollection.add(new Long(latestResourceId.getTime()));
211 253
        }
212
        if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
254
        
255
        Date latestDeletedResourceId = null;
256
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
257
            int size = resourceMapDeletedIds.size();
258
            String id = resourceMapDeletedIds.get(size-1);
259
            SystemMetadata sysmeta = getSystemMetadata(id);
260
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
261
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
262
        }
263
        
264
        if(!maxCollection.isEmpty()) {
265
            Long max = Collections.max(maxCollection);
266
            processedDate = new Date(max.longValue());
267
        }
268
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
213 269
            processedDate = latestOtherId;
214 270
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
215 271
            processedDate = latestResourceId;
......
217 273
            processedDate = latestResourceId;
218 274
        } else if (latestOtherId != null && latestResourceId == null) {
219 275
            processedDate = latestOtherId;
220
        }
276
        }*/
221 277
        
278
        
222 279
        //add the failedPids 
223 280
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
224 281
        List<IndexEvent> failedOtherIds = new ArrayList<IndexEvent>();
......
253 310
        }*/
254 311
        
255 312
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
313
        log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
256 314
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
315
        log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
257 316
        index(otherMetacatIds);
317
        removeIndex(otherDeletedMetacatIds);
258 318
        index(resourceMapIds);
319
        removeIndex(resourceMapDeletedIds);
259 320
       
260 321
        //record the timed index.
261 322
        if(processedDate != null) {
......
388 449
        } catch (IndexEventLogException e) {
389 450
            // TODO Auto-generated catch block
390 451
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
452
        } catch (XPathExpressionException e) {
453
            // TODO Auto-generated catch block
454
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
455
        } catch (NotFound e) {
456
            // TODO Auto-generated catch block
457
            e.printStackTrace();
458
        } catch (UnsupportedType e) {
459
            // TODO Auto-generated catch block
460
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
461
        } catch (IOException e) {
462
            // TODO Auto-generated catch block
463
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
464
        } catch (SAXException e) {
465
            // TODO Auto-generated catch block
466
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
467
        } catch (ParserConfigurationException e) {
468
            // TODO Auto-generated catch block
469
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
391 470
        }
392 471
    }
393 472
    
......
412 491
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
413 492
        
414 493
        List<String> resourceMapIds = new ArrayList();
494
        List<String> resourceMapDeletedIds = new ArrayList();
415 495
        List<String> otherIds = new ArrayList();
416
        List[] ids = new List[2];
496
        List<String> otherDeletedIds = new ArrayList();
497
        List[] ids = new List[4];
417 498
        ids[FIRST]= otherIds;
418 499
        ids[SECOND] = resourceMapIds;
500
        ids[THIRD]  = otherDeletedIds;
501
        ids[FOURTH] = resourceMapDeletedIds;
419 502
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
420 503
        Date otherPreviousDate = null;
504
        Date otherDeletedPreviousDate = null;
421 505
        Date resourceMapPreviousDate = null;
506
        Date resourceMapDeletedPreviousDate = null;
422 507
        if(metacatIds != null) {
423 508
            for(Identifier identifier : metacatIds) {
424 509
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
425 510
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
426
                    if(sysmeta != null && !sysmeta.getArchived()) {
511
                    if(sysmeta != null) {
427 512
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
428 513
                        //System.out.println("the object format id is "+formatId.getValue());
429 514
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
......
446 531
                        }
447 532
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
448 533
                            //for the resource map
449
                            if(!resourceMapIds.isEmpty()) {
450
                                if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
451
                                    resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
452
                                    resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
534
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
535
                                //archived ids
536
                                if(!resourceMapDeletedIds.isEmpty()) {
537
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
538
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
539
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapDeletedPreviousDate to the bigger one
540
                                    } else {
541
                                        int size = resourceMapDeletedIds.size();//
542
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
543
                                    }
453 544
                                } else {
454
                                    int size = resourceMapIds.size();//
455
                                    resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
545
                                    resourceMapDeletedIds.add(identifier.getValue());
546
                                    resourceMapDeletedPreviousDate = sysDate;//init resourceMapDeletedPreviousDate
456 547
                                }
457 548
                            } else {
458
                                resourceMapIds.add(identifier.getValue());
459
                                resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
549
                                // current ids
550
                                if(!resourceMapIds.isEmpty()) {
551
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
552
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
553
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
554
                                    } else {
555
                                        int size = resourceMapIds.size();//
556
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
557
                                    }
558
                                } else {
559
                                    resourceMapIds.add(identifier.getValue());
560
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
561
                                }
460 562
                            }
461
                            
462 563
                        } else if (correctTimeRange) {
463
                            if(!otherIds.isEmpty()) {
464
                                if(sysDate.getTime() > otherPreviousDate.getTime()) {
465
                                    otherIds.add(identifier.getValue());
466
                                    otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
564
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
565
                                //for the archived ids
566
                                if(!otherDeletedIds.isEmpty()) {
567
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
568
                                        otherDeletedIds.add(identifier.getValue());
569
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
570
                                    } else {
571
                                        int size = otherDeletedIds.size();
572
                                        otherDeletedIds.add(size-1, identifier.getValue());
573
                                    }
467 574
                                } else {
468
                                    int size = otherIds.size();
469
                                    otherIds.add(size-1, identifier.getValue());
575
                                    otherDeletedIds.add(identifier.getValue());
576
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
470 577
                                }
471 578
                            } else {
472
                                otherIds.add(identifier.getValue());
473
                                otherPreviousDate = sysDate;//init otherPreviousDate
579
                                //for the current ids
580
                                if(!otherIds.isEmpty()) {
581
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
582
                                        otherIds.add(identifier.getValue());
583
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
584
                                    } else {
585
                                        int size = otherIds.size();
586
                                        otherIds.add(size-1, identifier.getValue());
587
                                    }
588
                                } else {
589
                                    otherIds.add(identifier.getValue());
590
                                    otherPreviousDate = sysDate;//init otherPreviousDate
591
                                }
474 592
                            }
475
                            
476 593
                        }
477 594
                        
478 595
                    }
......
523 640
    }
524 641
    
525 642
    /*
643
     * Remove the solr index for the list of ids
644
     */
645
    private void removeIndex(List<String> ids) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException {
646
        if(ids!= null) {
647
            for(String id :ids) {
648
                removeIndex(id);
649
            }
650
        }
651
    }
652
    
653
    /*
526 654
     * Remove the index for the id
527 655
     */
528
    private void removeIndex(String id) throws Exception {
656
    private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException  {
529 657
        if(id != null) {
530 658
            solrIndex.remove(id);
531 659
        }

Also available in: Unified diff