Project

General

Profile

/**
 *  '$RCSfile$'
 *    Purpose: A class that generates Solr indexes in bulk for the
 *             documents stored in Metacat
 *  Copyright: 2000 Regents of the University of California and the
 *             National Center for Ecological Analysis and Synthesis
 *    Authors: Jivka Bojilova, Matt Jones
 *
 *   '$Author: leinfelder $'
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
 * '$Revision: 6595 $'
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.File;
30
import java.io.FileInputStream;
31
import java.io.FileNotFoundException;
32
import java.io.IOException;
33
import java.io.InputStream;
34
import java.text.SimpleDateFormat;
35
import java.util.ArrayList;
36
import java.util.Calendar;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.List;
40
import java.util.TimerTask;
41

    
42
import javax.xml.parsers.ParserConfigurationException;
43
import javax.xml.xpath.XPathExpressionException;
44

    
45
import org.apache.commons.io.FileUtils;
46
import org.apache.commons.logging.Log;
47
import org.apache.commons.logging.LogFactory;
48
import org.apache.solr.client.solrj.SolrServerException;
49
import org.dataone.configuration.Settings;
50
import org.dataone.service.exceptions.InvalidRequest;
51
import org.dataone.service.exceptions.InvalidToken;
52
import org.dataone.service.exceptions.NotAuthorized;
53
import org.dataone.service.exceptions.NotFound;
54
import org.dataone.service.exceptions.NotImplemented;
55
import org.dataone.service.exceptions.ServiceFailure;
56
import org.dataone.service.exceptions.UnsupportedType;
57
import org.dataone.service.types.v1.Event;
58
import org.dataone.service.types.v1.Identifier;
59
import org.dataone.service.types.v1.ObjectFormatIdentifier;
60
import org.dataone.service.types.v1.SystemMetadata;
61
import org.xml.sax.SAXException;
62

    
63
import com.hazelcast.core.IMap;
64
import com.hazelcast.core.ISet;
65

    
66
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
67
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
68
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
69
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
70

    
71

    
72
/**
 * A class that generates Solr indexes in bulk.
 * This can happen during an update of Metacat (generating the index for all existing documents)
 * or when regenerating the index for those documents
 * whose index failed to build during an insert or update.
 * 
 * @author tao
 *
 */
81
public class IndexGenerator extends TimerTask {
82
    
83
    private static final int FIRST =0;
84
    private static final int SECOND =1;
85
    private static final int THIRD = 2;
86
    private static final int FOURTH = 3;
87
    public static final int WAITTIME = 10000;
88
    public static final int MAXWAITNUMBER = 180;
89
    private static final String HTTP = "http://";
90
    private static final String MNAPPENDIX = "/d1/mn";
91
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
92
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
93
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
94
    
95
    private static int waitingTime = WAITTIME;
96
    private static int maxAttempts = MAXWAITNUMBER;
97
    
98
    private SolrIndex solrIndex = null;
99
    //private SystemMetadataEventListener systemMetadataListener = null;
100
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
101
    private IMap<Identifier, String> objectPathMap;
102
    private Log log = LogFactory.getLog(IndexGenerator.class);
103
    //private MNode mNode = null;
104
    private static List<String> resourceMapNamespaces = null;
105
    
106
    /**
107
     * Constructor
108
     * @param solrIndex
109
     * @param systemMetadataListener
110
     */
111
    public IndexGenerator(SolrIndex solrIndex) {
112
        this.solrIndex = solrIndex;
113
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
114
        //this.systemMetadataListener = systemMetadataListener;
115
        //this.mNode = new MNode(buildMNBaseURL());
116
        try {
117
            waitingTime = Settings.getConfiguration().getInt(WAITIMEPOPERTYNAME);
118
            maxAttempts = Settings.getConfiguration().getInt(MAXATTEMPTSPROPERTYNAME);
119
        } catch (Exception e) {
120
            log.warn("IndexGenerator.constructor - couldn't read the waiting time or maxattempts from the metacat.properties file since : "+e.getMessage()+". Default values will be used");
121
            waitingTime = WAITTIME;
122
            maxAttempts = MAXWAITNUMBER;
123
        }
124
    }
125
    
126
    /**
127
     * Build the index for all documents in Metacat without overwriting.
128
     * @throws SolrServerException 
129
     * @throws ServiceFailure 
130
     * @throws NotImplemented 
131
     * @throws NotAuthorized 
132
     * @throws InvalidToken 
133
     * @throws InvalidRequest 
134
     * @throws IndexEventLogException 
135
     * @throws IllegalAccessException 
136
     * @throws InstantiationException 
137
     * @throws ClassNotFoundException 
138
     */
139
    /*public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
140
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
141
        boolean force = false;
142
        indexAll(force);
143
    }*/
144
    
145
    /**
146
     * Build the index for all documents.
147
     * @throws SolrServerException 
148
     * @throws ServiceFailure 
149
     * @throws NotImplemented 
150
     * @throws NotAuthorized 
151
     * @throws InvalidToken 
152
     * @throws InvalidRequest 
153
     * @throws IndexEventLogException 
154
     * @throws IllegalAccessException 
155
     * @throws InstantiationException 
156
     * @throws ClassNotFoundException 
157
     * @throws ParserConfigurationException 
158
     * @throws SAXException 
159
     * @throws IOException 
160
     * @throws UnsupportedType 
161
     * @throws NotFound 
162
     * @throws XPathExpressionException 
163
     */
164
    public void indexAll() throws InvalidRequest, InvalidToken,
165
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
166
        Date since = null;
167
        Date until = null;
168
        index(since, until);
169
    }
170
    
171
    /**
172
     * Build the index for the docs which have been modified since the specified date.
173
     * @param since
174
     * @throws SolrServerException 
175
     * @throws ServiceFailure 
176
     * @throws NotImplemented 
177
     * @throws NotAuthorized 
178
     * @throws InvalidToken 
179
     * @throws InvalidRequest 
180
     * @throws IndexEventLogException 
181
     * @throws IllegalAccessException 
182
     * @throws InstantiationException 
183
     * @throws ClassNotFoundException 
184
     * @throws ParserConfigurationException 
185
     * @throws SAXException 
186
     * @throws IOException 
187
     * @throws UnsupportedType 
188
     * @throws NotFound 
189
     * @throws XPathExpressionException 
190
     */
191
    public void index(Date since) throws InvalidRequest, InvalidToken, 
192
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
193
        Date until = null;
194
        index(since, until);
195
    }
196
    
197
    /**
198
     *  Build the index for the docs which have been modified between the specified date.s
199
     * @param since
200
     * @param until
201
     * @throws SolrServerException 
202
     * @throws ServiceFailure 
203
     * @throws NotImplemented 
204
     * @throws NotAuthorized 
205
     * @throws InvalidToken 
206
     * @throws InvalidRequest 
207
     * @throws IndexEventLogException 
208
     * @throws IllegalAccessException 
209
     * @throws InstantiationException 
210
     * @throws ClassNotFoundException 
211
     * @throws ParserConfigurationException 
212
     * @throws SAXException 
213
     * @throws IOException 
214
     * @throws UnsupportedType 
215
     * @throws NotFound 
216
     * @throws XPathExpressionException 
217
     */
218
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
219
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
220
        Date processedDate = null;
221
        List<String> solrIds = null;
222
        initSystemMetadataMap();
223
        initObjectPathMap();
224
        List[] metacatIds = getMetacatIds(since, until);
225
        List<String> otherMetacatIds = metacatIds[FIRST];
226
        List<String> resourceMapIds =  metacatIds[SECOND];
227
        List<String> otherDeletedMetacatIds = metacatIds[THIRD];
228
        List<String> resourceMapDeletedIds = metacatIds[FOURTH];
229
        
230
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
231
        List<Long> maxCollection = new ArrayList<Long>();
232
        Date latestOtherId = null;
233
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
234
            int size = otherMetacatIds.size();
235
            String id = otherMetacatIds.get(size-1);
236
            SystemMetadata sysmeta = getSystemMetadata(id);
237
            latestOtherId = sysmeta.getDateSysMetadataModified();
238
            maxCollection.add(new Long(latestOtherId.getTime()));
239
        }
240
        
241
        Date latestDeletedOtherIds = null;
242
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
243
            int size = otherDeletedMetacatIds.size();
244
            String id = otherDeletedMetacatIds.get(size-1);
245
            SystemMetadata sysmeta = getSystemMetadata(id);
246
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
247
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
248
        }
249
        
250
        Date latestResourceId = null;
251
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
252
            int size = resourceMapIds.size();
253
            String id = resourceMapIds.get(size-1);
254
            SystemMetadata sysmeta = getSystemMetadata(id);
255
            latestResourceId = sysmeta.getDateSysMetadataModified();
256
            maxCollection.add(new Long(latestResourceId.getTime()));
257
        }
258
        
259
        Date latestDeletedResourceId = null;
260
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
261
            int size = resourceMapDeletedIds.size();
262
            String id = resourceMapDeletedIds.get(size-1);
263
            SystemMetadata sysmeta = getSystemMetadata(id);
264
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
265
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
266
        }
267
        
268
        if(!maxCollection.isEmpty()) {
269
            Long max = Collections.max(maxCollection);
270
            processedDate = new Date(max.longValue());
271
        }
272
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
273
            processedDate = latestOtherId;
274
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
275
            processedDate = latestResourceId;
276
        } else if (latestOtherId == null && latestResourceId != null) {
277
            processedDate = latestResourceId;
278
        } else if (latestOtherId != null && latestResourceId == null) {
279
            processedDate = latestOtherId;
280
        }*/
281
        
282
        
283
        //add the failedPids 
284
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
285
        List<IndexEvent> failedOtherIds = new ArrayList<IndexEvent>();
286
        List<IndexEvent> failedResourceMapIds = new ArrayList<IndexEvent>();
287
        if(failedEvents != null) {
288
            for(IndexEvent event : failedEvents) {
289
            	String id = event.getIdentifier().getValue();
290
                SystemMetadata sysmeta = getSystemMetadata(id);
291
                if(sysmeta != null) {
292
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
293
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
294
                        failedResourceMapIds.add(event);
295
                    } else {
296
                        failedOtherIds.add(event);
297
                    }
298
                }
299
            }
300
        }
301
        indexFailedIds(failedOtherIds);
302
        indexFailedIds(failedResourceMapIds);
303
        
304
        /*if(!failedOtherIds.isEmpty()) {
305
            failedOtherIds.addAll(otherMetacatIds);
306
        } else {
307
            failedOtherIds = otherMetacatIds;
308
        }
309
        
310
        if(!failedResourceMapIds.isEmpty()) {
311
            failedResourceMapIds.addAll(resourceMapIds);
312
        } else {
313
            failedResourceMapIds = resourceMapIds;
314
        }*/
315
        
316
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
317
        logFile(otherMetacatIds);
318
        log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
319
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
320
        logFile(resourceMapIds);
321
        log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
322
        index(otherMetacatIds);
323
        removeIndex(otherDeletedMetacatIds);
324
        index(resourceMapIds);
325
        removeIndex(resourceMapDeletedIds);
326
       
327
        //record the timed index.
328
        if(processedDate != null) {
329
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
330
        }
331
        
332
    }
333
    
334
    /*
335
     * Write the docids which will be indexed into a file. 
336
     */
337
    private void logFile(List<String> ids)  {
338
        if(ids != null) {
339
            try {
340
                String tempDir = System.getProperty("java.io.tmpdir");
341
                log.info("the temp dir is ===================== "+tempDir);
342
                File idsForIndex = new File(tempDir, "ids-for-timed-indexing-log");
343
                if(!idsForIndex.exists()) {
344
                    idsForIndex.createNewFile();
345
                }
346
                Date date = Calendar.getInstance().getTime();
347
                SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
348
                String dateStr = format.format(date);
349
                List<String> dateList = new ArrayList<String>();
350
                dateList.add(dateStr);
351
                Boolean append = true;
352
                FileUtils.writeLines(idsForIndex, dateList, append);//write time string
353
                FileUtils.writeLines(idsForIndex, ids, append);
354
            } catch (Exception e) {
355
                log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage());
356
            }
357
           
358
        }
359
    }
360
    /*
361
     * Doing index
362
     */
363
    private void index(List<String> metacatIds) {
364
        if(metacatIds != null) {
365
            for(String metacatId : metacatIds) {
366
                if(metacatId != null) {
367
                        try {
368
                            generateIndex(metacatId);
369
                        } catch (Exception e) {
370
                            IndexEvent event = new IndexEvent();
371
                            Identifier pid = new Identifier();
372
                            pid.setValue(metacatId);
373
                            event.setIdentifier(pid);
374
                            event.setDate(Calendar.getInstance().getTime());
375
                            event.setAction(Event.CREATE);
376
                            String error = "IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage();
377
                            event.setDescription(error);
378
                            try {
379
                                EventlogFactory.createIndexEventLog().write(event);
380
                            } catch (Exception ee) {
381
                                log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
382
                            }
383
                            log.error(error);
384
                        }
385
                        
386
                   
387
                }
388
            }
389
        }
390
    }
391
    
392
    /*
393
     * Index those ids which failed in the process (We got them from the EventLog)
394
     */
395
    private void indexFailedIds(List<IndexEvent> events) {
396
        if(events != null) {
397
            for(IndexEvent event : events) {
398
                if(event != null) {
399
                    Identifier identifier = event.getIdentifier();
400
                    if(identifier != null) {
401
                        String id = identifier.getValue();
402
                        if(id != null) {
403
                            Event action = event.getAction();
404
                            if (action != null && action.equals(Event.CREATE)) {
405
                                try {
406
                                    generateIndex(id);
407
                                    EventlogFactory.createIndexEventLog().remove(identifier);
408
                                } catch (Exception e) {
409
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
410
                                }
411
                            } else if (action != null && action.equals(Event.DELETE)) {
412
                                try {
413
                                    removeIndex(id);
414
                                    EventlogFactory.createIndexEventLog().remove(identifier);
415
                                } catch (Exception e) {
416
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage());
417
                                }
418
                            }
419
                        }
420
                    }
421
                }
422
            }
423
        }
424
    }
425
    
426
    public void run() {
427
        /*IndexEvent event = new IndexEvent();
428
        event.setDate(Calendar.getInstance().getTime());
429
        event.setType(IndexEvent.STARTTIMEDINDEX);
430
        event.setDescription("Start the timed index job");
431
        try {
432
            EventlogFactory.createIndexEventLog().write(event);
433
        } catch (Exception e) {
434
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing start event :"+e.getMessage());
435
        }*/
436
        try {
437
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
438
            index(since);
439
        } catch (InvalidRequest e) {
440
            // TODO Auto-generated catch block
441
            //e.printStackTrace();
442
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
443
        } catch (InvalidToken e) {
444
            // TODO Auto-generated catch block
445
            //e.printStackTrace();
446
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
447
        } catch (NotAuthorized e) {
448
            // TODO Auto-generated catch block
449
            //e.printStackTrace();
450
        } catch (NotImplemented e) {
451
            // TODO Auto-generated catch block
452
            //e.printStackTrace();
453
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
454
        } catch (ServiceFailure e) {
455
            // TODO Auto-generated catch block
456
            //e.printStackTrace();
457
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
458
        } catch (SolrServerException e) {
459
            // TODO Auto-generated catch block
460
            //e.printStackTrace();
461
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
462
        } catch (FileNotFoundException e) {
463
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
464
        }
465
        /*event.setDate(Calendar.getInstance().getTime());
466
        event.setType(IndexEvent.FINISHTIMEDINDEX);
467
        event.setDescription("Finish the timed index job");
468
        try {
469
            EventlogFactory.createIndexEventLog().write(event);
470
        } catch (Exception e) {
471
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing finish event :"+e.getMessage());
472
        }*/ catch (ClassNotFoundException e) {
473
            // TODO Auto-generated catch block
474
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
475
        } catch (InstantiationException e) {
476
            // TODO Auto-generated catch block
477
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
478
        } catch (IllegalAccessException e) {
479
            // TODO Auto-generated catch block
480
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
481
        } catch (IndexEventLogException e) {
482
            // TODO Auto-generated catch block
483
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
484
        } catch (XPathExpressionException e) {
485
            // TODO Auto-generated catch block
486
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
487
        } catch (NotFound e) {
488
            // TODO Auto-generated catch block
489
            e.printStackTrace();
490
        } catch (UnsupportedType e) {
491
            // TODO Auto-generated catch block
492
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
493
        } catch (IOException e) {
494
            // TODO Auto-generated catch block
495
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
496
        } catch (SAXException e) {
497
            // TODO Auto-generated catch block
498
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
499
        } catch (ParserConfigurationException e) {
500
            // TODO Auto-generated catch block
501
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
502
        }
503
    }
504
    
505
    /*
506
     * Get the indexed ids list from the solr server.
507
     * An empty list will be returned if there is no ids.
508
     */
509
    private List<String> getSolrDocIds() throws SolrServerException {
510
        List<String> ids = solrIndex.getSolrIds();
511
        return ids;
512
    }
513
    
514
    /*
515
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
516
     * 
517
     * If since and util are null, it will return all of them.
518
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
519
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
520
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
521
     */
522
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
523
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
524
        
525
        List<String> resourceMapIds = new ArrayList();
526
        List<String> resourceMapDeletedIds = new ArrayList();
527
        List<String> otherIds = new ArrayList();
528
        List<String> otherDeletedIds = new ArrayList();
529
        List[] ids = new List[4];
530
        ids[FIRST]= otherIds;
531
        ids[SECOND] = resourceMapIds;
532
        ids[THIRD]  = otherDeletedIds;
533
        ids[FOURTH] = resourceMapDeletedIds;
534
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
535
        Date otherPreviousDate = null;
536
        Date otherDeletedPreviousDate = null;
537
        Date resourceMapPreviousDate = null;
538
        Date resourceMapDeletedPreviousDate = null;
539
        if(metacatIds != null) {
540
            for(Identifier identifier : metacatIds) {
541
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
542
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
543
                    if(sysmeta != null) {
544
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
545
                        //System.out.println("the object format id is "+formatId.getValue());
546
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
547
                        boolean correctTimeRange = false;
548
                        Date sysDate = sysmeta.getDateSysMetadataModified();
549
                        if(since == null && until == null) {
550
                            correctTimeRange = true;
551
                        } else if (since != null && until == null) {
552
                            if(sysDate.getTime() > since.getTime()) {
553
                                correctTimeRange = true;
554
                            }
555
                        } else if (since == null && until != null) {
556
                            if(sysDate.getTime() < until.getTime()) {
557
                                correctTimeRange = true;
558
                            }
559
                        } else if (since != null && until != null) {
560
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
561
                                correctTimeRange = true;
562
                            }
563
                        }
564
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
565
                            //for the resource map
566
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
567
                                //archived ids
568
                                if(!resourceMapDeletedIds.isEmpty()) {
569
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
570
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
571
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
572
                                    } else {
573
                                        int size = resourceMapDeletedIds.size();//
574
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
575
                                    }
576
                                } else {
577
                                    resourceMapDeletedIds.add(identifier.getValue());
578
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
579
                                }
580
                            } else {
581
                                // current ids
582
                                if(!resourceMapIds.isEmpty()) {
583
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
584
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
585
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
586
                                    } else {
587
                                        int size = resourceMapIds.size();//
588
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
589
                                    }
590
                                } else {
591
                                    resourceMapIds.add(identifier.getValue());
592
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
593
                                }
594
                            }
595
                        } else if (correctTimeRange) {
596
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
597
                                //for the archived ids
598
                                if(!otherDeletedIds.isEmpty()) {
599
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
600
                                        otherDeletedIds.add(identifier.getValue());
601
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
602
                                    } else {
603
                                        int size = otherDeletedIds.size();
604
                                        otherDeletedIds.add(size-1, identifier.getValue());
605
                                    }
606
                                } else {
607
                                    otherDeletedIds.add(identifier.getValue());
608
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
609
                                }
610
                            } else {
611
                                //for the current ids
612
                                if(!otherIds.isEmpty()) {
613
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
614
                                        otherIds.add(identifier.getValue());
615
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
616
                                    } else {
617
                                        int size = otherIds.size();
618
                                        otherIds.add(size-1, identifier.getValue());
619
                                    }
620
                                } else {
621
                                    otherIds.add(identifier.getValue());
622
                                    otherPreviousDate = sysDate;//init otherPreviousDate
623
                                }
624
                            }
625
                        }
626
                        
627
                    }
628
                }
629
            }
630
        }
631
        return ids;
632
    }
633
    
634
    /*
635
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
636
     */
637
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
638
        boolean isResourceMap = false;
639
        if(formatId != null && resourceMapNamespaces != null) {
640
            for(String namespace : resourceMapNamespaces) {
641
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
642
                    isResourceMap = true;
643
                    break;
644
                }
645
            }
646
        }
647
        return isResourceMap;
648
    }
649
    
650
   
651
    
652
    /*
653
     * Generate index for the id.
654
     */
655
    private void generateIndex(String id) throws Exception {
656
        if(id != null)  {
657
                SystemMetadata sysmeta = getSystemMetadata(id);
658
                //only update none-archived id.
659
                if(sysmeta != null && !sysmeta.getArchived() && sysmeta.getObsoletedBy() == null) {
660
                        InputStream data = getDataObject(id);
661
                        Identifier obsolete = sysmeta.getObsoletes();
662
                        List<String> obsoleteChain = null;
663
                        if(obsolete != null) {
664
                            obsoleteChain = getObsoletes(id);
665
                        } 
666
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
667
                } else {
668
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
669
                }
670
           
671
        }
672
    }
673
    
674
    /*
675
     * Remove the solr index for the list of ids
676
     */
677
    private void removeIndex(List<String> ids) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException {
678
        if(ids!= null) {
679
            for(String id :ids) {
680
                removeIndex(id);
681
            }
682
        }
683
    }
684
    
685
    /*
686
     * Remove the index for the id
687
     */
688
    private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException  {
689
        if(id != null) {
690
            solrIndex.remove(id);
691
        }
692
    }
693
    
694
    /*
695
     * Initialize the system metadata map
696
     */
697
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
698
        int times = 0;
699
        if(systemMetadataMap == null) {
700
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
701
            /*while(true) {
702
                try {
703
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
704
                    break;
705
                } catch (FileNotFoundException e) {
706
                    throw e;
707
                } catch (ServiceFailure e) {
708
                    if(times <= maxAttempts) {
709
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
710
                                         +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
711
                        try {
712
                            Thread.sleep(waitingTime);
713
                        } catch (Exception ee) {
714
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for "+waitingTime/1000+" seconds to wait the hazelcast service");
715
                        }
716
                       
717
                    } else {
718
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the system metadata from it and the building index can't happen this time");
719
                    }
720
                }
721
                times++;
722
            }*/
723
        }
724
    }
725
    
726
    /*
727
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
728
     */
729
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
730
        if(objectPathMap == null) {
731
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
732
        }
733
    }
734
    /**
735
     * Get an InputStream as the data object for the specific pid.
736
     * @param pid
737
     * @return
738
     * @throws FileNotFoundException
739
     */
740
    private InputStream getDataObject(String pid) throws FileNotFoundException {
741
        Identifier identifier = new Identifier();
742
        identifier.setValue(pid);
743
        String objectPath = objectPathMap.get(identifier);
744
        InputStream data = null;
745
        data = new FileInputStream(objectPath);
746
        return data;
747

    
748
    }
749
    
750
    /**
751
     * Get the SystemMetadata for the specified id from the distributed Map.
752
     * The null maybe is returned if there is no system metadata found.
753
     * @param id  the specified id.
754
     * @return the SystemMetadata associated with the id.
755
     */
756
    private SystemMetadata getSystemMetadata(String id) {
757
        SystemMetadata metadata = null;
758
        if(systemMetadataMap != null && id != null) {
759
            Identifier identifier = new Identifier();
760
            identifier.setValue(id);
761
            metadata = systemMetadataMap.get(identifier);
762
        }
763
        return metadata;
764
    }
765
    
766
    /**
767
     * Get the obsoletes chain of the specified id. The returned list doesn't include
768
     * the specified id itself. The newer version has the lower index number in the list.
769
     * Empty list will be returned if there is no document to be obsoleted by this id.
770
     * @param id
771
     * @return
772
     */
773
    private List<String> getObsoletes(String id) {
774
        List<String> obsoletes = new ArrayList<String>();
775
        while (id != null) {
776
            SystemMetadata metadata = getSystemMetadata(id);
777
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
778
            if(metadata != null) {
779
                Identifier identifier = metadata.getObsoletes();
780
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
781
                    obsoletes.add(identifier.getValue());
782
                    id = identifier.getValue();
783
                } 
784
            } 
785
        }
786
        return obsoletes;
787
    }
788
    
789
    /**
790
     * Overwrite and do nothing
791
     */
792
    public boolean cancel() {
793
        return true;
794
    }
795

    
796
}
(3-3/6)