Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that gets Accession Number, check for uniqueness
4
 *             and register it into db
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.File;
30
import java.io.FileInputStream;
31
import java.io.FileNotFoundException;
32
import java.io.IOException;
33
import java.io.InputStream;
34
import java.text.SimpleDateFormat;
35
import java.util.ArrayList;
36
import java.util.Calendar;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.List;
40
import java.util.TimerTask;
41

    
42
import javax.xml.parsers.ParserConfigurationException;
43
import javax.xml.xpath.XPathExpressionException;
44

    
45
import org.apache.commons.io.FileUtils;
46
import org.apache.commons.logging.Log;
47
import org.apache.commons.logging.LogFactory;
48
import org.apache.solr.client.solrj.SolrServerException;
49
import org.dataone.configuration.Settings;
50
import org.dataone.service.exceptions.InvalidRequest;
51
import org.dataone.service.exceptions.InvalidToken;
52
import org.dataone.service.exceptions.NotAuthorized;
53
import org.dataone.service.exceptions.NotFound;
54
import org.dataone.service.exceptions.NotImplemented;
55
import org.dataone.service.exceptions.ServiceFailure;
56
import org.dataone.service.exceptions.UnsupportedType;
57
import org.dataone.service.types.v1.Event;
58
import org.dataone.service.types.v1.Identifier;
59
import org.dataone.service.types.v1.ObjectFormatIdentifier;
60
import org.dataone.service.types.v1.SystemMetadata;
61
import org.xml.sax.SAXException;
62

    
63
import com.hazelcast.core.IMap;
64
import com.hazelcast.core.ISet;
65

    
66
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
67
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
68
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
69
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
70

    
71

    
72
/**
 * Generates solr indexes in bulk.
 * This can happen during an update of Metacat (generating the index for all existing documents)
 * or when regenerating the index for those documents
 * that failed to be indexed during an insert or update.
 * 
 * @author tao
 *
 */
81
public class IndexGenerator extends TimerTask {
82
    
83
    private static final int FIRST =0;
84
    private static final int SECOND =1;
85
    private static final int THIRD = 2;
86
    private static final int FOURTH = 3;
87
    public static final int WAITTIME = 10000;
88
    public static final int MAXWAITNUMBER = 180;
89
    private static final String HTTP = "http://";
90
    private static final String MNAPPENDIX = "/d1/mn";
91
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
92
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
93
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
94
    
95
    private static int waitingTime = WAITTIME;
96
    private static int maxAttempts = MAXWAITNUMBER;
97
    
98
    private SolrIndex solrIndex = null;
99
    //private SystemMetadataEventListener systemMetadataListener = null;
100
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
101
    private IMap<Identifier, String> objectPathMap;
102
    private Log log = LogFactory.getLog(IndexGenerator.class);
103
    //private MNode mNode = null;
104
    private static List<String> resourceMapNamespaces = null;
105
    
106
    /**
     * Creates a generator that works against the given solr index.
     * <p>
     * The resource map namespaces are read from the {@code index.resourcemap.namespace}
     * setting. The waiting time and the maximum number of attempts are read from
     * {@code index.regenerate.start.waitingtime} and {@code index.regenerate.start.maxattempts};
     * if reading either of them fails, both fall back to {@link #WAITTIME} and
     * {@link #MAXWAITNUMBER}. These three values live in static fields, so every
     * constructor call overwrites them for all instances.
     *
     * @param solrIndex the solr index this generator works against
     */
    public IndexGenerator(SolrIndex solrIndex) {
        this.solrIndex = solrIndex;
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
        try {
            waitingTime = Settings.getConfiguration().getInt(WAITIMEPOPERTYNAME);
            maxAttempts = Settings.getConfiguration().getInt(MAXATTEMPTSPROPERTYNAME);
        } catch (Exception e) {
            // Pass the exception along so the underlying cause is not lost from the log.
            log.warn("IndexGenerator.constructor - couldn't read the waiting time or maxattempts from the metacat.properties file since : "+e.getMessage()+". Default values will be used", e);
            // Reset both values: the first read may have succeeded before the second one failed.
            waitingTime = WAITTIME;
            maxAttempts = MAXWAITNUMBER;
        }
    }
125
    
126
    /**
127
     * Build the index for all documents in Metacat without overwriting.
128
     * @throws SolrServerException 
129
     * @throws ServiceFailure 
130
     * @throws NotImplemented 
131
     * @throws NotAuthorized 
132
     * @throws InvalidToken 
133
     * @throws InvalidRequest 
134
     * @throws IndexEventLogException 
135
     * @throws IllegalAccessException 
136
     * @throws InstantiationException 
137
     * @throws ClassNotFoundException 
138
     */
139
    /*public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
140
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
141
        boolean force = false;
142
        indexAll(force);
143
    }*/
144
    
145
    /**
146
     * Build the index for all documents.
147
     * @throws SolrServerException 
148
     * @throws ServiceFailure 
149
     * @throws NotImplemented 
150
     * @throws NotAuthorized 
151
     * @throws InvalidToken 
152
     * @throws InvalidRequest 
153
     * @throws IndexEventLogException 
154
     * @throws IllegalAccessException 
155
     * @throws InstantiationException 
156
     * @throws ClassNotFoundException 
157
     * @throws ParserConfigurationException 
158
     * @throws SAXException 
159
     * @throws IOException 
160
     * @throws UnsupportedType 
161
     * @throws NotFound 
162
     * @throws XPathExpressionException 
163
     */
164
    public void indexAll() throws InvalidRequest, InvalidToken,
165
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
166
        Date since = null;
167
        Date until = null;
168
        index(since, until);
169
    }
170
    
171
    /**
172
     * Build the index for the docs which have been modified since the specified date.
173
     * @param since
174
     * @throws SolrServerException 
175
     * @throws ServiceFailure 
176
     * @throws NotImplemented 
177
     * @throws NotAuthorized 
178
     * @throws InvalidToken 
179
     * @throws InvalidRequest 
180
     * @throws IndexEventLogException 
181
     * @throws IllegalAccessException 
182
     * @throws InstantiationException 
183
     * @throws ClassNotFoundException 
184
     * @throws ParserConfigurationException 
185
     * @throws SAXException 
186
     * @throws IOException 
187
     * @throws UnsupportedType 
188
     * @throws NotFound 
189
     * @throws XPathExpressionException 
190
     */
191
    public void index(Date since) throws InvalidRequest, InvalidToken, 
192
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
193
        Date until = null;
194
        index(since, until);
195
    }
196
    
197
    /**
198
     *  Build the index for the docs which have been modified between the specified date.s
199
     * @param since
200
     * @param until
201
     * @throws SolrServerException 
202
     * @throws ServiceFailure 
203
     * @throws NotImplemented 
204
     * @throws NotAuthorized 
205
     * @throws InvalidToken 
206
     * @throws InvalidRequest 
207
     * @throws IndexEventLogException 
208
     * @throws IllegalAccessException 
209
     * @throws InstantiationException 
210
     * @throws ClassNotFoundException 
211
     * @throws ParserConfigurationException 
212
     * @throws SAXException 
213
     * @throws IOException 
214
     * @throws UnsupportedType 
215
     * @throws NotFound 
216
     * @throws XPathExpressionException 
217
     */
218
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
219
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException {
220
        Date processedDate = null;
221
        List<String> solrIds = null;
222
        initSystemMetadataMap();
223
        initObjectPathMap();
224
        List[] metacatIds = getMetacatIds(since, until);
225
        List<String> otherMetacatIds = metacatIds[FIRST];
226
        List<String> resourceMapIds =  metacatIds[SECOND];
227
        List<String> otherDeletedMetacatIds = metacatIds[THIRD];
228
        List<String> resourceMapDeletedIds = metacatIds[FOURTH];
229
        
230
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
231
        List<Long> maxCollection = new ArrayList<Long>();
232
        Date latestOtherId = null;
233
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
234
            int size = otherMetacatIds.size();
235
            String id = otherMetacatIds.get(size-1);
236
            SystemMetadata sysmeta = getSystemMetadata(id);
237
            latestOtherId = sysmeta.getDateSysMetadataModified();
238
            maxCollection.add(new Long(latestOtherId.getTime()));
239
        }
240
        
241
        Date latestDeletedOtherIds = null;
242
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
243
            int size = otherDeletedMetacatIds.size();
244
            String id = otherDeletedMetacatIds.get(size-1);
245
            SystemMetadata sysmeta = getSystemMetadata(id);
246
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
247
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
248
        }
249
        
250
        Date latestResourceId = null;
251
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
252
            int size = resourceMapIds.size();
253
            String id = resourceMapIds.get(size-1);
254
            SystemMetadata sysmeta = getSystemMetadata(id);
255
            latestResourceId = sysmeta.getDateSysMetadataModified();
256
            maxCollection.add(new Long(latestResourceId.getTime()));
257
        }
258
        
259
        Date latestDeletedResourceId = null;
260
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
261
            int size = resourceMapDeletedIds.size();
262
            String id = resourceMapDeletedIds.get(size-1);
263
            SystemMetadata sysmeta = getSystemMetadata(id);
264
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
265
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
266
        }
267
        
268
        if(!maxCollection.isEmpty()) {
269
            Long max = Collections.max(maxCollection);
270
            processedDate = new Date(max.longValue());
271
        }
272
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
273
            processedDate = latestOtherId;
274
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
275
            processedDate = latestResourceId;
276
        } else if (latestOtherId == null && latestResourceId != null) {
277
            processedDate = latestResourceId;
278
        } else if (latestOtherId != null && latestResourceId == null) {
279
            processedDate = latestOtherId;
280
        }*/
281
        
282
        
283
        //add the failedPids 
284
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
285
        List<IndexEvent> failedOtherIds = new ArrayList<IndexEvent>();
286
        List<IndexEvent> failedResourceMapIds = new ArrayList<IndexEvent>();
287
        if(failedEvents != null) {
288
            for(IndexEvent event : failedEvents) {
289
            	String id = event.getIdentifier().getValue();
290
                SystemMetadata sysmeta = getSystemMetadata(id);
291
                if(sysmeta != null) {
292
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
293
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
294
                        failedResourceMapIds.add(event);
295
                    } else {
296
                        failedOtherIds.add(event);
297
                    }
298
                }
299
            }
300
        }
301
        indexFailedIds(failedOtherIds);
302
        indexFailedIds(failedResourceMapIds);
303
        
304
        /*if(!failedOtherIds.isEmpty()) {
305
            failedOtherIds.addAll(otherMetacatIds);
306
        } else {
307
            failedOtherIds = otherMetacatIds;
308
        }
309
        
310
        if(!failedResourceMapIds.isEmpty()) {
311
            failedResourceMapIds.addAll(resourceMapIds);
312
        } else {
313
            failedResourceMapIds = resourceMapIds;
314
        }*/
315
        
316
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
317
        logFile(otherMetacatIds);
318
        log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
319
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
320
        logFile(resourceMapIds);
321
        log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
322
        index(otherMetacatIds);
323
        removeIndex(otherDeletedMetacatIds);
324
        index(resourceMapIds);
325
        removeIndex(resourceMapDeletedIds);
326
       
327
        //record the timed index.
328
        if(processedDate != null) {
329
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
330
        }
331
        
332
    }
333
    
334
    /*
335
     * Write the docids which will be indexed into a file. 
336
     */
337
    private void logFile(List<String> ids) throws IOException {
338
        if(ids != null) {
339
            String solrHomeDir = Settings.getConfiguration().getString(SolrServerFactory.SOLR_HOME_PROPERTY_NAME);
340
            File idsForIndex = new File(solrHomeDir, "ids-for-timed-indexing-log");
341
            if(!idsForIndex.exists()) {
342
                idsForIndex.createNewFile();
343
            }
344
            Date date = Calendar.getInstance().getTime();
345
            SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
346
            String dateStr = format.format(date);
347
            List<String> dateList = new ArrayList<String>();
348
            dateList.add(dateStr);
349
            Boolean append = true;
350
            FileUtils.writeLines(idsForIndex, dateList, append);//write time string
351
            FileUtils.writeLines(idsForIndex, ids, append);
352
        }
353
    }
354
    /*
355
     * Doing index
356
     */
357
    private void index(List<String> metacatIds) {
358
        if(metacatIds != null) {
359
            for(String metacatId : metacatIds) {
360
                if(metacatId != null) {
361
                        try {
362
                            generateIndex(metacatId);
363
                        } catch (Exception e) {
364
                            IndexEvent event = new IndexEvent();
365
                            Identifier pid = new Identifier();
366
                            pid.setValue(metacatId);
367
                            event.setIdentifier(pid);
368
                            event.setDate(Calendar.getInstance().getTime());
369
                            event.setAction(Event.CREATE);
370
                            String error = "IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage();
371
                            event.setDescription(error);
372
                            try {
373
                                EventlogFactory.createIndexEventLog().write(event);
374
                            } catch (Exception ee) {
375
                                log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
376
                            }
377
                            log.error(error);
378
                        }
379
                        
380
                   
381
                }
382
            }
383
        }
384
    }
385
    
386
    /*
387
     * Index those ids which failed in the process (We got them from the EventLog)
388
     */
389
    private void indexFailedIds(List<IndexEvent> events) {
390
        if(events != null) {
391
            for(IndexEvent event : events) {
392
                if(event != null) {
393
                    Identifier identifier = event.getIdentifier();
394
                    if(identifier != null) {
395
                        String id = identifier.getValue();
396
                        if(id != null) {
397
                            Event action = event.getAction();
398
                            if (action != null && action.equals(Event.CREATE)) {
399
                                try {
400
                                    generateIndex(id);
401
                                    EventlogFactory.createIndexEventLog().remove(identifier);
402
                                } catch (Exception e) {
403
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
404
                                }
405
                            } else if (action != null && action.equals(Event.DELETE)) {
406
                                try {
407
                                    removeIndex(id);
408
                                    EventlogFactory.createIndexEventLog().remove(identifier);
409
                                } catch (Exception e) {
410
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage());
411
                                }
412
                            }
413
                        }
414
                    }
415
                }
416
            }
417
        }
418
    }
419
    
420
    public void run() {
421
        /*IndexEvent event = new IndexEvent();
422
        event.setDate(Calendar.getInstance().getTime());
423
        event.setType(IndexEvent.STARTTIMEDINDEX);
424
        event.setDescription("Start the timed index job");
425
        try {
426
            EventlogFactory.createIndexEventLog().write(event);
427
        } catch (Exception e) {
428
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing start event :"+e.getMessage());
429
        }*/
430
        try {
431
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
432
            index(since);
433
        } catch (InvalidRequest e) {
434
            // TODO Auto-generated catch block
435
            //e.printStackTrace();
436
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
437
        } catch (InvalidToken e) {
438
            // TODO Auto-generated catch block
439
            //e.printStackTrace();
440
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
441
        } catch (NotAuthorized e) {
442
            // TODO Auto-generated catch block
443
            //e.printStackTrace();
444
        } catch (NotImplemented e) {
445
            // TODO Auto-generated catch block
446
            //e.printStackTrace();
447
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
448
        } catch (ServiceFailure e) {
449
            // TODO Auto-generated catch block
450
            //e.printStackTrace();
451
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
452
        } catch (SolrServerException e) {
453
            // TODO Auto-generated catch block
454
            //e.printStackTrace();
455
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
456
        } catch (FileNotFoundException e) {
457
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
458
        }
459
        /*event.setDate(Calendar.getInstance().getTime());
460
        event.setType(IndexEvent.FINISHTIMEDINDEX);
461
        event.setDescription("Finish the timed index job");
462
        try {
463
            EventlogFactory.createIndexEventLog().write(event);
464
        } catch (Exception e) {
465
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing finish event :"+e.getMessage());
466
        }*/ catch (ClassNotFoundException e) {
467
            // TODO Auto-generated catch block
468
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
469
        } catch (InstantiationException e) {
470
            // TODO Auto-generated catch block
471
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
472
        } catch (IllegalAccessException e) {
473
            // TODO Auto-generated catch block
474
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
475
        } catch (IndexEventLogException e) {
476
            // TODO Auto-generated catch block
477
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
478
        } catch (XPathExpressionException e) {
479
            // TODO Auto-generated catch block
480
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
481
        } catch (NotFound e) {
482
            // TODO Auto-generated catch block
483
            e.printStackTrace();
484
        } catch (UnsupportedType e) {
485
            // TODO Auto-generated catch block
486
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
487
        } catch (IOException e) {
488
            // TODO Auto-generated catch block
489
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
490
        } catch (SAXException e) {
491
            // TODO Auto-generated catch block
492
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
493
        } catch (ParserConfigurationException e) {
494
            // TODO Auto-generated catch block
495
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
496
        }
497
    }
498
    
499
    /*
500
     * Get the indexed ids list from the solr server.
501
     * An empty list will be returned if there is no ids.
502
     */
503
    private List<String> getSolrDocIds() throws SolrServerException {
504
        List<String> ids = solrIndex.getSolrIds();
505
        return ids;
506
    }
507
    
508
    /*
509
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
510
     * 
511
     * If since and util are null, it will return all of them.
512
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
513
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
514
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
515
     */
516
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
517
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
518
        
519
        List<String> resourceMapIds = new ArrayList();
520
        List<String> resourceMapDeletedIds = new ArrayList();
521
        List<String> otherIds = new ArrayList();
522
        List<String> otherDeletedIds = new ArrayList();
523
        List[] ids = new List[4];
524
        ids[FIRST]= otherIds;
525
        ids[SECOND] = resourceMapIds;
526
        ids[THIRD]  = otherDeletedIds;
527
        ids[FOURTH] = resourceMapDeletedIds;
528
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
529
        Date otherPreviousDate = null;
530
        Date otherDeletedPreviousDate = null;
531
        Date resourceMapPreviousDate = null;
532
        Date resourceMapDeletedPreviousDate = null;
533
        if(metacatIds != null) {
534
            for(Identifier identifier : metacatIds) {
535
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
536
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
537
                    if(sysmeta != null) {
538
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
539
                        //System.out.println("the object format id is "+formatId.getValue());
540
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
541
                        boolean correctTimeRange = false;
542
                        Date sysDate = sysmeta.getDateSysMetadataModified();
543
                        if(since == null && until == null) {
544
                            correctTimeRange = true;
545
                        } else if (since != null && until == null) {
546
                            if(sysDate.getTime() > since.getTime()) {
547
                                correctTimeRange = true;
548
                            }
549
                        } else if (since == null && until != null) {
550
                            if(sysDate.getTime() < until.getTime()) {
551
                                correctTimeRange = true;
552
                            }
553
                        } else if (since != null && until != null) {
554
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
555
                                correctTimeRange = true;
556
                            }
557
                        }
558
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
559
                            //for the resource map
560
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
561
                                //archived ids
562
                                if(!resourceMapDeletedIds.isEmpty()) {
563
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
564
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
565
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
566
                                    } else {
567
                                        int size = resourceMapDeletedIds.size();//
568
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
569
                                    }
570
                                } else {
571
                                    resourceMapDeletedIds.add(identifier.getValue());
572
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
573
                                }
574
                            } else {
575
                                // current ids
576
                                if(!resourceMapIds.isEmpty()) {
577
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
578
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
579
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
580
                                    } else {
581
                                        int size = resourceMapIds.size();//
582
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
583
                                    }
584
                                } else {
585
                                    resourceMapIds.add(identifier.getValue());
586
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
587
                                }
588
                            }
589
                        } else if (correctTimeRange) {
590
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
591
                                //for the archived ids
592
                                if(!otherDeletedIds.isEmpty()) {
593
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
594
                                        otherDeletedIds.add(identifier.getValue());
595
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
596
                                    } else {
597
                                        int size = otherDeletedIds.size();
598
                                        otherDeletedIds.add(size-1, identifier.getValue());
599
                                    }
600
                                } else {
601
                                    otherDeletedIds.add(identifier.getValue());
602
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
603
                                }
604
                            } else {
605
                                //for the current ids
606
                                if(!otherIds.isEmpty()) {
607
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
608
                                        otherIds.add(identifier.getValue());
609
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
610
                                    } else {
611
                                        int size = otherIds.size();
612
                                        otherIds.add(size-1, identifier.getValue());
613
                                    }
614
                                } else {
615
                                    otherIds.add(identifier.getValue());
616
                                    otherPreviousDate = sysDate;//init otherPreviousDate
617
                                }
618
                            }
619
                        }
620
                        
621
                    }
622
                }
623
            }
624
        }
625
        return ids;
626
    }
627
    
628
    /*
629
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
630
     */
631
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
632
        boolean isResourceMap = false;
633
        if(formatId != null && resourceMapNamespaces != null) {
634
            for(String namespace : resourceMapNamespaces) {
635
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
636
                    isResourceMap = true;
637
                    break;
638
                }
639
            }
640
        }
641
        return isResourceMap;
642
    }
643
    
644
   
645
    
646
    /*
647
     * Generate index for the id.
648
     */
649
    private void generateIndex(String id) throws Exception {
650
        if(id != null)  {
651
                SystemMetadata sysmeta = getSystemMetadata(id);
652
                //only update none-archived id.
653
                if(sysmeta != null && !sysmeta.getArchived() && sysmeta.getObsoletedBy() == null) {
654
                        InputStream data = getDataObject(id);
655
                        Identifier obsolete = sysmeta.getObsoletes();
656
                        List<String> obsoleteChain = null;
657
                        if(obsolete != null) {
658
                            obsoleteChain = getObsoletes(id);
659
                        } 
660
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
661
                } else {
662
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
663
                }
664
           
665
        }
666
    }
667
    
668
    /*
669
     * Remove the solr index for the list of ids
670
     */
671
    private void removeIndex(List<String> ids) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException {
672
        if(ids!= null) {
673
            for(String id :ids) {
674
                removeIndex(id);
675
            }
676
        }
677
    }
678
    
679
    /*
680
     * Remove the index for the id
681
     */
682
    private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException  {
683
        if(id != null) {
684
            solrIndex.remove(id);
685
        }
686
    }
687
    
688
    /*
689
     * Initialize the system metadata map
690
     */
691
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
692
        int times = 0;
693
        if(systemMetadataMap == null) {
694
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
695
            /*while(true) {
696
                try {
697
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
698
                    break;
699
                } catch (FileNotFoundException e) {
700
                    throw e;
701
                } catch (ServiceFailure e) {
702
                    if(times <= maxAttempts) {
703
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
704
                                         +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
705
                        try {
706
                            Thread.sleep(waitingTime);
707
                        } catch (Exception ee) {
708
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for "+waitingTime/1000+" seconds to wait the hazelcast service");
709
                        }
710
                       
711
                    } else {
712
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the system metadata from it and the building index can't happen this time");
713
                    }
714
                }
715
                times++;
716
            }*/
717
        }
718
    }
719
    
720
    /*
721
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
722
     */
723
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
724
        if(objectPathMap == null) {
725
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
726
        }
727
    }
728
    /**
729
     * Get an InputStream as the data object for the specific pid.
730
     * @param pid
731
     * @return
732
     * @throws FileNotFoundException
733
     */
734
    private InputStream getDataObject(String pid) throws FileNotFoundException {
735
        Identifier identifier = new Identifier();
736
        identifier.setValue(pid);
737
        String objectPath = objectPathMap.get(identifier);
738
        InputStream data = null;
739
        data = new FileInputStream(objectPath);
740
        return data;
741

    
742
    }
743
    
744
    /**
745
     * Get the SystemMetadata for the specified id from the distributed Map.
746
     * The null maybe is returned if there is no system metadata found.
747
     * @param id  the specified id.
748
     * @return the SystemMetadata associated with the id.
749
     */
750
    private SystemMetadata getSystemMetadata(String id) {
751
        SystemMetadata metadata = null;
752
        if(systemMetadataMap != null && id != null) {
753
            Identifier identifier = new Identifier();
754
            identifier.setValue(id);
755
            metadata = systemMetadataMap.get(identifier);
756
        }
757
        return metadata;
758
    }
759
    
760
    /**
761
     * Get the obsoletes chain of the specified id. The returned list doesn't include
762
     * the specified id itself. The newer version has the lower index number in the list.
763
     * Empty list will be returned if there is no document to be obsoleted by this id.
764
     * @param id
765
     * @return
766
     */
767
    private List<String> getObsoletes(String id) {
768
        List<String> obsoletes = new ArrayList<String>();
769
        while (id != null) {
770
            SystemMetadata metadata = getSystemMetadata(id);
771
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
772
            if(metadata != null) {
773
                Identifier identifier = metadata.getObsoletes();
774
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
775
                    obsoletes.add(identifier.getValue());
776
                    id = identifier.getValue();
777
                } 
778
            } 
779
        }
780
        return obsoletes;
781
    }
782
    
783
    /**
784
     * Overwrite and do nothing
785
     */
786
    public boolean cancel() {
787
        return true;
788
    }
789

    
790
}
(3-3/6)