Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates Solr indexes in bulk for the
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.File;
30
import java.io.FileInputStream;
31
import java.io.FileNotFoundException;
32
import java.io.IOException;
33
import java.io.InputStream;
34
import java.text.SimpleDateFormat;
35
import java.util.ArrayList;
36
import java.util.Calendar;
37
import java.util.Collections;
38
import java.util.Date;
39
import java.util.List;
40
import java.util.TimerTask;
41

    
42
import javax.xml.parsers.ParserConfigurationException;
43
import javax.xml.xpath.XPathExpressionException;
44

    
45
import org.apache.commons.io.FileUtils;
46
import org.apache.commons.logging.Log;
47
import org.apache.commons.logging.LogFactory;
48
import org.apache.solr.client.solrj.SolrServerException;
49
import org.dataone.configuration.Settings;
50
import org.dataone.service.exceptions.InvalidRequest;
51
import org.dataone.service.exceptions.InvalidToken;
52
import org.dataone.service.exceptions.NotAuthorized;
53
import org.dataone.service.exceptions.NotFound;
54
import org.dataone.service.exceptions.NotImplemented;
55
import org.dataone.service.exceptions.ServiceFailure;
56
import org.dataone.service.exceptions.UnsupportedType;
57
import org.dataone.service.types.v1.Event;
58
import org.dataone.service.types.v1.Identifier;
59
import org.dataone.service.types.v1.ObjectFormatIdentifier;
60
import org.dataone.service.types.v1.SystemMetadata;
61
import org.dspace.foresite.OREParserException;
62
import org.xml.sax.SAXException;
63

    
64
import com.hazelcast.core.IMap;
65
import com.hazelcast.core.ISet;
66

    
67
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
68
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
69
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
70
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
71

    
72

    
73
/**
 * A class representing the object that generates Solr indexes in bulk.
 * This can happen during an update of Metacat (generating the index for all existing documents)
 * or when regenerating the index for those documents
 * that failed to be indexed during an insert or update.
 * 
 * @author tao
 *
 */
82
public class IndexGenerator extends TimerTask {
83
    
84
    private static final int FIRST =0;
85
    private static final int SECOND =1;
86
    private static final int THIRD = 2;
87
    private static final int FOURTH = 3;
88
    public static final int WAITTIME = 10000;
89
    public static final int MAXWAITNUMBER = 180;
90
    private static final String HTTP = "http://";
91
    private static final String MNAPPENDIX = "/d1/mn";
92
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
93
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
94
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
95
    
96
    private static int waitingTime = WAITTIME;
97
    private static int maxAttempts = MAXWAITNUMBER;
98
    
99
    private SolrIndex solrIndex = null;
100
    //private SystemMetadataEventListener systemMetadataListener = null;
101
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
102
    private IMap<Identifier, String> objectPathMap;
103
    private Log log = LogFactory.getLog(IndexGenerator.class);
104
    //private MNode mNode = null;
105
    private static List<String> resourceMapNamespaces = null;
106
    
107
    /**
108
     * Constructor
109
     * @param solrIndex
110
     * @param systemMetadataListener
111
     */
112
    public IndexGenerator(SolrIndex solrIndex) {
113
        this.solrIndex = solrIndex;
114
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
115
        //this.systemMetadataListener = systemMetadataListener;
116
        //this.mNode = new MNode(buildMNBaseURL());
117
        try {
118
            waitingTime = Settings.getConfiguration().getInt(WAITIMEPOPERTYNAME);
119
            maxAttempts = Settings.getConfiguration().getInt(MAXATTEMPTSPROPERTYNAME);
120
        } catch (Exception e) {
121
            log.warn("IndexGenerator.constructor - couldn't read the waiting time or maxattempts from the metacat.properties file since : "+e.getMessage()+". Default values will be used");
122
            waitingTime = WAITTIME;
123
            maxAttempts = MAXWAITNUMBER;
124
        }
125
    }
126
    
127
    /**
128
     * Build the index for all documents in Metacat without overwriting.
129
     * @throws SolrServerException 
130
     * @throws ServiceFailure 
131
     * @throws NotImplemented 
132
     * @throws NotAuthorized 
133
     * @throws InvalidToken 
134
     * @throws InvalidRequest 
135
     * @throws IndexEventLogException 
136
     * @throws IllegalAccessException 
137
     * @throws InstantiationException 
138
     * @throws ClassNotFoundException 
139
     */
140
    /*public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
141
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
142
        boolean force = false;
143
        indexAll(force);
144
    }*/
145
    
146
    /**
147
     * Build the index for all documents.
148
     * @throws SolrServerException 
149
     * @throws ServiceFailure 
150
     * @throws NotImplemented 
151
     * @throws NotAuthorized 
152
     * @throws InvalidToken 
153
     * @throws InvalidRequest 
154
     * @throws IndexEventLogException 
155
     * @throws IllegalAccessException 
156
     * @throws InstantiationException 
157
     * @throws ClassNotFoundException 
158
     * @throws ParserConfigurationException 
159
     * @throws SAXException 
160
     * @throws IOException 
161
     * @throws UnsupportedType 
162
     * @throws NotFound 
163
     * @throws XPathExpressionException 
164
     * @throws OREParserException 
165
     */
166
    public void indexAll() throws InvalidRequest, InvalidToken,
167
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
168
        Date since = null;
169
        Date until = null;
170
        index(since, until);
171
    }
172
    
173
    /**
174
     * Build the index for the docs which have been modified since the specified date.
175
     * @param since
176
     * @throws SolrServerException 
177
     * @throws ServiceFailure 
178
     * @throws NotImplemented 
179
     * @throws NotAuthorized 
180
     * @throws InvalidToken 
181
     * @throws InvalidRequest 
182
     * @throws IndexEventLogException 
183
     * @throws IllegalAccessException 
184
     * @throws InstantiationException 
185
     * @throws ClassNotFoundException 
186
     * @throws ParserConfigurationException 
187
     * @throws SAXException 
188
     * @throws IOException 
189
     * @throws UnsupportedType 
190
     * @throws NotFound 
191
     * @throws XPathExpressionException 
192
     * @throws OREParserException 
193
     */
194
    public void index(Date since) throws InvalidRequest, InvalidToken, 
195
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
196
        Date until = null;
197
        index(since, until);
198
    }
199
    
200
    /**
201
     *  Build the index for the docs which have been modified between the specified date.s
202
     * @param since
203
     * @param until
204
     * @throws SolrServerException 
205
     * @throws ServiceFailure 
206
     * @throws NotImplemented 
207
     * @throws NotAuthorized 
208
     * @throws InvalidToken 
209
     * @throws InvalidRequest 
210
     * @throws IndexEventLogException 
211
     * @throws IllegalAccessException 
212
     * @throws InstantiationException 
213
     * @throws ClassNotFoundException 
214
     * @throws ParserConfigurationException 
215
     * @throws SAXException 
216
     * @throws IOException 
217
     * @throws UnsupportedType 
218
     * @throws NotFound 
219
     * @throws XPathExpressionException 
220
     * @throws OREParserException 
221
     */
222
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
223
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
224
        Date processedDate = null;
225
        List<String> solrIds = null;
226
        initSystemMetadataMap();
227
        initObjectPathMap();
228
        List[] metacatIds = getMetacatIds(since, until);
229
        List<String> otherMetacatIds = metacatIds[FIRST];
230
        List<String> resourceMapIds =  metacatIds[SECOND];
231
        List<String> otherDeletedMetacatIds = metacatIds[THIRD];
232
        List<String> resourceMapDeletedIds = metacatIds[FOURTH];
233
        
234
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
235
        List<Long> maxCollection = new ArrayList<Long>();
236
        Date latestOtherId = null;
237
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
238
            int size = otherMetacatIds.size();
239
            String id = otherMetacatIds.get(size-1);
240
            SystemMetadata sysmeta = getSystemMetadata(id);
241
            latestOtherId = sysmeta.getDateSysMetadataModified();
242
            maxCollection.add(new Long(latestOtherId.getTime()));
243
        }
244
        
245
        Date latestDeletedOtherIds = null;
246
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
247
            int size = otherDeletedMetacatIds.size();
248
            String id = otherDeletedMetacatIds.get(size-1);
249
            SystemMetadata sysmeta = getSystemMetadata(id);
250
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
251
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
252
        }
253
        
254
        Date latestResourceId = null;
255
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
256
            int size = resourceMapIds.size();
257
            String id = resourceMapIds.get(size-1);
258
            SystemMetadata sysmeta = getSystemMetadata(id);
259
            latestResourceId = sysmeta.getDateSysMetadataModified();
260
            maxCollection.add(new Long(latestResourceId.getTime()));
261
        }
262
        
263
        Date latestDeletedResourceId = null;
264
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
265
            int size = resourceMapDeletedIds.size();
266
            String id = resourceMapDeletedIds.get(size-1);
267
            SystemMetadata sysmeta = getSystemMetadata(id);
268
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
269
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
270
        }
271
        
272
        if(!maxCollection.isEmpty()) {
273
            Long max = Collections.max(maxCollection);
274
            processedDate = new Date(max.longValue());
275
        }
276
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
277
            processedDate = latestOtherId;
278
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
279
            processedDate = latestResourceId;
280
        } else if (latestOtherId == null && latestResourceId != null) {
281
            processedDate = latestResourceId;
282
        } else if (latestOtherId != null && latestResourceId == null) {
283
            processedDate = latestOtherId;
284
        }*/
285
        
286
        
287
        //add the failedPids 
288
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
289
        List<IndexEvent> failedOtherIds = new ArrayList<IndexEvent>();
290
        List<IndexEvent> failedResourceMapIds = new ArrayList<IndexEvent>();
291
        if(failedEvents != null) {
292
            for(IndexEvent event : failedEvents) {
293
            	String id = event.getIdentifier().getValue();
294
                SystemMetadata sysmeta = getSystemMetadata(id);
295
                if(sysmeta != null) {
296
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
297
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
298
                        failedResourceMapIds.add(event);
299
                    } else {
300
                        failedOtherIds.add(event);
301
                    }
302
                }
303
            }
304
        }
305
        indexFailedIds(failedOtherIds);
306
        indexFailedIds(failedResourceMapIds);
307
        
308
        /*if(!failedOtherIds.isEmpty()) {
309
            failedOtherIds.addAll(otherMetacatIds);
310
        } else {
311
            failedOtherIds = otherMetacatIds;
312
        }
313
        
314
        if(!failedResourceMapIds.isEmpty()) {
315
            failedResourceMapIds.addAll(resourceMapIds);
316
        } else {
317
            failedResourceMapIds = resourceMapIds;
318
        }*/
319
        
320
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
321
        //logFile(otherMetacatIds, "ids-for-timed-indexing-log");
322
        log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
323
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
324
        //logFile(resourceMapIds, "ids-for-timed-indexing-log");
325
        log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
326
        index(otherMetacatIds);
327
        removeIndex(otherDeletedMetacatIds);
328
        index(resourceMapIds);
329
        removeIndex(resourceMapDeletedIds);
330
       
331
        //record the timed index.
332
        if(processedDate != null) {
333
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
334
        }
335
        
336
    }
337
    
338
    /*
339
     * Write the docids which will be indexed into a file. 
340
     */
341
    /*private void logFile(List<String> ids, String fileName)  {
342
        if(ids != null) {
343
            try {
344
                String tempDir = System.getProperty("java.io.tmpdir");
345
                log.info("the temp dir is ===================== "+tempDir);
346
                File idsForIndex = new File(tempDir, fileName);
347
                if(!idsForIndex.exists()) {
348
                    idsForIndex.createNewFile();
349
                } 
350
                
351
                Date date = Calendar.getInstance().getTime();
352
                SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
353
                String dateStr = format.format(date);
354
                List<String> dateList = new ArrayList<String>();
355
                dateList.add(dateStr);
356
                Boolean append = true;
357
                FileUtils.writeLines(idsForIndex, dateList, append);//write time string
358
                FileUtils.writeLines(idsForIndex, ids, append);
359
            } catch (Exception e) {
360
                log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage());
361
            }
362
           
363
        }
364
    }*/
365
    /*
366
     * Doing index
367
     */
368
    private void index(List<String> metacatIds) {
369
        if(metacatIds != null) {
370
            for(String metacatId : metacatIds) {
371
                if(metacatId != null) {
372
                        try {
373
                            generateIndex(metacatId);
374
                        } catch (Exception e) {
375
                            IndexEvent event = new IndexEvent();
376
                            Identifier pid = new Identifier();
377
                            pid.setValue(metacatId);
378
                            event.setIdentifier(pid);
379
                            event.setDate(Calendar.getInstance().getTime());
380
                            event.setAction(Event.CREATE);
381
                            String error = "IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage();
382
                            event.setDescription(error);
383
                            try {
384
                                EventlogFactory.createIndexEventLog().write(event);
385
                            } catch (Exception ee) {
386
                                log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
387
                            }
388
                            log.error(error);
389
                        }
390
                        
391
                   
392
                }
393
            }
394
        }
395
    }
396
    
397
    /*
398
     * Index those ids which failed in the process (We got them from the EventLog)
399
     */
400
    private void indexFailedIds(List<IndexEvent> events) {
401
        if(events != null) {
402
            for(IndexEvent event : events) {
403
                if(event != null) {
404
                    Identifier identifier = event.getIdentifier();
405
                    if(identifier != null) {
406
                        String id = identifier.getValue();
407
                        if(id != null) {
408
                            Event action = event.getAction();
409
                            if (action != null && action.equals(Event.CREATE)) {
410
                                try {
411
                                    generateIndex(id);
412
                                    EventlogFactory.createIndexEventLog().remove(identifier);
413
                                } catch (Exception e) {
414
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
415
                                }
416
                            } else if (action != null && action.equals(Event.DELETE)) {
417
                                try {
418
                                    removeIndex(id);
419
                                    EventlogFactory.createIndexEventLog().remove(identifier);
420
                                } catch (Exception e) {
421
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage());
422
                                }
423
                            }
424
                        }
425
                    }
426
                }
427
            }
428
        }
429
    }
430
    
431
    public void run() {
432
        /*IndexEvent event = new IndexEvent();
433
        event.setDate(Calendar.getInstance().getTime());
434
        event.setType(IndexEvent.STARTTIMEDINDEX);
435
        event.setDescription("Start the timed index job");
436
        try {
437
            EventlogFactory.createIndexEventLog().write(event);
438
        } catch (Exception e) {
439
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing start event :"+e.getMessage());
440
        }*/
441
        try {
442
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
443
            index(since);
444
        } catch (InvalidRequest e) {
445
            // TODO Auto-generated catch block
446
            //e.printStackTrace();
447
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
448
        } catch (InvalidToken e) {
449
            // TODO Auto-generated catch block
450
            //e.printStackTrace();
451
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
452
        } catch (NotAuthorized e) {
453
            // TODO Auto-generated catch block
454
            //e.printStackTrace();
455
        } catch (NotImplemented e) {
456
            // TODO Auto-generated catch block
457
            //e.printStackTrace();
458
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
459
        } catch (ServiceFailure e) {
460
            // TODO Auto-generated catch block
461
            //e.printStackTrace();
462
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
463
        } catch (SolrServerException e) {
464
            // TODO Auto-generated catch block
465
            //e.printStackTrace();
466
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
467
        } catch (FileNotFoundException e) {
468
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
469
        }
470
        /*event.setDate(Calendar.getInstance().getTime());
471
        event.setType(IndexEvent.FINISHTIMEDINDEX);
472
        event.setDescription("Finish the timed index job");
473
        try {
474
            EventlogFactory.createIndexEventLog().write(event);
475
        } catch (Exception e) {
476
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing finish event :"+e.getMessage());
477
        }*/ catch (ClassNotFoundException e) {
478
            // TODO Auto-generated catch block
479
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
480
        } catch (InstantiationException e) {
481
            // TODO Auto-generated catch block
482
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
483
        } catch (IllegalAccessException e) {
484
            // TODO Auto-generated catch block
485
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
486
        } catch (IndexEventLogException e) {
487
            // TODO Auto-generated catch block
488
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
489
        } catch (XPathExpressionException e) {
490
            // TODO Auto-generated catch block
491
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
492
        } catch (NotFound e) {
493
            // TODO Auto-generated catch block
494
            e.printStackTrace();
495
        } catch (UnsupportedType e) {
496
            // TODO Auto-generated catch block
497
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
498
        } catch (IOException e) {
499
            // TODO Auto-generated catch block
500
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
501
        } catch (SAXException e) {
502
            // TODO Auto-generated catch block
503
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
504
        } catch (ParserConfigurationException e) {
505
            // TODO Auto-generated catch block
506
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
507
        } catch (OREParserException e) {
508
            // TODO Auto-generated catch block
509
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
510
        }
511
    }
512
    
513
    /*
514
     * Get the indexed ids list from the solr server.
515
     * An empty list will be returned if there is no ids.
516
     */
517
    private List<String> getSolrDocIds() throws SolrServerException {
518
        List<String> ids = solrIndex.getSolrIds();
519
        return ids;
520
    }
521
    
522
    /*
523
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
524
     * 
525
     * If since and util are null, it will return all of them.
526
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
527
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
528
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
529
     */
530
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
531
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
532
        String fileName = "ids-from-hazelcast";
533
        List<String> resourceMapIds = new ArrayList();
534
        List<String> resourceMapDeletedIds = new ArrayList();
535
        List<String> otherIds = new ArrayList();
536
        List<String> otherDeletedIds = new ArrayList();
537
        List[] ids = new List[4];
538
        ids[FIRST]= otherIds;
539
        ids[SECOND] = resourceMapIds;
540
        ids[THIRD]  = otherDeletedIds;
541
        ids[FOURTH] = resourceMapDeletedIds;
542
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
543
        Date otherPreviousDate = null;
544
        Date otherDeletedPreviousDate = null;
545
        Date resourceMapPreviousDate = null;
546
        Date resourceMapDeletedPreviousDate = null;
547
        if(metacatIds != null) {
548
            for(Identifier identifier : metacatIds) {
549
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
550
                    List<String> idLog = new ArrayList<String>();
551
                    idLog.add(identifier.getValue());
552
                    //logFile(idLog, fileName);
553
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
554
                    if(sysmeta != null) {
555
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
556
                        //System.out.println("the object format id is "+formatId.getValue());
557
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
558
                        boolean correctTimeRange = false;
559
                        Date sysDate = sysmeta.getDateSysMetadataModified();
560
                        if(since == null && until == null) {
561
                            correctTimeRange = true;
562
                        } else if (since != null && until == null) {
563
                            if(sysDate.getTime() > since.getTime()) {
564
                                correctTimeRange = true;
565
                            }
566
                        } else if (since == null && until != null) {
567
                            if(sysDate.getTime() < until.getTime()) {
568
                                correctTimeRange = true;
569
                            }
570
                        } else if (since != null && until != null) {
571
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
572
                                correctTimeRange = true;
573
                            }
574
                        }
575
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
576
                            //for the resource map
577
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
578
                                //archived ids
579
                                if(!resourceMapDeletedIds.isEmpty()) {
580
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
581
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
582
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
583
                                    } else {
584
                                        int size = resourceMapDeletedIds.size();//
585
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
586
                                    }
587
                                } else {
588
                                    resourceMapDeletedIds.add(identifier.getValue());
589
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
590
                                }
591
                            } else {
592
                                // current ids
593
                                if(!resourceMapIds.isEmpty()) {
594
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
595
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
596
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
597
                                    } else {
598
                                        int size = resourceMapIds.size();//
599
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
600
                                    }
601
                                } else {
602
                                    resourceMapIds.add(identifier.getValue());
603
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
604
                                }
605
                            }
606
                        } else if (correctTimeRange) {
607
                            if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
608
                                //for the archived ids
609
                                if(!otherDeletedIds.isEmpty()) {
610
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
611
                                        otherDeletedIds.add(identifier.getValue());
612
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
613
                                    } else {
614
                                        int size = otherDeletedIds.size();
615
                                        otherDeletedIds.add(size-1, identifier.getValue());
616
                                    }
617
                                } else {
618
                                    otherDeletedIds.add(identifier.getValue());
619
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
620
                                }
621
                            } else {
622
                                //for the current ids
623
                                if(!otherIds.isEmpty()) {
624
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
625
                                        otherIds.add(identifier.getValue());
626
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
627
                                    } else {
628
                                        int size = otherIds.size();
629
                                        otherIds.add(size-1, identifier.getValue());
630
                                    }
631
                                } else {
632
                                    otherIds.add(identifier.getValue());
633
                                    otherPreviousDate = sysDate;//init otherPreviousDate
634
                                }
635
                            }
636
                        }
637
                        
638
                    }
639
                }
640
            }
641
        }
642
        return ids;
643
    }
644
    
645
    /*
646
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
647
     */
648
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
649
        boolean isResourceMap = false;
650
        if(formatId != null && resourceMapNamespaces != null) {
651
            for(String namespace : resourceMapNamespaces) {
652
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
653
                    isResourceMap = true;
654
                    break;
655
                }
656
            }
657
        }
658
        return isResourceMap;
659
    }
660
    
661
   
662
    
663
    /*
664
     * Generate index for the id.
665
     */
666
    private void generateIndex(String id) throws Exception {
667
        if(id != null)  {
668
                SystemMetadata sysmeta = getSystemMetadata(id);
669
                //only update none-archived id.
670
                if(sysmeta != null && !sysmeta.getArchived() && sysmeta.getObsoletedBy() == null) {
671
                        InputStream data = getDataObject(id);
672
                        Identifier obsolete = sysmeta.getObsoletes();
673
                        List<String> obsoleteChain = null;
674
                        if(obsolete != null) {
675
                            obsoleteChain = getObsoletes(id);
676
                        } 
677
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
678
                } else {
679
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
680
                }
681
           
682
        }
683
    }
684
    
685
    /*
686
     * Remove the solr index for the list of ids
687
     */
688
    private void removeIndex(List<String> ids) {
689
        if(ids!= null) {
690
            for(String id :ids) {
691
                try {
692
                    removeIndex(id);
693
                } catch (Exception e) {
694
                    IndexEvent event = new IndexEvent();
695
                    Identifier pid = new Identifier();
696
                    pid.setValue(id);
697
                    event.setIdentifier(pid);
698
                    event.setDate(Calendar.getInstance().getTime());
699
                    event.setAction(Event.DELETE);
700
                    String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage();
701
                    event.setDescription(error);
702
                    try {
703
                        EventlogFactory.createIndexEventLog().write(event);
704
                    } catch (Exception ee) {
705
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
706
                    }
707
                    log.error(error);
708
                }
709
                
710
            }
711
        }
712
    }
713
    
714
    /*
715
     * Remove the index for the id
716
     */
717
    private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException  {
718
        if(id != null) {
719
            solrIndex.remove(id);
720
        }
721
    }
722
    
723
    /*
724
     * Initialize the system metadata map
725
     */
726
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
727
        int times = 0;
728
        if(systemMetadataMap == null) {
729
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
730
            /*while(true) {
731
                try {
732
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
733
                    break;
734
                } catch (FileNotFoundException e) {
735
                    throw e;
736
                } catch (ServiceFailure e) {
737
                    if(times <= maxAttempts) {
738
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
739
                                         +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
740
                        try {
741
                            Thread.sleep(waitingTime);
742
                        } catch (Exception ee) {
743
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for "+waitingTime/1000+" seconds to wait the hazelcast service");
744
                        }
745
                       
746
                    } else {
747
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the system metadata from it and the building index can't happen this time");
748
                    }
749
                }
750
                times++;
751
            }*/
752
        }
753
    }
754
    
755
    /*
756
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
757
     */
758
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
759
        if(objectPathMap == null) {
760
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
761
        }
762
    }
763
    /**
764
     * Get an InputStream as the data object for the specific pid.
765
     * @param pid
766
     * @return
767
     * @throws FileNotFoundException
768
     */
769
    private InputStream getDataObject(String pid) throws FileNotFoundException {
770
        Identifier identifier = new Identifier();
771
        identifier.setValue(pid);
772
        String objectPath = objectPathMap.get(identifier);
773
        InputStream data = null;
774
        data = new FileInputStream(objectPath);
775
        return data;
776

    
777
    }
778
    
779
    /**
780
     * Get the SystemMetadata for the specified id from the distributed Map.
781
     * The null maybe is returned if there is no system metadata found.
782
     * @param id  the specified id.
783
     * @return the SystemMetadata associated with the id.
784
     */
785
    private SystemMetadata getSystemMetadata(String id) {
786
        SystemMetadata metadata = null;
787
        if(systemMetadataMap != null && id != null) {
788
            Identifier identifier = new Identifier();
789
            identifier.setValue(id);
790
            metadata = systemMetadataMap.get(identifier);
791
        }
792
        return metadata;
793
    }
794
    
795
    /**
796
     * Get the obsoletes chain of the specified id. The returned list doesn't include
797
     * the specified id itself. The newer version has the lower index number in the list.
798
     * Empty list will be returned if there is no document to be obsoleted by this id.
799
     * @param id
800
     * @return
801
     */
802
    private List<String> getObsoletes(String id) {
803
        List<String> obsoletes = new ArrayList<String>();
804
        while (id != null) {
805
            SystemMetadata metadata = getSystemMetadata(id);
806
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
807
            if(metadata != null) {
808
                Identifier identifier = metadata.getObsoletes();
809
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
810
                    obsoletes.add(identifier.getValue());
811
                    id = identifier.getValue();
812
                } 
813
            } 
814
        }
815
        return obsoletes;
816
    }
817
    
818
    /**
819
     * Overwrite and do nothing
820
     */
821
    public boolean cancel() {
822
        return true;
823
    }
824

    
825
}
(3-3/6)