Project

General

Profile

1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20

    
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileNotFoundException;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.text.SimpleDateFormat;
27
import java.util.ArrayList;
28
import java.util.Calendar;
29
import java.util.Collections;
30
import java.util.Date;
31
import java.util.List;
32
import java.util.TimerTask;
33

    
34
import javax.xml.parsers.ParserConfigurationException;
35
import javax.xml.xpath.XPathExpressionException;
36

    
37
import org.apache.commons.io.FileUtils;
38
import org.apache.commons.logging.Log;
39
import org.apache.commons.logging.LogFactory;
40
import org.apache.solr.client.solrj.SolrServerException;
41
import org.dataone.configuration.Settings;
42
import org.dataone.service.exceptions.InvalidRequest;
43
import org.dataone.service.exceptions.InvalidToken;
44
import org.dataone.service.exceptions.NotAuthorized;
45
import org.dataone.service.exceptions.NotFound;
46
import org.dataone.service.exceptions.NotImplemented;
47
import org.dataone.service.exceptions.ServiceFailure;
48
import org.dataone.service.exceptions.UnsupportedType;
49
import org.dataone.service.types.v1.Event;
50
import org.dataone.service.types.v1.Identifier;
51
import org.dataone.service.types.v1.ObjectFormatIdentifier;
52
import org.dataone.service.types.v1.SystemMetadata;
53
import org.dspace.foresite.OREParserException;
54
import org.xml.sax.SAXException;
55

    
56
import com.hazelcast.core.IMap;
57
import com.hazelcast.core.ISet;
58

    
59
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
60
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
61
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
62
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
63

    
64

    
65
/**
 * Generates Solr indexes in bulk.
 * This can happen during an update of Metacat (generating the index for all existing documents)
 * or when regenerating the index for those documents
 * that failed to be indexed during an insert or update.
 * 
 * @author tao
 *
 */
74
public class IndexGenerator extends TimerTask {
75
    
76
    private static final int FIRST =0;
77
    private static final int SECOND =1;
78
    private static final int THIRD = 2;
79
    private static final int FOURTH = 3;
80
    public static final int WAITTIME = 10000;
81
    public static final int MAXWAITNUMBER = 180;
82
    private static final String HTTP = "http://";
83
    private static final String MNAPPENDIX = "/d1/mn";
84
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
85
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
86
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
87
    
88
    
89
    private SolrIndex solrIndex = null;
90
    //private SystemMetadataEventListener systemMetadataListener = null;
91
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
92
    private IMap<Identifier, String> objectPathMap;
93
    private ISet<SystemMetadata> indexQueue;
94
    private Log log = LogFactory.getLog(IndexGenerator.class);
95
    //private MNode mNode = null;
96
    private static List<String> resourceMapNamespaces = null;
97
    
98
    /**
     * Creates a generator bound to the given Solr index.
     * As a side effect the class-wide list of resource map namespaces is (re)loaded from the
     * {@code index.resourcemap.namespace} configuration property.
     *
     * @param solrIndex the Solr index object kept by this generator
     */
    public IndexGenerator(SolrIndex solrIndex) {
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
        this.solrIndex = solrIndex;
    }
110
    
111
    /**
112
     * Build the index for all documents in Metacat without overwriting.
113
     * @throws SolrServerException 
114
     * @throws ServiceFailure 
115
     * @throws NotImplemented 
116
     * @throws NotAuthorized 
117
     * @throws InvalidToken 
118
     * @throws InvalidRequest 
119
     * @throws IndexEventLogException 
120
     * @throws IllegalAccessException 
121
     * @throws InstantiationException 
122
     * @throws ClassNotFoundException 
123
     */
124
    /*public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
125
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
126
        boolean force = false;
127
        indexAll(force);
128
    }*/
129
    
130
    /**
131
     * Build the index for all documents.
132
     * @throws SolrServerException 
133
     * @throws ServiceFailure 
134
     * @throws NotImplemented 
135
     * @throws NotAuthorized 
136
     * @throws InvalidToken 
137
     * @throws InvalidRequest 
138
     * @throws IndexEventLogException 
139
     * @throws IllegalAccessException 
140
     * @throws InstantiationException 
141
     * @throws ClassNotFoundException 
142
     * @throws ParserConfigurationException 
143
     * @throws SAXException 
144
     * @throws IOException 
145
     * @throws UnsupportedType 
146
     * @throws NotFound 
147
     * @throws XPathExpressionException 
148
     * @throws OREParserException 
149
     */
150
    public void indexAll() throws InvalidRequest, InvalidToken,
151
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
152
        Date since = null;
153
        Date until = null;
154
        index(since, until);
155
    }
156
    
157
    /**
158
     * Build the index for the docs which have been modified since the specified date.
159
     * @param since
160
     * @throws SolrServerException 
161
     * @throws ServiceFailure 
162
     * @throws NotImplemented 
163
     * @throws NotAuthorized 
164
     * @throws InvalidToken 
165
     * @throws InvalidRequest 
166
     * @throws IndexEventLogException 
167
     * @throws IllegalAccessException 
168
     * @throws InstantiationException 
169
     * @throws ClassNotFoundException 
170
     * @throws ParserConfigurationException 
171
     * @throws SAXException 
172
     * @throws IOException 
173
     * @throws UnsupportedType 
174
     * @throws NotFound 
175
     * @throws XPathExpressionException 
176
     * @throws OREParserException 
177
     */
178
    public void index(Date since) throws InvalidRequest, InvalidToken, 
179
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
180
        Date until = null;
181
        index(since, until);
182
    }
183
    
184
    /**
185
     *  Build the index for the docs which have been modified between the specified date.s
186
     * @param since
187
     * @param until
188
     * @throws SolrServerException 
189
     * @throws ServiceFailure 
190
     * @throws NotImplemented 
191
     * @throws NotAuthorized 
192
     * @throws InvalidToken 
193
     * @throws InvalidRequest 
194
     * @throws IndexEventLogException 
195
     * @throws IllegalAccessException 
196
     * @throws InstantiationException 
197
     * @throws ClassNotFoundException 
198
     * @throws ParserConfigurationException 
199
     * @throws SAXException 
200
     * @throws IOException 
201
     * @throws UnsupportedType 
202
     * @throws NotFound 
203
     * @throws XPathExpressionException 
204
     * @throws OREParserException 
205
     */
206
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
207
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
208
        Date processedDate = null;
209
        List<String> solrIds = null;
210
        initSystemMetadataMap();
211
        initObjectPathMap();
212
        initIndexQueue();
213
        List[] metacatIds = getMetacatIds(since, until);
214
        List<String> otherMetacatIds = metacatIds[FIRST];
215
        List<String> resourceMapIds =  metacatIds[SECOND];
216
        //List<String> otherDeletedMetacatIds = metacatIds[THIRD];
217
        //List<String> resourceMapDeletedIds = metacatIds[FOURTH];
218
        
219
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
220
        List<Long> maxCollection = new ArrayList<Long>();
221
        Date latestOtherId = null;
222
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
223
            int size = otherMetacatIds.size();
224
            String id = otherMetacatIds.get(size-1);
225
            SystemMetadata sysmeta = getSystemMetadata(id);
226
            latestOtherId = sysmeta.getDateSysMetadataModified();
227
            maxCollection.add(new Long(latestOtherId.getTime()));
228
        }
229
        
230
        /*Date latestDeletedOtherIds = null;
231
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
232
            int size = otherDeletedMetacatIds.size();
233
            String id = otherDeletedMetacatIds.get(size-1);
234
            SystemMetadata sysmeta = getSystemMetadata(id);
235
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
236
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
237
        }*/
238
        
239
        Date latestResourceId = null;
240
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
241
            int size = resourceMapIds.size();
242
            String id = resourceMapIds.get(size-1);
243
            SystemMetadata sysmeta = getSystemMetadata(id);
244
            latestResourceId = sysmeta.getDateSysMetadataModified();
245
            maxCollection.add(new Long(latestResourceId.getTime()));
246
        }
247
        
248
        /*Date latestDeletedResourceId = null;
249
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
250
            int size = resourceMapDeletedIds.size();
251
            String id = resourceMapDeletedIds.get(size-1);
252
            SystemMetadata sysmeta = getSystemMetadata(id);
253
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
254
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
255
        }*/
256
        
257
        if(!maxCollection.isEmpty()) {
258
            Long max = Collections.max(maxCollection);
259
            processedDate = new Date(max.longValue());
260
        }
261
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
262
            processedDate = latestOtherId;
263
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
264
            processedDate = latestResourceId;
265
        } else if (latestOtherId == null && latestResourceId != null) {
266
            processedDate = latestResourceId;
267
        } else if (latestOtherId != null && latestResourceId == null) {
268
            processedDate = latestOtherId;
269
        }*/
270
        
271
        
272
        //add the failedPids 
273
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
274
        List<String> failedOtherIds = new ArrayList<String>();
275
        List<String> failedResourceMapIds = new ArrayList<String>();
276
        if(failedEvents != null) {
277
            for(IndexEvent event : failedEvents) {
278
            	String id = event.getIdentifier().getValue();
279
                SystemMetadata sysmeta = getSystemMetadata(id);
280
                if(sysmeta != null) {
281
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
282
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
283
                        failedResourceMapIds.add(id);
284
                    } else {
285
                        failedOtherIds.add(id);
286
                    }
287
                }
288
            }
289
        }
290
        //indexFailedIds(failedOtherIds);
291
        //indexFailedIds(failedResourceMapIds);
292
        
293
        index(failedOtherIds);
294
        index(failedResourceMapIds);
295
        
296
        /*if(!failedOtherIds.isEmpty()) {
297
            failedOtherIds.addAll(otherMetacatIds);
298
        } else {
299
            failedOtherIds = otherMetacatIds;
300
        }
301
        
302
        if(!failedResourceMapIds.isEmpty()) {
303
            failedResourceMapIds.addAll(resourceMapIds);
304
        } else {
305
            failedResourceMapIds = resourceMapIds;
306
        }*/
307
        //log.info("the ids in index_event for reindex ( except the resourcemap)=====================================\n "+failedOtherIds);
308
        //log.info("the resourcemap ids in index_event for reindex =====================================\n "+failedResourceMapIds);
309
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
310
        //logFile(otherMetacatIds, "ids-for-timed-indexing-log");
311
        //log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
312
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
313
        //logFile(resourceMapIds, "ids-for-timed-indexing-log");
314
        //log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
315
        index(otherMetacatIds);
316
        //removeIndex(otherDeletedMetacatIds);
317
        index(resourceMapIds);
318
        //removeIndex(resourceMapDeletedIds);
319
       
320
        //record the timed index.
321
        if(processedDate != null) {
322
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
323
        }
324
        
325
    }
326
    
327
    /*
328
     * Write the docids which will be indexed into a file. 
329
     */
330
    /*private void logFile(List<String> ids, String fileName)  {
331
        if(ids != null) {
332
            try {
333
                String tempDir = System.getProperty("java.io.tmpdir");
334
                log.info("the temp dir is ===================== "+tempDir);
335
                File idsForIndex = new File(tempDir, fileName);
336
                if(!idsForIndex.exists()) {
337
                    idsForIndex.createNewFile();
338
                } 
339
                
340
                Date date = Calendar.getInstance().getTime();
341
                SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
342
                String dateStr = format.format(date);
343
                List<String> dateList = new ArrayList<String>();
344
                dateList.add(dateStr);
345
                Boolean append = true;
346
                FileUtils.writeLines(idsForIndex, dateList, append);//write time string
347
                FileUtils.writeLines(idsForIndex, ids, append);
348
            } catch (Exception e) {
349
                log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage());
350
            }
351
           
352
        }
353
    }*/
354
    /*
355
     * Doing index
356
     */
357
    private void index(List<String> metacatIds) {
358
        if(metacatIds != null) {
359
            for(String metacatId : metacatIds) {
360
                if(metacatId != null) {
361
                     generateIndex(metacatId);
362
                }
363
            }
364
        }
365
    }
366
    
367
    /*
368
     * Index those ids which failed in the process (We got them from the EventLog)
369
     */
370
    /*private void indexFailedIds(List<IndexEvent> events) {
371
        if(events != null) {
372
            for(IndexEvent event : events) {
373
                if(event != null) {
374
                    Identifier identifier = event.getIdentifier();
375
                    if(identifier != null) {
376
                        String id = identifier.getValue();
377
                        if(id != null) {
378
                            Event action = event.getAction();
379
                            //if (action != null && action.equals(Event.CREATE)) {
380
                                try {
381
                                    generateIndex(id);
382
                                    EventlogFactory.createIndexEventLog().remove(identifier);
383
                                } catch (Exception e) {
384
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
385
                                }
386
                            
387
                        }
388
                    }
389
                }
390
            }
391
        }
392
    }*/
393
    
394
    public void run() {
395
        /*IndexEvent event = new IndexEvent();
396
        event.setDate(Calendar.getInstance().getTime());
397
        event.setType(IndexEvent.STARTTIMEDINDEX);
398
        event.setDescription("Start the timed index job");
399
        try {
400
            EventlogFactory.createIndexEventLog().write(event);
401
        } catch (Exception e) {
402
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing start event :"+e.getMessage());
403
        }*/
404
        try {
405
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
406
            index(since);
407
        } catch (InvalidRequest e) {
408
            // TODO Auto-generated catch block
409
            //e.printStackTrace();
410
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
411
        } catch (InvalidToken e) {
412
            // TODO Auto-generated catch block
413
            //e.printStackTrace();
414
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
415
        } catch (NotAuthorized e) {
416
            // TODO Auto-generated catch block
417
            //e.printStackTrace();
418
        } catch (NotImplemented e) {
419
            // TODO Auto-generated catch block
420
            //e.printStackTrace();
421
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
422
        } catch (ServiceFailure e) {
423
            // TODO Auto-generated catch block
424
            //e.printStackTrace();
425
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
426
        } catch (SolrServerException e) {
427
            // TODO Auto-generated catch block
428
            //e.printStackTrace();
429
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
430
        } catch (FileNotFoundException e) {
431
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
432
        }
433
        /*event.setDate(Calendar.getInstance().getTime());
434
        event.setType(IndexEvent.FINISHTIMEDINDEX);
435
        event.setDescription("Finish the timed index job");
436
        try {
437
            EventlogFactory.createIndexEventLog().write(event);
438
        } catch (Exception e) {
439
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing finish event :"+e.getMessage());
440
        }*/ catch (ClassNotFoundException e) {
441
            // TODO Auto-generated catch block
442
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
443
        } catch (InstantiationException e) {
444
            // TODO Auto-generated catch block
445
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
446
        } catch (IllegalAccessException e) {
447
            // TODO Auto-generated catch block
448
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
449
        } catch (IndexEventLogException e) {
450
            // TODO Auto-generated catch block
451
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
452
        } catch (XPathExpressionException e) {
453
            // TODO Auto-generated catch block
454
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
455
        } catch (NotFound e) {
456
            // TODO Auto-generated catch block
457
            e.printStackTrace();
458
        } catch (UnsupportedType e) {
459
            // TODO Auto-generated catch block
460
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
461
        } catch (IOException e) {
462
            // TODO Auto-generated catch block
463
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
464
        } catch (SAXException e) {
465
            // TODO Auto-generated catch block
466
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
467
        } catch (ParserConfigurationException e) {
468
            // TODO Auto-generated catch block
469
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
470
        } catch (OREParserException e) {
471
            // TODO Auto-generated catch block
472
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
473
        }
474
    }
475
    
476
   
477
    
478
    /*
479
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
480
     * 
481
     * If since and util are null, it will return all of them.
482
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
483
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
484
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
485
     */
486
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
487
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
488
        String fileName = "ids-from-hazelcast";
489
        List<String> resourceMapIds = new ArrayList();
490
        //List<String> resourceMapDeletedIds = new ArrayList();
491
        List<String> otherIds = new ArrayList();
492
        //List<String> otherDeletedIds = new ArrayList();
493
        List[] ids = new List[2];
494
        ids[FIRST]= otherIds;
495
        ids[SECOND] = resourceMapIds;
496
        //ids[THIRD]  = otherDeletedIds;
497
        //ids[FOURTH] = resourceMapDeletedIds;
498
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
499
        Date otherPreviousDate = null;
500
        Date otherDeletedPreviousDate = null;
501
        Date resourceMapPreviousDate = null;
502
        Date resourceMapDeletedPreviousDate = null;
503
        if(metacatIds != null) {
504
            for(Identifier identifier : metacatIds) {
505
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
506
                    List<String> idLog = new ArrayList<String>();
507
                    idLog.add(identifier.getValue());
508
                    //logFile(idLog, fileName);
509
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
510
                    if(sysmeta != null) {
511
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
512
                        //System.out.println("the object format id is "+formatId.getValue());
513
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
514
                        boolean correctTimeRange = false;
515
                        Date sysDate = sysmeta.getDateSysMetadataModified();
516
                        if(since == null && until == null) {
517
                            correctTimeRange = true;
518
                        } else if (since != null && until == null) {
519
                            if(sysDate.getTime() > since.getTime()) {
520
                                correctTimeRange = true;
521
                            }
522
                        } else if (since == null && until != null) {
523
                            if(sysDate.getTime() < until.getTime()) {
524
                                correctTimeRange = true;
525
                            }
526
                        } else if (since != null && until != null) {
527
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
528
                                correctTimeRange = true;
529
                            }
530
                        }
531
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
532
                            //for the resource map
533
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
534
                                //archived ids
535
                                if(!resourceMapDeletedIds.isEmpty()) {
536
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
537
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
538
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
539
                                    } else {
540
                                        int size = resourceMapDeletedIds.size();//
541
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
542
                                    }
543
                                } else {
544
                                    resourceMapDeletedIds.add(identifier.getValue());
545
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
546
                                }
547
                            } else {*/
548
                                // for all ids
549
                                if(!resourceMapIds.isEmpty()) {
550
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
551
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
552
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
553
                                    } else {
554
                                        int size = resourceMapIds.size();//
555
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
556
                                    }
557
                                } else {
558
                                    resourceMapIds.add(identifier.getValue());
559
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
560
                                }
561
                            //}
562
                        } else if (correctTimeRange) {
563
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
564
                                //for the archived ids
565
                                if(!otherDeletedIds.isEmpty()) {
566
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
567
                                        otherDeletedIds.add(identifier.getValue());
568
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
569
                                    } else {
570
                                        int size = otherDeletedIds.size();
571
                                        otherDeletedIds.add(size-1, identifier.getValue());
572
                                    }
573
                                } else {
574
                                    otherDeletedIds.add(identifier.getValue());
575
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
576
                                }
577
                            } else {*/
578
                                //for all ids
579
                                if(!otherIds.isEmpty()) {
580
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
581
                                        otherIds.add(identifier.getValue());
582
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
583
                                    } else {
584
                                        int size = otherIds.size();
585
                                        otherIds.add(size-1, identifier.getValue());
586
                                    }
587
                                } else {
588
                                    otherIds.add(identifier.getValue());
589
                                    otherPreviousDate = sysDate;//init otherPreviousDate
590
                                }
591
                            //}
592
                        }
593
                        
594
                    }
595
                }
596
            }
597
        }
598
        return ids;
599
    }
600
    
601
    /*
602
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
603
     */
604
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
605
        boolean isResourceMap = false;
606
        if(formatId != null && resourceMapNamespaces != null) {
607
            for(String namespace : resourceMapNamespaces) {
608
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
609
                    isResourceMap = true;
610
                    break;
611
                }
612
            }
613
        }
614
        return isResourceMap;
615
    }
616
    
617
   
618
    
619
    /*
620
     * Generate index for the id.
621
     */
622
    private void generateIndex(String id)  {
623
        if(id != null)  {
624
                SystemMetadata sysmeta = getSystemMetadata(id);
625
                //only update none-archived id.
626
                //if(sysmeta != null && !sysmeta.getArchived() && sysmeta.getObsoletedBy() == null) {
627
                try {
628
                    if(sysmeta != null) {
629
                        InputStream data = getDataObject(id);
630
                        /*Identifier obsolete = sysmeta.getObsoletes();
631
                        List<String> obsoleteChain = null;
632
                        if(obsolete != null) {
633
                            obsoleteChain = getObsoletes(id);
634
                        }*/
635
                        solrIndex.update(id, sysmeta, data);
636
                        try {
637
                            Identifier identifier = new Identifier();
638
                            identifier.setValue(id);
639
                            EventlogFactory.createIndexEventLog().remove(identifier);
640
                        } catch (Exception ee) {
641
                            log.error("IndexGenerator.index - can't remove the id "+id +" from the index_event table since - "+ee.getMessage());
642
                        }
643
                    } else {
644
                        throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
645
                    }
646
                } catch (Exception e) {
647
                    IndexEvent event = new IndexEvent();
648
                    Identifier pid = new Identifier();
649
                    pid.setValue(id);
650
                    event.setIdentifier(pid);
651
                    event.setDate(Calendar.getInstance().getTime());
652
                    event.setAction(Event.CREATE);
653
                    String error = "IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage();
654
                    event.setDescription(error);
655
                    try {
656
                        EventlogFactory.createIndexEventLog().write(event);
657
                    } catch (Exception ee) {
658
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index inserting event :"+ee.getMessage());
659
                    }
660
                    log.error(error);
661
                }
662
                 
663
           
664
        }
665
    }
666
    
667
    /*
668
     * Remove the solr index for the list of ids
669
     */
670
    /*private void removeIndex(List<String> ids) {
671
        if(ids!= null) {
672
            for(String id :ids) {
673
                try {
674
                    removeIndex(id);
675
                } catch (Exception e) {
676
                    IndexEvent event = new IndexEvent();
677
                    Identifier pid = new Identifier();
678
                    pid.setValue(id);
679
                    event.setIdentifier(pid);
680
                    event.setDate(Calendar.getInstance().getTime());
681
                    event.setAction(Event.DELETE);
682
                    String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage();
683
                    event.setDescription(error);
684
                    try {
685
                        EventlogFactory.createIndexEventLog().write(event);
686
                    } catch (Exception ee) {
687
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
688
                    }
689
                    log.error(error);
690
                }
691
                
692
            }
693
        }
694
    }*/
695
    
696
    /*
697
     * Remove the index for the id
698
     */
699
    /*private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException  {
700
        if(id != null) {
701
            //solrIndex.remove(id);
702
        }
703
    }*/
704
    
705
    /*
706
     * Initialize the system metadata map
707
     */
708
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
709
        int times = 0;
710
        if(systemMetadataMap == null) {
711
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
712
        }
713
    }
714
    
715
    /*
716
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
717
     */
718
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
719
        if(objectPathMap == null) {
720
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
721
        }
722
    }
723
    
724
    
725
    
726
    /*
727
     * Initialize the index queue
728
     */
729
    private void initIndexQueue() throws FileNotFoundException, ServiceFailure {
730
        if(indexQueue == null) {
731
            indexQueue = DistributedMapsFactory.getIndexQueue();
732
        }
733
    }
734
    /**
735
     * Get an InputStream as the data object for the specific pid.
736
     * @param pid
737
     * @return
738
     * @throws FileNotFoundException
739
     */
740
    private InputStream getDataObject(String pid) throws FileNotFoundException {
741
        Identifier identifier = new Identifier();
742
        identifier.setValue(pid);
743
        String objectPath = objectPathMap.get(identifier);
744
        InputStream data = null;
745
        data = new FileInputStream(objectPath);
746
        return data;
747

    
748
    }
749
    
750
    /**
751
     * Get the SystemMetadata for the specified id from the distributed Map.
752
     * The null maybe is returned if there is no system metadata found.
753
     * @param id  the specified id.
754
     * @return the SystemMetadata associated with the id.
755
     */
756
    private SystemMetadata getSystemMetadata(String id) {
757
        SystemMetadata metadata = null;
758
        if(systemMetadataMap != null && id != null) {
759
            Identifier identifier = new Identifier();
760
            identifier.setValue(id);
761
            metadata = systemMetadataMap.get(identifier);
762
        }
763
        return metadata;
764
    }
765
    
766
    /**
767
     * Get the obsoletes chain of the specified id. The returned list doesn't include
768
     * the specified id itself. The newer version has the lower index number in the list.
769
     * Empty list will be returned if there is no document to be obsoleted by this id.
770
     * @param id
771
     * @return
772
     */
773
    private List<String> getObsoletes(String id) {
774
        List<String> obsoletes = new ArrayList<String>();
775
        while (id != null) {
776
            SystemMetadata metadata = getSystemMetadata(id);
777
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
778
            if(metadata != null) {
779
                Identifier identifier = metadata.getObsoletes();
780
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
781
                    obsoletes.add(identifier.getValue());
782
                    id = identifier.getValue();
783
                } 
784
            } 
785
        }
786
        return obsoletes;
787
    }
788
    
789
    /**
790
     * Overwrite and do nothing
791
     */
792
    public boolean cancel() {
793
        return true;
794
    }
795

    
796
}
(3-3/6)