Project

General

Profile

« Previous | Next » 

Revision 8352

Added by Jing Tao about 11 years ago

Rename the IndexGenerator to IndexGeneratorTimerTask.

View differences:

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGenerator.java
1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20

  
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileNotFoundException;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.text.SimpleDateFormat;
27
import java.util.ArrayList;
28
import java.util.Calendar;
29
import java.util.Collections;
30
import java.util.Date;
31
import java.util.List;
32
import java.util.TimerTask;
33

  
34
import javax.xml.parsers.ParserConfigurationException;
35
import javax.xml.xpath.XPathExpressionException;
36

  
37
import org.apache.commons.io.FileUtils;
38
import org.apache.commons.logging.Log;
39
import org.apache.commons.logging.LogFactory;
40
import org.apache.solr.client.solrj.SolrServerException;
41
import org.dataone.configuration.Settings;
42
import org.dataone.service.exceptions.InvalidRequest;
43
import org.dataone.service.exceptions.InvalidToken;
44
import org.dataone.service.exceptions.NotAuthorized;
45
import org.dataone.service.exceptions.NotFound;
46
import org.dataone.service.exceptions.NotImplemented;
47
import org.dataone.service.exceptions.ServiceFailure;
48
import org.dataone.service.exceptions.UnsupportedType;
49
import org.dataone.service.types.v1.Event;
50
import org.dataone.service.types.v1.Identifier;
51
import org.dataone.service.types.v1.ObjectFormatIdentifier;
52
import org.dataone.service.types.v1.SystemMetadata;
53
import org.dspace.foresite.OREParserException;
54
import org.xml.sax.SAXException;
55

  
56
import com.hazelcast.core.IMap;
57
import com.hazelcast.core.ISet;
58

  
59
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
60
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
61
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
62
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
63

  
64

  
65
/**
66
 * A timer task that generates Solr indexes in bulk.
67
 * This can happen during an update of Metacat (generating index for all existing documents)
68
 * or regenerate index for those documents
69
 * failing to build index during the insert or update.
70
 * 
71
 * @author tao
72
 *
73
 */
74
public class IndexGenerator extends TimerTask {
75
    
76
    // Slots in the List[] built by getMetacatIds: FIRST holds the ids that are not
    // resource maps, SECOND holds the resource map ids.
    private static final int FIRST =0;
77
    private static final int SECOND =1;
78
    // THIRD and FOURTH are only referenced from commented-out code (the "deleted ids"
    // lists) in the part of the file reviewed here.
    private static final int THIRD = 2;
79
    private static final int FOURTH = 3;
80
    // NOTE(review): WAITTIME and MAXWAITNUMBER are not used in the reviewed part of the file;
    // presumably the default wait (in milliseconds?) and default number of attempts that
    // pair with the two property names below - confirm against the callers.
    public static final int WAITTIME = 10000;
81
    public static final int MAXWAITNUMBER = 180;
82
    // NOTE(review): HTTP and MNAPPENDIX are not used in the reviewed part of the file;
    // they look like leftovers from the commented-out MNode base URL code - confirm.
    private static final String HTTP = "http://";
83
    private static final String MNAPPENDIX = "/d1/mn";
84
    // Configuration key read by the constructor to populate resourceMapNamespaces.
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
85
    // Configuration keys exposed to other classes; they are not read in the reviewed part of this file.
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
86
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
87
    
88
    
89
    // The index that generateIndex() updates for each id.
    private SolrIndex solrIndex = null;
90
    //private SystemMetadataEventListener systemMetadataListener = null;
91
    // NOTE(review): these three are not assigned in the reviewed part of the file; presumably
    // set by initSystemMetadataMap(), initObjectPathMap() and initIndexQueue(), which
    // index(Date, Date) calls before doing any work - confirm.
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
92
    private IMap<Identifier, String> objectPathMap;
93
    private ISet<SystemMetadata> indexQueue;
94
    private Log log = LogFactory.getLog(IndexGenerator.class);
95
    //private MNode mNode = null;
96
    // Format ids that identify resource maps. Static, but (re)assigned by every constructor call;
    // read by isResourceMap().
    private static List<String> resourceMapNamespaces = null;
97
    
98
    /**
     * Creates a generator that writes to the given Solr index.
     * <p>
     * As a side effect the list of resource map namespaces is (re)loaded from the
     * configuration property {@code index.resourcemap.namespace}. That list is held
     * in a static field, so it is shared by every instance of this class.
     *
     * @param solrIndex the index which will be updated for each document
     */
    public IndexGenerator(SolrIndex solrIndex) {
        this.solrIndex = solrIndex;
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
    }
110
    
111
   
112
    
113
    /**
114
     * Build the index for all documents.
115
     * @throws SolrServerException 
116
     * @throws ServiceFailure 
117
     * @throws NotImplemented 
118
     * @throws NotAuthorized 
119
     * @throws InvalidToken 
120
     * @throws InvalidRequest 
121
     * @throws IndexEventLogException 
122
     * @throws IllegalAccessException 
123
     * @throws InstantiationException 
124
     * @throws ClassNotFoundException 
125
     * @throws ParserConfigurationException 
126
     * @throws SAXException 
127
     * @throws IOException 
128
     * @throws UnsupportedType 
129
     * @throws NotFound 
130
     * @throws XPathExpressionException 
131
     * @throws OREParserException 
132
     */
133
    public void indexAll() throws InvalidRequest, InvalidToken,
134
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
135
        Date since = null;
136
        Date until = null;
137
        index(since, until);
138
    }
139
    
140
    /**
141
     * Build the index for the docs which have been modified since the specified date.
142
     * @param since
143
     * @throws SolrServerException 
144
     * @throws ServiceFailure 
145
     * @throws NotImplemented 
146
     * @throws NotAuthorized 
147
     * @throws InvalidToken 
148
     * @throws InvalidRequest 
149
     * @throws IndexEventLogException 
150
     * @throws IllegalAccessException 
151
     * @throws InstantiationException 
152
     * @throws ClassNotFoundException 
153
     * @throws ParserConfigurationException 
154
     * @throws SAXException 
155
     * @throws IOException 
156
     * @throws UnsupportedType 
157
     * @throws NotFound 
158
     * @throws XPathExpressionException 
159
     * @throws OREParserException 
160
     */
161
    public void index(Date since) throws InvalidRequest, InvalidToken, 
162
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
163
        Date until = null;
164
        index(since, until);
165
    }
166
    
167
    /**
168
     *  Build the index for the docs which have been modified between the specified date.s
169
     * @param since
170
     * @param until
171
     * @throws SolrServerException 
172
     * @throws ServiceFailure 
173
     * @throws NotImplemented 
174
     * @throws NotAuthorized 
175
     * @throws InvalidToken 
176
     * @throws InvalidRequest 
177
     * @throws IndexEventLogException 
178
     * @throws IllegalAccessException 
179
     * @throws InstantiationException 
180
     * @throws ClassNotFoundException 
181
     * @throws ParserConfigurationException 
182
     * @throws SAXException 
183
     * @throws IOException 
184
     * @throws UnsupportedType 
185
     * @throws NotFound 
186
     * @throws XPathExpressionException 
187
     * @throws OREParserException 
188
     */
189
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
190
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
191
        Date processedDate = null;
192
        List<String> solrIds = null;
193
        initSystemMetadataMap();
194
        initObjectPathMap();
195
        initIndexQueue();
196
        List[] metacatIds = getMetacatIds(since, until);
197
        List<String> otherMetacatIds = metacatIds[FIRST];
198
        List<String> resourceMapIds =  metacatIds[SECOND];
199
        //List<String> otherDeletedMetacatIds = metacatIds[THIRD];
200
        //List<String> resourceMapDeletedIds = metacatIds[FOURTH];
201
        
202
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
203
        List<Long> maxCollection = new ArrayList<Long>();
204
        Date latestOtherId = null;
205
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
206
            int size = otherMetacatIds.size();
207
            String id = otherMetacatIds.get(size-1);
208
            SystemMetadata sysmeta = getSystemMetadata(id);
209
            latestOtherId = sysmeta.getDateSysMetadataModified();
210
            maxCollection.add(new Long(latestOtherId.getTime()));
211
        }
212
        
213
        /*Date latestDeletedOtherIds = null;
214
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
215
            int size = otherDeletedMetacatIds.size();
216
            String id = otherDeletedMetacatIds.get(size-1);
217
            SystemMetadata sysmeta = getSystemMetadata(id);
218
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
219
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
220
        }*/
221
        
222
        Date latestResourceId = null;
223
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
224
            int size = resourceMapIds.size();
225
            String id = resourceMapIds.get(size-1);
226
            SystemMetadata sysmeta = getSystemMetadata(id);
227
            latestResourceId = sysmeta.getDateSysMetadataModified();
228
            maxCollection.add(new Long(latestResourceId.getTime()));
229
        }
230
        
231
        /*Date latestDeletedResourceId = null;
232
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
233
            int size = resourceMapDeletedIds.size();
234
            String id = resourceMapDeletedIds.get(size-1);
235
            SystemMetadata sysmeta = getSystemMetadata(id);
236
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
237
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
238
        }*/
239
        
240
        if(!maxCollection.isEmpty()) {
241
            Long max = Collections.max(maxCollection);
242
            processedDate = new Date(max.longValue());
243
        }
244
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
245
            processedDate = latestOtherId;
246
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
247
            processedDate = latestResourceId;
248
        } else if (latestOtherId == null && latestResourceId != null) {
249
            processedDate = latestResourceId;
250
        } else if (latestOtherId != null && latestResourceId == null) {
251
            processedDate = latestOtherId;
252
        }*/
253
        
254
        
255
        //add the failedPids 
256
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
257
        List<String> failedOtherIds = new ArrayList<String>();
258
        List<String> failedResourceMapIds = new ArrayList<String>();
259
        if(failedEvents != null) {
260
            for(IndexEvent event : failedEvents) {
261
            	String id = event.getIdentifier().getValue();
262
                SystemMetadata sysmeta = getSystemMetadata(id);
263
                if(sysmeta != null) {
264
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
265
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
266
                        failedResourceMapIds.add(id);
267
                    } else {
268
                        failedOtherIds.add(id);
269
                    }
270
                }
271
            }
272
        }
273
        //indexFailedIds(failedOtherIds);
274
        //indexFailedIds(failedResourceMapIds);
275
        
276
        index(failedOtherIds);
277
        index(failedResourceMapIds);
278
        
279
        /*if(!failedOtherIds.isEmpty()) {
280
            failedOtherIds.addAll(otherMetacatIds);
281
        } else {
282
            failedOtherIds = otherMetacatIds;
283
        }
284
        
285
        if(!failedResourceMapIds.isEmpty()) {
286
            failedResourceMapIds.addAll(resourceMapIds);
287
        } else {
288
            failedResourceMapIds = resourceMapIds;
289
        }*/
290
        //log.info("the ids in index_event for reindex ( except the resourcemap)=====================================\n "+failedOtherIds);
291
        //log.info("the resourcemap ids in index_event for reindex =====================================\n "+failedResourceMapIds);
292
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
293
        //logFile(otherMetacatIds, "ids-for-timed-indexing-log");
294
        //log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
295
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
296
        //logFile(resourceMapIds, "ids-for-timed-indexing-log");
297
        //log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
298
        index(otherMetacatIds);
299
        //removeIndex(otherDeletedMetacatIds);
300
        index(resourceMapIds);
301
        //removeIndex(resourceMapDeletedIds);
302
       
303
        //record the timed index.
304
        if(processedDate != null) {
305
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
306
        }
307
        
308
    }
309
    
310
    /*
311
     * Write the docids which will be indexed into a file. 
312
     */
313
    /*private void logFile(List<String> ids, String fileName)  {
314
        if(ids != null) {
315
            try {
316
                String tempDir = System.getProperty("java.io.tmpdir");
317
                log.info("the temp dir is ===================== "+tempDir);
318
                File idsForIndex = new File(tempDir, fileName);
319
                if(!idsForIndex.exists()) {
320
                    idsForIndex.createNewFile();
321
                } 
322
                
323
                Date date = Calendar.getInstance().getTime();
324
                SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
325
                String dateStr = format.format(date);
326
                List<String> dateList = new ArrayList<String>();
327
                dateList.add(dateStr);
328
                Boolean append = true;
329
                FileUtils.writeLines(idsForIndex, dateList, append);//write time string
330
                FileUtils.writeLines(idsForIndex, ids, append);
331
            } catch (Exception e) {
332
                log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage());
333
            }
334
           
335
        }
336
    }*/
337
    /*
338
     * Doing index
339
     */
340
    private void index(List<String> metacatIds) {
341
        if(metacatIds != null) {
342
            for(String metacatId : metacatIds) {
343
                if(metacatId != null) {
344
                     generateIndex(metacatId);
345
                }
346
            }
347
        }
348
    }
349
    
350
    /*
351
     * Index those ids which failed in the process (We got them from the EventLog)
352
     */
353
    /*private void indexFailedIds(List<IndexEvent> events) {
354
        if(events != null) {
355
            for(IndexEvent event : events) {
356
                if(event != null) {
357
                    Identifier identifier = event.getIdentifier();
358
                    if(identifier != null) {
359
                        String id = identifier.getValue();
360
                        if(id != null) {
361
                            Event action = event.getAction();
362
                            //if (action != null && action.equals(Event.CREATE)) {
363
                                try {
364
                                    generateIndex(id);
365
                                    EventlogFactory.createIndexEventLog().remove(identifier);
366
                                } catch (Exception e) {
367
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
368
                                }
369
                            
370
                        }
371
                    }
372
                }
373
            }
374
        }
375
    }*/
376
    
377
    public void run() {
378
    
379
        try {
380
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
381
            index(since);
382
        } catch (InvalidRequest e) {
383
            // TODO Auto-generated catch block
384
            //e.printStackTrace();
385
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
386
        } catch (InvalidToken e) {
387
            // TODO Auto-generated catch block
388
            //e.printStackTrace();
389
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
390
        } catch (NotAuthorized e) {
391
            // TODO Auto-generated catch block
392
            //e.printStackTrace();
393
        } catch (NotImplemented e) {
394
            // TODO Auto-generated catch block
395
            //e.printStackTrace();
396
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
397
        } catch (ServiceFailure e) {
398
            // TODO Auto-generated catch block
399
            //e.printStackTrace();
400
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
401
        } catch (SolrServerException e) {
402
            // TODO Auto-generated catch block
403
            //e.printStackTrace();
404
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
405
        } catch (FileNotFoundException e) {
406
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
407
        } catch (ClassNotFoundException e) {
408
            // TODO Auto-generated catch block
409
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
410
        } catch (InstantiationException e) {
411
            // TODO Auto-generated catch block
412
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
413
        } catch (IllegalAccessException e) {
414
            // TODO Auto-generated catch block
415
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
416
        } catch (IndexEventLogException e) {
417
            // TODO Auto-generated catch block
418
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
419
        } catch (XPathExpressionException e) {
420
            // TODO Auto-generated catch block
421
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
422
        } catch (NotFound e) {
423
            // TODO Auto-generated catch block
424
            e.printStackTrace();
425
        } catch (UnsupportedType e) {
426
            // TODO Auto-generated catch block
427
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
428
        } catch (IOException e) {
429
            // TODO Auto-generated catch block
430
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
431
        } catch (SAXException e) {
432
            // TODO Auto-generated catch block
433
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
434
        } catch (ParserConfigurationException e) {
435
            // TODO Auto-generated catch block
436
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
437
        } catch (OREParserException e) {
438
            // TODO Auto-generated catch block
439
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
440
        }
441
    }
442
    
443
   
444
    
445
    /*
446
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
447
     * 
448
     * If since and util are null, it will return all of them.
449
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
450
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
451
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
452
     */
453
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
454
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
455
        String fileName = "ids-from-hazelcast";
456
        List<String> resourceMapIds = new ArrayList();
457
        //List<String> resourceMapDeletedIds = new ArrayList();
458
        List<String> otherIds = new ArrayList();
459
        //List<String> otherDeletedIds = new ArrayList();
460
        List[] ids = new List[2];
461
        ids[FIRST]= otherIds;
462
        ids[SECOND] = resourceMapIds;
463
        //ids[THIRD]  = otherDeletedIds;
464
        //ids[FOURTH] = resourceMapDeletedIds;
465
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
466
        Date otherPreviousDate = null;
467
        Date otherDeletedPreviousDate = null;
468
        Date resourceMapPreviousDate = null;
469
        Date resourceMapDeletedPreviousDate = null;
470
        if(metacatIds != null) {
471
            for(Identifier identifier : metacatIds) {
472
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
473
                    List<String> idLog = new ArrayList<String>();
474
                    idLog.add(identifier.getValue());
475
                    //logFile(idLog, fileName);
476
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
477
                    if(sysmeta != null) {
478
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
479
                        //System.out.println("the object format id is "+formatId.getValue());
480
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
481
                        boolean correctTimeRange = false;
482
                        Date sysDate = sysmeta.getDateSysMetadataModified();
483
                        if(since == null && until == null) {
484
                            correctTimeRange = true;
485
                        } else if (since != null && until == null) {
486
                            if(sysDate.getTime() > since.getTime()) {
487
                                correctTimeRange = true;
488
                            }
489
                        } else if (since == null && until != null) {
490
                            if(sysDate.getTime() < until.getTime()) {
491
                                correctTimeRange = true;
492
                            }
493
                        } else if (since != null && until != null) {
494
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
495
                                correctTimeRange = true;
496
                            }
497
                        }
498
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
499
                            //for the resource map
500
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
501
                                //archived ids
502
                                if(!resourceMapDeletedIds.isEmpty()) {
503
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
504
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
505
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
506
                                    } else {
507
                                        int size = resourceMapDeletedIds.size();//
508
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
509
                                    }
510
                                } else {
511
                                    resourceMapDeletedIds.add(identifier.getValue());
512
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
513
                                }
514
                            } else {*/
515
                                // for all ids
516
                                if(!resourceMapIds.isEmpty()) {
517
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
518
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
519
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
520
                                    } else {
521
                                        int size = resourceMapIds.size();//
522
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
523
                                    }
524
                                } else {
525
                                    resourceMapIds.add(identifier.getValue());
526
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
527
                                }
528
                            //}
529
                        } else if (correctTimeRange) {
530
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
531
                                //for the archived ids
532
                                if(!otherDeletedIds.isEmpty()) {
533
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
534
                                        otherDeletedIds.add(identifier.getValue());
535
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
536
                                    } else {
537
                                        int size = otherDeletedIds.size();
538
                                        otherDeletedIds.add(size-1, identifier.getValue());
539
                                    }
540
                                } else {
541
                                    otherDeletedIds.add(identifier.getValue());
542
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
543
                                }
544
                            } else {*/
545
                                //for all ids
546
                                if(!otherIds.isEmpty()) {
547
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
548
                                        otherIds.add(identifier.getValue());
549
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
550
                                    } else {
551
                                        int size = otherIds.size();
552
                                        otherIds.add(size-1, identifier.getValue());
553
                                    }
554
                                } else {
555
                                    otherIds.add(identifier.getValue());
556
                                    otherPreviousDate = sysDate;//init otherPreviousDate
557
                                }
558
                            //}
559
                        }
560
                        
561
                    }
562
                }
563
            }
564
        }
565
        return ids;
566
    }
567
    
568
    /*
569
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
570
     */
571
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
572
        boolean isResourceMap = false;
573
        if(formatId != null && resourceMapNamespaces != null) {
574
            for(String namespace : resourceMapNamespaces) {
575
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
576
                    isResourceMap = true;
577
                    break;
578
                }
579
            }
580
        }
581
        return isResourceMap;
582
    }
583
    
584
   
585
    
586
    /*
587
     * Generate index for the id.
588
     */
589
    private void generateIndex(String id)  {
590
        //if id is null and sysmeta will be null. If sysmeta is null, it will be caught in solrIndex.update
591
        SystemMetadata sysmeta = getSystemMetadata(id);
592
        Identifier pid = new Identifier();
593
        pid.setValue(id);
594
        solrIndex.update(pid, sysmeta);
595
 
596
    }
597
    
598
    /*
599
     * Remove the solr index for the list of ids
600
     */
601
    /*private void removeIndex(List<String> ids) {
602
        if(ids!= null) {
603
            for(String id :ids) {
604
                try {
605
                    removeIndex(id);
606
                } catch (Exception e) {
607
                    IndexEvent event = new IndexEvent();
608
                    Identifier pid = new Identifier();
609
                    pid.setValue(id);
610
                    event.setIdentifier(pid);
611
                    event.setDate(Calendar.getInstance().getTime());
612
                    event.setAction(Event.DELETE);
613
                    String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage();
614
                    event.setDescription(error);
615
                    try {
616
                        EventlogFactory.createIndexEventLog().write(event);
617
                    } catch (Exception ee) {
618
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
619
                    }
620
                    log.error(error);
621
                }
622
                
623
            }
624
        }
625
    }*/
626
    
627
    /*
628
     * Remove the index for the id
629
     */
630
    /*private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException  {
631
        if(id != null) {
632
            //solrIndex.remove(id);
633
        }
634
    }*/
635
    
636
    /*
637
     * Initialize the system metadata map
638
     */
639
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
640
        int times = 0;
641
        if(systemMetadataMap == null) {
642
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
643
        }
644
    }
645
    
646
    /*
647
     * This method should be called after initSystemMetadataMap, since it has no mechanism of its own to wait for the hazelcast service to be ready.
648
     */
649
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
650
        if(objectPathMap == null) {
651
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
652
        }
653
    }
654
    
655
    
656
    
657
    /*
658
     * Initialize the index queue
659
     */
660
    private void initIndexQueue() throws FileNotFoundException, ServiceFailure {
661
        if(indexQueue == null) {
662
            indexQueue = DistributedMapsFactory.getIndexQueue();
663
        }
664
    }
665
    /**
666
     * Get an InputStream as the data object for the specific pid.
667
     * @param pid
668
     * @return
669
     * @throws FileNotFoundException
670
     */
671
    private InputStream getDataObject(String pid) throws FileNotFoundException {
672
        Identifier identifier = new Identifier();
673
        identifier.setValue(pid);
674
        String objectPath = objectPathMap.get(identifier);
675
        InputStream data = null;
676
        data = new FileInputStream(objectPath);
677
        return data;
678

  
679
    }
680
    
681
    /**
682
     * Get the SystemMetadata for the specified id from the distributed Map.
683
     * Null may be returned if no system metadata is found.
684
     * @param id  the specified id.
685
     * @return the SystemMetadata associated with the id.
686
     */
687
    private SystemMetadata getSystemMetadata(String id) {
688
        SystemMetadata metadata = null;
689
        if(systemMetadataMap != null && id != null) {
690
            Identifier identifier = new Identifier();
691
            identifier.setValue(id);
692
            metadata = systemMetadataMap.get(identifier);
693
        }
694
        return metadata;
695
    }
696
    
697
    /**
698
     * Get the obsoletes chain of the specified id. The returned list doesn't include
699
     * the specified id itself. The newer version has the lower index number in the list.
700
     * Empty list will be returned if there is no document to be obsoleted by this id.
701
     * @param id
702
     * @return
703
     */
704
    private List<String> getObsoletes(String id) {
705
        List<String> obsoletes = new ArrayList<String>();
706
        while (id != null) {
707
            SystemMetadata metadata = getSystemMetadata(id);
708
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
709
            if(metadata != null) {
710
                Identifier identifier = metadata.getObsoletes();
711
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
712
                    obsoletes.add(identifier.getValue());
713
                    id = identifier.getValue();
714
                } 
715
            } 
716
        }
717
        return obsoletes;
718
    }
719
    
720
    /**
721
     * Overwrite and do nothing
722
     */
723
    public boolean cancel() {
724
        // Always reports true and never delegates to a superclass cancel().
        // NOTE(review): presumably this is meant to leave the task scheduled when cancel() is called - confirm.
        return true;
725
    }
726

  
727
}
metacat-index/src/test/java/edu/ucsb/nceas/metacat/index/IndexGeneratorIT.java
36 36
        SolrIndex solrIndex = generateSolrIndex();
37 37
        SystemMetadataEventListener systeMetaListener = new SystemMetadataEventListener(solrIndex);
38 38
        systeMetaListener.start();
39
        IndexGenerator generator = new IndexGenerator(solrIndex);
39
        IndexGeneratorTimerTask generator = new IndexGeneratorTimerTask(solrIndex);
40 40
        generator.indexAll();
41 41
        String result = SolrIndexIT.doQuery(solrIndex.getSolrServer());
42 42
        systeMetaListener.stop();
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGeneratorTimerTask.java
1
/**
2
 *  Copyright: 2013 Regents of the University of California and the
3
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20

  
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileNotFoundException;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.text.SimpleDateFormat;
27
import java.util.ArrayList;
28
import java.util.Calendar;
29
import java.util.Collections;
30
import java.util.Date;
31
import java.util.List;
32
import java.util.TimerTask;
33

  
34
import javax.xml.parsers.ParserConfigurationException;
35
import javax.xml.xpath.XPathExpressionException;
36

  
37
import org.apache.commons.io.FileUtils;
38
import org.apache.commons.logging.Log;
39
import org.apache.commons.logging.LogFactory;
40
import org.apache.solr.client.solrj.SolrServerException;
41
import org.dataone.configuration.Settings;
42
import org.dataone.service.exceptions.InvalidRequest;
43
import org.dataone.service.exceptions.InvalidToken;
44
import org.dataone.service.exceptions.NotAuthorized;
45
import org.dataone.service.exceptions.NotFound;
46
import org.dataone.service.exceptions.NotImplemented;
47
import org.dataone.service.exceptions.ServiceFailure;
48
import org.dataone.service.exceptions.UnsupportedType;
49
import org.dataone.service.types.v1.Event;
50
import org.dataone.service.types.v1.Identifier;
51
import org.dataone.service.types.v1.ObjectFormatIdentifier;
52
import org.dataone.service.types.v1.SystemMetadata;
53
import org.dspace.foresite.OREParserException;
54
import org.xml.sax.SAXException;
55

  
56
import com.hazelcast.core.IMap;
57
import com.hazelcast.core.ISet;
58

  
59
import edu.ucsb.nceas.metacat.common.SolrServerFactory;
60
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
61
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
62
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
63

  
64

  
65
/**
66
 * A class represents the object to generate massive solr indexes.
67
 * This can happen during an update of Metacat (generating index for all existing documents)
68
 * or regenerate index for those documents
69
 * failing to build index during the insert or update.
70
 * 
71
 * @author tao
72
 *
73
 */
74
public class IndexGeneratorTimerTask extends TimerTask {
75
    
76
    private static final int FIRST =0;
77
    private static final int SECOND =1;
78
    private static final int THIRD = 2;
79
    private static final int FOURTH = 3;
80
    public static final int WAITTIME = 10000;
81
    public static final int MAXWAITNUMBER = 180;
82
    private static final String HTTP = "http://";
83
    private static final String MNAPPENDIX = "/d1/mn";
84
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
85
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
86
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
87
    
88
    
89
    private SolrIndex solrIndex = null;
90
    //private SystemMetadataEventListener systemMetadataListener = null;
91
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
92
    private IMap<Identifier, String> objectPathMap;
93
    private ISet<SystemMetadata> indexQueue;
94
    private Log log = LogFactory.getLog(IndexGeneratorTimerTask.class);
95
    //private MNode mNode = null;
96
    private static List<String> resourceMapNamespaces = null;
97
    
98
    /**
99
     * Constructor
100
     * @param solrIndex
101
     * @param systemMetadataListener
102
     */
103
    public IndexGeneratorTimerTask(SolrIndex solrIndex) {
        // The namespace list lives in a static field, so every construction
        // re-reads it from the configuration and replaces the class-wide value.
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
        // Keep the index that later update calls are sent to.
        this.solrIndex = solrIndex;
    }
110
    
111
   
112
    
113
    /**
114
     * Build the index for all documents.
115
     * @throws SolrServerException 
116
     * @throws ServiceFailure 
117
     * @throws NotImplemented 
118
     * @throws NotAuthorized 
119
     * @throws InvalidToken 
120
     * @throws InvalidRequest 
121
     * @throws IndexEventLogException 
122
     * @throws IllegalAccessException 
123
     * @throws InstantiationException 
124
     * @throws ClassNotFoundException 
125
     * @throws ParserConfigurationException 
126
     * @throws SAXException 
127
     * @throws IOException 
128
     * @throws UnsupportedType 
129
     * @throws NotFound 
130
     * @throws XPathExpressionException 
131
     * @throws OREParserException 
132
     */
133
    public void indexAll() throws InvalidRequest, InvalidToken,
134
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
135
        Date since = null;
136
        Date until = null;
137
        index(since, until);
138
    }
139
    
140
    /**
141
     * Build the index for the docs which have been modified since the specified date.
142
     * @param since
143
     * @throws SolrServerException 
144
     * @throws ServiceFailure 
145
     * @throws NotImplemented 
146
     * @throws NotAuthorized 
147
     * @throws InvalidToken 
148
     * @throws InvalidRequest 
149
     * @throws IndexEventLogException 
150
     * @throws IllegalAccessException 
151
     * @throws InstantiationException 
152
     * @throws ClassNotFoundException 
153
     * @throws ParserConfigurationException 
154
     * @throws SAXException 
155
     * @throws IOException 
156
     * @throws UnsupportedType 
157
     * @throws NotFound 
158
     * @throws XPathExpressionException 
159
     * @throws OREParserException 
160
     */
161
    public void index(Date since) throws InvalidRequest, InvalidToken, 
162
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
163
        Date until = null;
164
        index(since, until);
165
    }
166
    
167
    /**
168
     *  Build the index for the docs which have been modified between the specified dates.
169
     * @param since
170
     * @param until
171
     * @throws SolrServerException 
172
     * @throws ServiceFailure 
173
     * @throws NotImplemented 
174
     * @throws NotAuthorized 
175
     * @throws InvalidToken 
176
     * @throws InvalidRequest 
177
     * @throws IndexEventLogException 
178
     * @throws IllegalAccessException 
179
     * @throws InstantiationException 
180
     * @throws ClassNotFoundException 
181
     * @throws ParserConfigurationException 
182
     * @throws SAXException 
183
     * @throws IOException 
184
     * @throws UnsupportedType 
185
     * @throws NotFound 
186
     * @throws XPathExpressionException 
187
     * @throws OREParserException 
188
     */
189
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
190
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
191
        Date processedDate = null;
192
        List<String> solrIds = null;
193
        initSystemMetadataMap();
194
        initObjectPathMap();
195
        initIndexQueue();
196
        List[] metacatIds = getMetacatIds(since, until);
197
        List<String> otherMetacatIds = metacatIds[FIRST];
198
        List<String> resourceMapIds =  metacatIds[SECOND];
199
        //List<String> otherDeletedMetacatIds = metacatIds[THIRD];
200
        //List<String> resourceMapDeletedIds = metacatIds[FOURTH];
201
        
202
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
203
        List<Long> maxCollection = new ArrayList<Long>();
204
        Date latestOtherId = null;
205
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
206
            int size = otherMetacatIds.size();
207
            String id = otherMetacatIds.get(size-1);
208
            SystemMetadata sysmeta = getSystemMetadata(id);
209
            latestOtherId = sysmeta.getDateSysMetadataModified();
210
            maxCollection.add(new Long(latestOtherId.getTime()));
211
        }
212
        
213
        /*Date latestDeletedOtherIds = null;
214
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
215
            int size = otherDeletedMetacatIds.size();
216
            String id = otherDeletedMetacatIds.get(size-1);
217
            SystemMetadata sysmeta = getSystemMetadata(id);
218
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
219
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
220
        }*/
221
        
222
        Date latestResourceId = null;
223
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
224
            int size = resourceMapIds.size();
225
            String id = resourceMapIds.get(size-1);
226
            SystemMetadata sysmeta = getSystemMetadata(id);
227
            latestResourceId = sysmeta.getDateSysMetadataModified();
228
            maxCollection.add(new Long(latestResourceId.getTime()));
229
        }
230
        
231
        /*Date latestDeletedResourceId = null;
232
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
233
            int size = resourceMapDeletedIds.size();
234
            String id = resourceMapDeletedIds.get(size-1);
235
            SystemMetadata sysmeta = getSystemMetadata(id);
236
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
237
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
238
        }*/
239
        
240
        if(!maxCollection.isEmpty()) {
241
            Long max = Collections.max(maxCollection);
242
            processedDate = new Date(max.longValue());
243
        }
244
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
245
            processedDate = latestOtherId;
246
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
247
            processedDate = latestResourceId;
248
        } else if (latestOtherId == null && latestResourceId != null) {
249
            processedDate = latestResourceId;
250
        } else if (latestOtherId != null && latestResourceId == null) {
251
            processedDate = latestOtherId;
252
        }*/
253
        
254
        
255
        //add the failedPids 
256
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
257
        List<String> failedOtherIds = new ArrayList<String>();
258
        List<String> failedResourceMapIds = new ArrayList<String>();
259
        if(failedEvents != null) {
260
            for(IndexEvent event : failedEvents) {
261
            	String id = event.getIdentifier().getValue();
262
                SystemMetadata sysmeta = getSystemMetadata(id);
263
                if(sysmeta != null) {
264
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
265
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
266
                        failedResourceMapIds.add(id);
267
                    } else {
268
                        failedOtherIds.add(id);
269
                    }
270
                }
271
            }
272
        }
273
        //indexFailedIds(failedOtherIds);
274
        //indexFailedIds(failedResourceMapIds);
275
        
276
        index(failedOtherIds);
277
        index(failedResourceMapIds);
278
        
279
        /*if(!failedOtherIds.isEmpty()) {
280
            failedOtherIds.addAll(otherMetacatIds);
281
        } else {
282
            failedOtherIds = otherMetacatIds;
283
        }
284
        
285
        if(!failedResourceMapIds.isEmpty()) {
286
            failedResourceMapIds.addAll(resourceMapIds);
287
        } else {
288
            failedResourceMapIds = resourceMapIds;
289
        }*/
290
        //log.info("the ids in index_event for reindex ( except the resourcemap)=====================================\n "+failedOtherIds);
291
        //log.info("the resourcemap ids in index_event for reindex =====================================\n "+failedResourceMapIds);
292
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
293
        //logFile(otherMetacatIds, "ids-for-timed-indexing-log");
294
        //log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
295
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
296
        //logFile(resourceMapIds, "ids-for-timed-indexing-log");
297
        //log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
298
        index(otherMetacatIds);
299
        //removeIndex(otherDeletedMetacatIds);
300
        index(resourceMapIds);
301
        //removeIndex(resourceMapDeletedIds);
302
       
303
        //record the timed index.
304
        if(processedDate != null) {
305
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
306
        }
307
        
308
    }
309
    
310
    /*
311
     * Write the docids which will be indexed into a file. 
312
     */
313
    /*private void logFile(List<String> ids, String fileName)  {
314
        if(ids != null) {
315
            try {
316
                String tempDir = System.getProperty("java.io.tmpdir");
317
                log.info("the temp dir is ===================== "+tempDir);
318
                File idsForIndex = new File(tempDir, fileName);
319
                if(!idsForIndex.exists()) {
320
                    idsForIndex.createNewFile();
321
                } 
322
                
323
                Date date = Calendar.getInstance().getTime();
324
                SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
325
                String dateStr = format.format(date);
326
                List<String> dateList = new ArrayList<String>();
327
                dateList.add(dateStr);
328
                Boolean append = true;
329
                FileUtils.writeLines(idsForIndex, dateList, append);//write time string
330
                FileUtils.writeLines(idsForIndex, ids, append);
331
            } catch (Exception e) {
332
                log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage());
333
            }
334
           
335
        }
336
    }*/
337
    /*
338
     * Doing index
339
     */
340
    private void index(List<String> metacatIds) {
341
        if(metacatIds != null) {
342
            for(String metacatId : metacatIds) {
343
                if(metacatId != null) {
344
                     generateIndex(metacatId);
345
                }
346
            }
347
        }
348
    }
349
    
350
    /*
351
     * Index those ids which failed in the process (We got them from the EventLog)
352
     */
353
    /*private void indexFailedIds(List<IndexEvent> events) {
354
        if(events != null) {
355
            for(IndexEvent event : events) {
356
                if(event != null) {
357
                    Identifier identifier = event.getIdentifier();
358
                    if(identifier != null) {
359
                        String id = identifier.getValue();
360
                        if(id != null) {
361
                            Event action = event.getAction();
362
                            //if (action != null && action.equals(Event.CREATE)) {
363
                                try {
364
                                    generateIndex(id);
365
                                    EventlogFactory.createIndexEventLog().remove(identifier);
366
                                } catch (Exception e) {
367
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
368
                                }
369
                            
370
                        }
371
                    }
372
                }
373
            }
374
        }
375
    }*/
376
    
377
    public void run() {
378
    
379
        try {
380
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
381
            index(since);
382
        } catch (InvalidRequest e) {
383
            // TODO Auto-generated catch block
384
            //e.printStackTrace();
385
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
386
        } catch (InvalidToken e) {
387
            // TODO Auto-generated catch block
388
            //e.printStackTrace();
389
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
390
        } catch (NotAuthorized e) {
391
            // TODO Auto-generated catch block
392
            //e.printStackTrace();
393
        } catch (NotImplemented e) {
394
            // TODO Auto-generated catch block
395
            //e.printStackTrace();
396
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
397
        } catch (ServiceFailure e) {
398
            // TODO Auto-generated catch block
399
            //e.printStackTrace();
400
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
401
        } catch (SolrServerException e) {
402
            // TODO Auto-generated catch block
403
            //e.printStackTrace();
404
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
405
        } catch (FileNotFoundException e) {
406
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
407
        } catch (ClassNotFoundException e) {
408
            // TODO Auto-generated catch block
409
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
410
        } catch (InstantiationException e) {
411
            // TODO Auto-generated catch block
412
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
413
        } catch (IllegalAccessException e) {
414
            // TODO Auto-generated catch block
415
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
416
        } catch (IndexEventLogException e) {
417
            // TODO Auto-generated catch block
418
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
419
        } catch (XPathExpressionException e) {
420
            // TODO Auto-generated catch block
421
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
422
        } catch (NotFound e) {
423
            // TODO Auto-generated catch block
424
            e.printStackTrace();
425
        } catch (UnsupportedType e) {
426
            // TODO Auto-generated catch block
427
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
428
        } catch (IOException e) {
429
            // TODO Auto-generated catch block
430
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
431
        } catch (SAXException e) {
432
            // TODO Auto-generated catch block
433
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
434
        } catch (ParserConfigurationException e) {
435
            // TODO Auto-generated catch block
436
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
437
        } catch (OREParserException e) {
438
            // TODO Auto-generated catch block
439
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
440
        }
441
    }
442
    
443
   
444
    
445
    /*
446
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
447
     * 
448
     * If since and until are null, it will return all of them.
449
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
450
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
451
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
452
     */
453
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
454
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
455
        String fileName = "ids-from-hazelcast";
456
        List<String> resourceMapIds = new ArrayList();
457
        //List<String> resourceMapDeletedIds = new ArrayList();
458
        List<String> otherIds = new ArrayList();
459
        //List<String> otherDeletedIds = new ArrayList();
460
        List[] ids = new List[2];
461
        ids[FIRST]= otherIds;
462
        ids[SECOND] = resourceMapIds;
463
        //ids[THIRD]  = otherDeletedIds;
464
        //ids[FOURTH] = resourceMapDeletedIds;
465
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
466
        Date otherPreviousDate = null;
467
        Date otherDeletedPreviousDate = null;
468
        Date resourceMapPreviousDate = null;
469
        Date resourceMapDeletedPreviousDate = null;
470
        if(metacatIds != null) {
471
            for(Identifier identifier : metacatIds) {
472
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
473
                    List<String> idLog = new ArrayList<String>();
474
                    idLog.add(identifier.getValue());
475
                    //logFile(idLog, fileName);
476
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
477
                    if(sysmeta != null) {
478
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
479
                        //System.out.println("the object format id is "+formatId.getValue());
480
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
481
                        boolean correctTimeRange = false;
482
                        Date sysDate = sysmeta.getDateSysMetadataModified();
483
                        if(since == null && until == null) {
484
                            correctTimeRange = true;
485
                        } else if (since != null && until == null) {
486
                            if(sysDate.getTime() > since.getTime()) {
487
                                correctTimeRange = true;
488
                            }
489
                        } else if (since == null && until != null) {
490
                            if(sysDate.getTime() < until.getTime()) {
491
                                correctTimeRange = true;
492
                            }
493
                        } else if (since != null && until != null) {
494
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
495
                                correctTimeRange = true;
496
                            }
497
                        }
498
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
499
                            //for the resource map
500
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
501
                                //archived ids
502
                                if(!resourceMapDeletedIds.isEmpty()) {
503
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
504
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
505
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
506
                                    } else {
507
                                        int size = resourceMapDeletedIds.size();//
508
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
509
                                    }
510
                                } else {
511
                                    resourceMapDeletedIds.add(identifier.getValue());
512
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
513
                                }
514
                            } else {*/
515
                                // for all ids
516
                                if(!resourceMapIds.isEmpty()) {
517
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
518
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
519
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
520
                                    } else {
521
                                        int size = resourceMapIds.size();//
522
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
523
                                    }
524
                                } else {
525
                                    resourceMapIds.add(identifier.getValue());
526
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
527
                                }
528
                            //}
529
                        } else if (correctTimeRange) {
530
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
531
                                //for the archived ids
532
                                if(!otherDeletedIds.isEmpty()) {
533
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
534
                                        otherDeletedIds.add(identifier.getValue());
535
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
536
                                    } else {
537
                                        int size = otherDeletedIds.size();
538
                                        otherDeletedIds.add(size-1, identifier.getValue());
539
                                    }
540
                                } else {
541
                                    otherDeletedIds.add(identifier.getValue());
542
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
543
                                }
544
                            } else {*/
545
                                //for all ids
546
                                if(!otherIds.isEmpty()) {
547
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
548
                                        otherIds.add(identifier.getValue());
549
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
550
                                    } else {
551
                                        int size = otherIds.size();
552
                                        otherIds.add(size-1, identifier.getValue());
553
                                    }
554
                                } else {
555
                                    otherIds.add(identifier.getValue());
556
                                    otherPreviousDate = sysDate;//init otherPreviousDate
557
                                }
558
                            //}
559
                        }
560
                        
561
                    }
562
                }
563
            }
564
        }
565
        return ids;
566
    }
567
    
568
    /*
569
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
570
     */
571
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
572
        boolean isResourceMap = false;
573
        if(formatId != null && resourceMapNamespaces != null) {
574
            for(String namespace : resourceMapNamespaces) {
575
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
576
                    isResourceMap = true;
577
                    break;
578
                }
579
            }
580
        }
581
        return isResourceMap;
582
    }
583
    
584
   
585
    
586
    /*
587
     * Generate index for the id.
588
     */
589
    private void generateIndex(String id)  {
590
        //if id is null and sysmeta will be null. If sysmeta is null, it will be caught in solrIndex.update
591
        SystemMetadata sysmeta = getSystemMetadata(id);
592
        Identifier pid = new Identifier();
593
        pid.setValue(id);
594
        solrIndex.update(pid, sysmeta);
595
 
596
    }
597
    
598
    /*
599
     * Remove the solr index for the list of ids
600
     */
601
    /*private void removeIndex(List<String> ids) {
602
        if(ids!= null) {
603
            for(String id :ids) {
604
                try {
605
                    removeIndex(id);
606
                } catch (Exception e) {
607
                    IndexEvent event = new IndexEvent();
608
                    Identifier pid = new Identifier();
609
                    pid.setValue(id);
610
                    event.setIdentifier(pid);
611
                    event.setDate(Calendar.getInstance().getTime());
612
                    event.setAction(Event.DELETE);
613
                    String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage();
614
                    event.setDescription(error);
615
                    try {
616
                        EventlogFactory.createIndexEventLog().write(event);
617
                    } catch (Exception ee) {
618
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
619
                    }
620
                    log.error(error);
621
                }
622
                
623
            }
624
        }
625
    }*/
626
    
627
    /*
628
     * Remove the index for the id
629
     */
630
    /*private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException  {
631
        if(id != null) {
632
            //solrIndex.remove(id);
633
        }
634
    }*/
635
    
636
    /*
637
     * Initialize the system metadata map
638
     */
639
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
640
        int times = 0;
641
        if(systemMetadataMap == null) {
642
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
643
        }
644
    }
645
    
646
    /*
647
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
648
     */
649
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
650
        if(objectPathMap == null) {
651
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
652
        }
653
    }
654
    
655
    
656
    
657
    /*
658
     * Initialize the index queue
659
     */
660
    private void initIndexQueue() throws FileNotFoundException, ServiceFailure {
661
        if(indexQueue == null) {
662
            indexQueue = DistributedMapsFactory.getIndexQueue();
663
        }
664
    }
665
    /**
666
     * Get an InputStream as the data object for the specific pid.
667
     * @param pid
668
     * @return
669
     * @throws FileNotFoundException
670
     */
671
    private InputStream getDataObject(String pid) throws FileNotFoundException {
672
        Identifier identifier = new Identifier();
673
        identifier.setValue(pid);
674
        String objectPath = objectPathMap.get(identifier);
675
        InputStream data = null;
676
        data = new FileInputStream(objectPath);
677
        return data;
678

  
679
    }
680
    
681
    /**
682
     * Get the SystemMetadata for the specified id from the distributed Map.
683
     * The null maybe is returned if there is no system metadata found.
684
     * @param id  the specified id.
685
     * @return the SystemMetadata associated with the id.
686
     */
687
    private SystemMetadata getSystemMetadata(String id) {
688
        SystemMetadata metadata = null;
689
        if(systemMetadataMap != null && id != null) {
690
            Identifier identifier = new Identifier();
691
            identifier.setValue(id);
692
            metadata = systemMetadataMap.get(identifier);
693
        }
694
        return metadata;
695
    }
696
    
697
    /**
698
     * Get the obsoletes chain of the specified id. The returned list doesn't include
699
     * the specified id itself. The newer version has the lower index number in the list.
700
     * Empty list will be returned if there is no document to be obsoleted by this id.
701
     * @param id
702
     * @return
703
     */
704
    private List<String> getObsoletes(String id) {
705
        List<String> obsoletes = new ArrayList<String>();
706
        while (id != null) {
707
            SystemMetadata metadata = getSystemMetadata(id);
708
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
709
            if(metadata != null) {
710
                Identifier identifier = metadata.getObsoletes();
711
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
712
                    obsoletes.add(identifier.getValue());
713
                    id = identifier.getValue();
714
                } 
715
            } 
716
        }
717
        return obsoletes;
718
    }
719
    
720
    /**
721
     * Overwrite and do nothing
722
     */
723
    public boolean cancel() {
724
        return true;
725
    }
726

  
727
}
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/ApplicationController.java
55 55
    private static ApplicationContext context = null;
56 56
    private String springConfigFileURL = "/index-processor-context.xml";
57 57
    private String metacatPropertiesFile = null;
58
    private static int waitingTime = IndexGenerator.WAITTIME;
59
    private static int maxAttempts = IndexGenerator.MAXWAITNUMBER;
58
    private static int waitingTime = IndexGeneratorTimerTask.WAITTIME;
59
    private static int maxAttempts = IndexGeneratorTimerTask.MAXWAITNUMBER;
60 60
    private static long period = DEFAULTINTERVAL;
61 61
    Log log = LogFactory.getLog(ApplicationController.class);
62 62
    
......
206 206
        if(period > 0) {
207 207
            SolrIndex index = solrIndexes.get(FIRST);
208 208
            //SystemMetadataEventListener listener = sysmetaListeners.get(FIRST);
209
            IndexGenerator generator = new IndexGenerator(index);
209
            IndexGeneratorTimerTask generator = new IndexGeneratorTimerTask(index);
210 210
            //Thread indexThread = new Thread(generator);
211 211
            //indexThread.start();
212 212
            Timer indexTimer = new Timer();
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff