Project

General

Profile

1 7606 tao
/**
2 8138 tao
 *  Copyright: 2013 Regents of the University of California and the
3 7606 tao
 *             National Center for Ecological Analysis and Synthesis
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
package edu.ucsb.nceas.metacat.index;
20
21 7691 tao
import java.io.FileInputStream;
22
import java.io.FileNotFoundException;
23 7876 tao
import java.io.IOException;
24 7606 tao
import java.io.InputStream;
25
import java.util.ArrayList;
26 7876 tao
import java.util.Collections;
27 7606 tao
import java.util.Date;
28
import java.util.List;
29 7788 tao
import java.util.TimerTask;
30 7606 tao
31 7876 tao
import javax.xml.parsers.ParserConfigurationException;
32
import javax.xml.xpath.XPathExpressionException;
33
34 7606 tao
import org.apache.commons.logging.Log;
35
import org.apache.commons.logging.LogFactory;
36
import org.apache.solr.client.solrj.SolrServerException;
37
import org.dataone.configuration.Settings;
38
import org.dataone.service.exceptions.InvalidRequest;
39
import org.dataone.service.exceptions.InvalidToken;
40
import org.dataone.service.exceptions.NotAuthorized;
41 7876 tao
import org.dataone.service.exceptions.NotFound;
42 7606 tao
import org.dataone.service.exceptions.NotImplemented;
43
import org.dataone.service.exceptions.ServiceFailure;
44 7876 tao
import org.dataone.service.exceptions.UnsupportedType;
45 7606 tao
import org.dataone.service.types.v1.Identifier;
46 7740 tao
import org.dataone.service.types.v1.ObjectFormatIdentifier;
47 8826 leinfelder
import org.dataone.service.types.v2.SystemMetadata;
48 8023 tao
import org.dspace.foresite.OREParserException;
49 7876 tao
import org.xml.sax.SAXException;
50 7606 tao
51 7691 tao
import com.hazelcast.core.IMap;
52 7793 tao
import com.hazelcast.core.ISet;
53 7606 tao
54 8464 leinfelder
import edu.ucsb.nceas.metacat.common.index.IndexTask;
55 7828 leinfelder
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent;
56 8864 tao
import edu.ucsb.nceas.metacat.common.resourcemap.ResourceMapNamespaces;
57 7802 tao
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
58 7806 tao
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
59 7691 tao
60 7802 tao
61 7606 tao
/**
 * A timer task that generates solr index entries in bulk.
 * It is used during an upgrade of Metacat (to generate the index for all existing documents)
 * and to regenerate the index for those documents whose indexing failed
 * during an insert or an update.
 *
 * @author tao
 *
 */
70 8352 tao
public class IndexGeneratorTimerTask extends TimerTask {
71 7606 tao
72 7740 tao
    private static final int FIRST =0;
73
    private static final int SECOND =1;
74 7876 tao
    private static final int THIRD = 2;
75
    private static final int FOURTH = 3;
76 7774 tao
    public static final int WAITTIME = 10000;
77
    public static final int MAXWAITNUMBER = 180;
78 7606 tao
    private static final String HTTP = "http://";
79
    private static final String MNAPPENDIX = "/d1/mn";
80 8864 tao
    //private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
81 7774 tao
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
82
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
83 7748 tao
84
85 7606 tao
    private SolrIndex solrIndex = null;
86 7691 tao
    //private SystemMetadataEventListener systemMetadataListener = null;
87
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
88
    private IMap<Identifier, String> objectPathMap;
89 8464 leinfelder
    private IMap<Identifier, IndexTask> indexQueue;
90 8352 tao
    private Log log = LogFactory.getLog(IndexGeneratorTimerTask.class);
91 7793 tao
    //private MNode mNode = null;
92 7786 tao
    private static List<String> resourceMapNamespaces = null;
93 7606 tao
94
    /**
     * Creates a task that sends its index updates to the given solr index.
     * Constructing an instance also reloads the class-wide list of resource map
     * namespaces from {@code ResourceMapNamespaces.getNamespaces()}.
     *
     * @param solrIndex  the index that receives the updates generated by this task
     */
    public IndexGeneratorTimerTask(SolrIndex solrIndex) {
        // The namespace list is static, so it is shared by every instance of this class.
        IndexGeneratorTimerTask.resourceMapNamespaces = ResourceMapNamespaces.getNamespaces();
        this.solrIndex = solrIndex;
    }
106
107 8343 tao
108 7606 tao
109
    /**
110 7806 tao
     * Build the index for all documents.
111 7606 tao
     * @throws SolrServerException
112
     * @throws ServiceFailure
113
     * @throws NotImplemented
114
     * @throws NotAuthorized
115
     * @throws InvalidToken
116
     * @throws InvalidRequest
117 7806 tao
     * @throws IndexEventLogException
118
     * @throws IllegalAccessException
119
     * @throws InstantiationException
120
     * @throws ClassNotFoundException
121 7876 tao
     * @throws ParserConfigurationException
122
     * @throws SAXException
123
     * @throws IOException
124
     * @throws UnsupportedType
125
     * @throws NotFound
126
     * @throws XPathExpressionException
127 8023 tao
     * @throws OREParserException
128 7606 tao
     */
129 7806 tao
    public void indexAll() throws InvalidRequest, InvalidToken,
130 8023 tao
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
131 7606 tao
        Date since = null;
132
        Date until = null;
133 7806 tao
        index(since, until);
134 7606 tao
    }
135
136
    /**
137
     * Build the index for the docs which have been modified since the specified date.
138
     * @param since
139
     * @throws SolrServerException
140
     * @throws ServiceFailure
141
     * @throws NotImplemented
142
     * @throws NotAuthorized
143
     * @throws InvalidToken
144
     * @throws InvalidRequest
145 7806 tao
     * @throws IndexEventLogException
146
     * @throws IllegalAccessException
147
     * @throws InstantiationException
148
     * @throws ClassNotFoundException
149 7876 tao
     * @throws ParserConfigurationException
150
     * @throws SAXException
151
     * @throws IOException
152
     * @throws UnsupportedType
153
     * @throws NotFound
154
     * @throws XPathExpressionException
155 8023 tao
     * @throws OREParserException
156 7606 tao
     */
157 7806 tao
    public void index(Date since) throws InvalidRequest, InvalidToken,
158 8023 tao
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
159 7606 tao
        Date until = null;
160 7806 tao
        index(since, until);
161 7606 tao
    }
162
163
    /**
164
     *  Build the index for the docs which have been modified between the specified date.s
165
     * @param since
166
     * @param until
167
     * @throws SolrServerException
168
     * @throws ServiceFailure
169
     * @throws NotImplemented
170
     * @throws NotAuthorized
171
     * @throws InvalidToken
172
     * @throws InvalidRequest
173 7806 tao
     * @throws IndexEventLogException
174
     * @throws IllegalAccessException
175
     * @throws InstantiationException
176
     * @throws ClassNotFoundException
177 7876 tao
     * @throws ParserConfigurationException
178
     * @throws SAXException
179
     * @throws IOException
180
     * @throws UnsupportedType
181
     * @throws NotFound
182
     * @throws XPathExpressionException
183 8023 tao
     * @throws OREParserException
184 7606 tao
     */
185 7806 tao
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest,
186 8023 tao
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException {
187 7806 tao
        Date processedDate = null;
188 7606 tao
        List<String> solrIds = null;
189 7740 tao
        initSystemMetadataMap();
190
        initObjectPathMap();
191 8293 tao
        initIndexQueue();
192 7806 tao
        List[] metacatIds = getMetacatIds(since, until);
193
        List<String> otherMetacatIds = metacatIds[FIRST];
194
        List<String> resourceMapIds =  metacatIds[SECOND];
195 8291 tao
        //List<String> otherDeletedMetacatIds = metacatIds[THIRD];
196
        //List<String> resourceMapDeletedIds = metacatIds[FOURTH];
197 7806 tao
198
        //figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds.
199 7876 tao
        List<Long> maxCollection = new ArrayList<Long>();
200 7806 tao
        Date latestOtherId = null;
201
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
202
            int size = otherMetacatIds.size();
203
            String id = otherMetacatIds.get(size-1);
204
            SystemMetadata sysmeta = getSystemMetadata(id);
205
            latestOtherId = sysmeta.getDateSysMetadataModified();
206 7876 tao
            maxCollection.add(new Long(latestOtherId.getTime()));
207 7606 tao
        }
208 7876 tao
209 8291 tao
        /*Date latestDeletedOtherIds = null;
210 7876 tao
        if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) {
211
            int size = otherDeletedMetacatIds.size();
212
            String id = otherDeletedMetacatIds.get(size-1);
213
            SystemMetadata sysmeta = getSystemMetadata(id);
214
            latestDeletedOtherIds = sysmeta.getDateSysMetadataModified();
215
            maxCollection.add(new Long(latestDeletedOtherIds.getTime()));
216 8291 tao
        }*/
217 7876 tao
218 7806 tao
        Date latestResourceId = null;
219
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
220
            int size = resourceMapIds.size();
221
            String id = resourceMapIds.get(size-1);
222
            SystemMetadata sysmeta = getSystemMetadata(id);
223
            latestResourceId = sysmeta.getDateSysMetadataModified();
224 7876 tao
            maxCollection.add(new Long(latestResourceId.getTime()));
225 7806 tao
        }
226 7876 tao
227 8291 tao
        /*Date latestDeletedResourceId = null;
228 7876 tao
        if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) {
229
            int size = resourceMapDeletedIds.size();
230
            String id = resourceMapDeletedIds.get(size-1);
231
            SystemMetadata sysmeta = getSystemMetadata(id);
232
            latestDeletedResourceId = sysmeta.getDateSysMetadataModified();
233
            maxCollection.add(new Long(latestDeletedResourceId.getTime()));
234 8291 tao
        }*/
235 7876 tao
236
        if(!maxCollection.isEmpty()) {
237
            Long max = Collections.max(maxCollection);
238
            processedDate = new Date(max.longValue());
239
        }
240
        /*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
241 7806 tao
            processedDate = latestOtherId;
242
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
243
            processedDate = latestResourceId;
244
        } else if (latestOtherId == null && latestResourceId != null) {
245
            processedDate = latestResourceId;
246
        } else if (latestOtherId != null && latestResourceId == null) {
247
            processedDate = latestOtherId;
248 7876 tao
        }*/
249 7806 tao
250 7876 tao
251 7806 tao
        //add the failedPids
252 7815 leinfelder
        List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null);
253 8293 tao
        List<String> failedOtherIds = new ArrayList<String>();
254
        List<String> failedResourceMapIds = new ArrayList<String>();
255 7815 leinfelder
        if(failedEvents != null) {
256
            for(IndexEvent event : failedEvents) {
257
            	String id = event.getIdentifier().getValue();
258 7806 tao
                SystemMetadata sysmeta = getSystemMetadata(id);
259 7857 tao
                if(sysmeta != null) {
260 7806 tao
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
261
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
262 8293 tao
                        failedResourceMapIds.add(id);
263 7806 tao
                    } else {
264 8293 tao
                        failedOtherIds.add(id);
265 7806 tao
                    }
266
                }
267
            }
268
        }
269 8293 tao
        //indexFailedIds(failedOtherIds);
270
        //indexFailedIds(failedResourceMapIds);
271 7806 tao
272 8293 tao
        index(failedOtherIds);
273
        index(failedResourceMapIds);
274
275 7857 tao
        /*if(!failedOtherIds.isEmpty()) {
276 7806 tao
            failedOtherIds.addAll(otherMetacatIds);
277
        } else {
278
            failedOtherIds = otherMetacatIds;
279
        }
280
281
        if(!failedResourceMapIds.isEmpty()) {
282
            failedResourceMapIds.addAll(resourceMapIds);
283
        } else {
284
            failedResourceMapIds = resourceMapIds;
285 7857 tao
        }*/
286 8293 tao
        //log.info("the ids in index_event for reindex ( except the resourcemap)=====================================\n "+failedOtherIds);
287
        //log.info("the resourcemap ids in index_event for reindex =====================================\n "+failedResourceMapIds);
288 7857 tao
        log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds);
289 8027 tao
        //logFile(otherMetacatIds, "ids-for-timed-indexing-log");
290 8291 tao
        //log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds);
291 7857 tao
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
292 8027 tao
        //logFile(resourceMapIds, "ids-for-timed-indexing-log");
293 8291 tao
        //log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds);
294 7857 tao
        index(otherMetacatIds);
295 8291 tao
        //removeIndex(otherDeletedMetacatIds);
296 7857 tao
        index(resourceMapIds);
297 8291 tao
        //removeIndex(resourceMapDeletedIds);
298 7740 tao
299 7806 tao
        //record the timed index.
300
        if(processedDate != null) {
301
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
302
        }
303 7734 tao
304 7740 tao
    }
305
306
    /*
307 7924 tao
     * Write the docids which will be indexed into a file.
308
     */
309 8084 tao
    /*private void logFile(List<String> ids, String fileName)  {
310 7924 tao
        if(ids != null) {
311 7931 tao
            try {
312
                String tempDir = System.getProperty("java.io.tmpdir");
313
                log.info("the temp dir is ===================== "+tempDir);
314 8017 tao
                File idsForIndex = new File(tempDir, fileName);
315 7931 tao
                if(!idsForIndex.exists()) {
316
                    idsForIndex.createNewFile();
317 8017 tao
                }
318
319 7931 tao
                Date date = Calendar.getInstance().getTime();
320
                SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z");
321
                String dateStr = format.format(date);
322
                List<String> dateList = new ArrayList<String>();
323
                dateList.add(dateStr);
324
                Boolean append = true;
325
                FileUtils.writeLines(idsForIndex, dateList, append);//write time string
326
                FileUtils.writeLines(idsForIndex, ids, append);
327
            } catch (Exception e) {
328
                log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage());
329 7924 tao
            }
330 7931 tao
331 7924 tao
        }
332 8084 tao
    }*/
333 7924 tao
    /*
334 7806 tao
     * Doing index
335 7740 tao
     */
336 7806 tao
    private void index(List<String> metacatIds) {
337 7688 tao
        if(metacatIds != null) {
338 7606 tao
            for(String metacatId : metacatIds) {
339
                if(metacatId != null) {
340 8293 tao
                     generateIndex(metacatId);
341 7606 tao
                }
342
            }
343 7688 tao
        }
344 7606 tao
    }
345
346 7857 tao
    /*
347
     * Index those ids which failed in the process (We got them from the EventLog)
348
     */
349 8293 tao
    /*private void indexFailedIds(List<IndexEvent> events) {
350 7857 tao
        if(events != null) {
351
            for(IndexEvent event : events) {
352
                if(event != null) {
353
                    Identifier identifier = event.getIdentifier();
354
                    if(identifier != null) {
355
                        String id = identifier.getValue();
356
                        if(id != null) {
357
                            Event action = event.getAction();
358 8291 tao
                            //if (action != null && action.equals(Event.CREATE)) {
359 7857 tao
                                try {
360
                                    generateIndex(id);
361
                                    EventlogFactory.createIndexEventLog().remove(identifier);
362
                                } catch (Exception e) {
363
                                    log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage());
364
                                }
365 8293 tao
366 7857 tao
                        }
367
                    }
368
                }
369
            }
370
        }
371 8293 tao
    }*/
372 7857 tao
373 7613 tao
    public void run() {
374 8343 tao
375 7613 tao
        try {
376 7806 tao
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
377
            index(since);
378 7613 tao
        } catch (InvalidRequest e) {
379
            // TODO Auto-generated catch block
380
            //e.printStackTrace();
381
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
382
        } catch (InvalidToken e) {
383
            // TODO Auto-generated catch block
384
            //e.printStackTrace();
385
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
386
        } catch (NotAuthorized e) {
387
            // TODO Auto-generated catch block
388
            //e.printStackTrace();
389
        } catch (NotImplemented e) {
390
            // TODO Auto-generated catch block
391
            //e.printStackTrace();
392
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
393
        } catch (ServiceFailure e) {
394
            // TODO Auto-generated catch block
395
            //e.printStackTrace();
396
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
397
        } catch (SolrServerException e) {
398
            // TODO Auto-generated catch block
399
            //e.printStackTrace();
400
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
401 7691 tao
        } catch (FileNotFoundException e) {
402
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
403 8343 tao
        } catch (ClassNotFoundException e) {
404 7806 tao
            // TODO Auto-generated catch block
405
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
406
        } catch (InstantiationException e) {
407
            // TODO Auto-generated catch block
408
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
409
        } catch (IllegalAccessException e) {
410
            // TODO Auto-generated catch block
411
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
412
        } catch (IndexEventLogException e) {
413
            // TODO Auto-generated catch block
414
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
415 7876 tao
        } catch (XPathExpressionException e) {
416
            // TODO Auto-generated catch block
417
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
418
        } catch (NotFound e) {
419
            // TODO Auto-generated catch block
420
            e.printStackTrace();
421
        } catch (UnsupportedType e) {
422
            // TODO Auto-generated catch block
423
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
424
        } catch (IOException e) {
425
            // TODO Auto-generated catch block
426
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
427
        } catch (SAXException e) {
428
            // TODO Auto-generated catch block
429
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
430
        } catch (ParserConfigurationException e) {
431
            // TODO Auto-generated catch block
432
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
433 8023 tao
        } catch (OREParserException e) {
434
            // TODO Auto-generated catch block
435
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
436 7806 tao
        }
437 7613 tao
    }
438
439 8293 tao
440 7606 tao
441
    /*
442 7806 tao
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
443
     *
444
     * If since and util are null, it will return all of them.
445 7740 tao
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
446 7806 tao
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
447
     * The last element in the each list has the latest SystemMetadata modification date. But they are not sorted.
448 7606 tao
     */
449 7793 tao
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest,
450
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
451 8017 tao
        String fileName = "ids-from-hazelcast";
452 7740 tao
        List<String> resourceMapIds = new ArrayList();
453 8291 tao
        //List<String> resourceMapDeletedIds = new ArrayList();
454 7740 tao
        List<String> otherIds = new ArrayList();
455 8291 tao
        //List<String> otherDeletedIds = new ArrayList();
456
        List[] ids = new List[2];
457 7740 tao
        ids[FIRST]= otherIds;
458
        ids[SECOND] = resourceMapIds;
459 8291 tao
        //ids[THIRD]  = otherDeletedIds;
460
        //ids[FOURTH] = resourceMapDeletedIds;
461 7793 tao
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
462 7806 tao
        Date otherPreviousDate = null;
463 7876 tao
        Date otherDeletedPreviousDate = null;
464 7806 tao
        Date resourceMapPreviousDate = null;
465 7876 tao
        Date resourceMapDeletedPreviousDate = null;
466 7793 tao
        if(metacatIds != null) {
467
            for(Identifier identifier : metacatIds) {
468
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
469 8017 tao
                    List<String> idLog = new ArrayList<String>();
470
                    idLog.add(identifier.getValue());
471 8027 tao
                    //logFile(idLog, fileName);
472 7793 tao
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
473 7876 tao
                    if(sysmeta != null) {
474 7793 tao
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
475
                        //System.out.println("the object format id is "+formatId.getValue());
476
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
477
                        boolean correctTimeRange = false;
478
                        Date sysDate = sysmeta.getDateSysMetadataModified();
479
                        if(since == null && until == null) {
480
                            correctTimeRange = true;
481
                        } else if (since != null && until == null) {
482 7858 tao
                            if(sysDate.getTime() > since.getTime()) {
483 7793 tao
                                correctTimeRange = true;
484
                            }
485
                        } else if (since == null && until != null) {
486 7858 tao
                            if(sysDate.getTime() < until.getTime()) {
487 7793 tao
                                correctTimeRange = true;
488
                            }
489
                        } else if (since != null && until != null) {
490 7858 tao
                            if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) {
491 7793 tao
                                correctTimeRange = true;
492
                            }
493
                        }
494
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
495 7806 tao
                            //for the resource map
496 8291 tao
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
497 7876 tao
                                //archived ids
498
                                if(!resourceMapDeletedIds.isEmpty()) {
499
                                    if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) {
500
                                        resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
501
                                        resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
502
                                    } else {
503
                                        int size = resourceMapDeletedIds.size();//
504
                                        resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
505
                                    }
506 7806 tao
                                } else {
507 7876 tao
                                    resourceMapDeletedIds.add(identifier.getValue());
508
                                    resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate
509 7806 tao
                                }
510 8291 tao
                            } else {*/
511
                                // for all ids
512 7876 tao
                                if(!resourceMapIds.isEmpty()) {
513
                                    if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
514
                                        resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
515
                                        resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one
516
                                    } else {
517
                                        int size = resourceMapIds.size();//
518
                                        resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
519
                                    }
520
                                } else {
521
                                    resourceMapIds.add(identifier.getValue());
522
                                    resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate
523
                                }
524 8291 tao
                            //}
525 7838 tao
                        } else if (correctTimeRange) {
526 8291 tao
                            /*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) {
527 7876 tao
                                //for the archived ids
528
                                if(!otherDeletedIds.isEmpty()) {
529
                                    if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) {
530
                                        otherDeletedIds.add(identifier.getValue());
531
                                        otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one
532
                                    } else {
533
                                        int size = otherDeletedIds.size();
534
                                        otherDeletedIds.add(size-1, identifier.getValue());
535
                                    }
536 7740 tao
                                } else {
537 7876 tao
                                    otherDeletedIds.add(identifier.getValue());
538
                                    otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate
539 7740 tao
                                }
540 8291 tao
                            } else {*/
541
                                //for all ids
542 7876 tao
                                if(!otherIds.isEmpty()) {
543
                                    if(sysDate.getTime() > otherPreviousDate.getTime()) {
544
                                        otherIds.add(identifier.getValue());
545
                                        otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one
546
                                    } else {
547
                                        int size = otherIds.size();
548
                                        otherIds.add(size-1, identifier.getValue());
549
                                    }
550
                                } else {
551
                                    otherIds.add(identifier.getValue());
552
                                    otherPreviousDate = sysDate;//init otherPreviousDate
553
                                }
554 8291 tao
                            //}
555 7607 tao
                        }
556 7806 tao
557 7607 tao
                    }
558
                }
559
            }
560 7806 tao
        }
561 7606 tao
        return ids;
562
    }
563
564
    /*
565 7740 tao
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
566
     */
567 8864 tao
   public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
568
       return ResourceMapNamespaces.isResourceMap(formatId);
569 7740 tao
    }
570
571 7793 tao
572 7606 tao
573
    /*
574
     * Generate index for the id.
575
     */
576 8293 tao
    private void generateIndex(String id)  {
577 8343 tao
        //if id is null and sysmeta will be null. If sysmeta is null, it will be caught in solrIndex.update
578
        SystemMetadata sysmeta = getSystemMetadata(id);
579
        Identifier pid = new Identifier();
580
        pid.setValue(id);
581
        solrIndex.update(pid, sysmeta);
582
583 7606 tao
    }
584 7691 tao
585
    /*
586 7876 tao
     * Remove the solr index for the list of ids
587
     */
588 8291 tao
    /*private void removeIndex(List<String> ids) {
589 7876 tao
        if(ids!= null) {
590
            for(String id :ids) {
591 8035 tao
                try {
592
                    removeIndex(id);
593
                } catch (Exception e) {
594
                    IndexEvent event = new IndexEvent();
595
                    Identifier pid = new Identifier();
596
                    pid.setValue(id);
597
                    event.setIdentifier(pid);
598
                    event.setDate(Calendar.getInstance().getTime());
599
                    event.setAction(Event.DELETE);
600
                    String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage();
601
                    event.setDescription(error);
602
                    try {
603
                        EventlogFactory.createIndexEventLog().write(event);
604
                    } catch (Exception ee) {
605
                        log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage());
606
                    }
607
                    log.error(error);
608
                }
609
610 7876 tao
            }
611
        }
612 8291 tao
    }*/
613 7876 tao
614
    /*
615 7857 tao
     * Remove the index for the id
616
     */
617 8291 tao
    /*private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException  {
618 7857 tao
        if(id != null) {
619 8287 tao
            //solrIndex.remove(id);
620 7857 tao
        }
621 8291 tao
    }*/
622 7857 tao
623
    /*
624 7691 tao
     * Initialize the system metadata map
625
     */
626
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
627
        int times = 0;
628
        if(systemMetadataMap == null) {
629 7774 tao
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
630 7691 tao
        }
631
    }
632
633
    /*
634
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
635
     */
636
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
637
        if(objectPathMap == null) {
638
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
639
        }
640
    }
641 8293 tao
642
643
644
    /*
645
     * Initialize the index queue
646
     */
647
    private void initIndexQueue() throws FileNotFoundException, ServiceFailure {
648
        if(indexQueue == null) {
649
            indexQueue = DistributedMapsFactory.getIndexQueue();
650
        }
651
    }
652 7691 tao
    /**
653
     * Get an InputStream as the data object for the specific pid.
654
     * @param pid
655
     * @return
656
     * @throws FileNotFoundException
657
     */
658
    private InputStream getDataObject(String pid) throws FileNotFoundException {
659
        Identifier identifier = new Identifier();
660
        identifier.setValue(pid);
661
        String objectPath = objectPathMap.get(identifier);
662
        InputStream data = null;
663
        data = new FileInputStream(objectPath);
664
        return data;
665
666
    }
667
668
    /**
669
     * Get the SystemMetadata for the specified id from the distributed Map.
670
     * The null maybe is returned if there is no system metadata found.
671
     * @param id  the specified id.
672
     * @return the SystemMetadata associated with the id.
673
     */
674
    private SystemMetadata getSystemMetadata(String id) {
675
        SystemMetadata metadata = null;
676
        if(systemMetadataMap != null && id != null) {
677
            Identifier identifier = new Identifier();
678
            identifier.setValue(id);
679
            metadata = systemMetadataMap.get(identifier);
680
        }
681
        return metadata;
682
    }
683
684
    /**
685
     * Get the obsoletes chain of the specified id. The returned list doesn't include
686
     * the specified id itself. The newer version has the lower index number in the list.
687
     * Empty list will be returned if there is no document to be obsoleted by this id.
688
     * @param id
689
     * @return
690
     */
691
    private List<String> getObsoletes(String id) {
692
        List<String> obsoletes = new ArrayList<String>();
693
        while (id != null) {
694
            SystemMetadata metadata = getSystemMetadata(id);
695
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
696
            if(metadata != null) {
697
                Identifier identifier = metadata.getObsoletes();
698
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
699
                    obsoletes.add(identifier.getValue());
700
                    id = identifier.getValue();
701
                }
702
            }
703
        }
704
        return obsoletes;
705
    }
706 7788 tao
707
    /**
708
     * Overwrite and do nothing
709
     */
710
    public boolean cancel() {
711
        return true;
712
    }
713 7691 tao
714 7606 tao
}