Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates (or regenerates) the Solr index
4
 *             for the documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TimerTask;
import java.util.Vector;
37

    
38
import org.apache.commons.logging.Log;
39
import org.apache.commons.logging.LogFactory;
40
import org.apache.solr.client.solrj.SolrServerException;
41
import org.dataone.client.MNode;
42
import org.dataone.configuration.Settings;
43
import org.dataone.service.exceptions.InvalidRequest;
44
import org.dataone.service.exceptions.InvalidToken;
45
import org.dataone.service.exceptions.NotAuthorized;
46
import org.dataone.service.exceptions.NotImplemented;
47
import org.dataone.service.exceptions.ServiceFailure;
48
import org.dataone.service.types.v1.Identifier;
49
import org.dataone.service.types.v1.ObjectFormatIdentifier;
50
import org.dataone.service.types.v1.ObjectInfo;
51
import org.dataone.service.types.v1.ObjectList;
52
import org.dataone.service.types.v1.SystemMetadata;
53

    
54
import com.hazelcast.core.IMap;
55
import com.hazelcast.core.ISet;
56

    
57

    
58
/**
59
 * Generates Solr index entries for Metacat documents in bulk.
60
 * This is used after an update of Metacat (to build the index for all existing documents)
61
 * and to regenerate the index for those documents
62
 * whose indexing failed during an insert or update.
63
 * 
64
 * @author tao
65
 *
66
 */
67
public class IndexGenerator extends TimerTask {
68
    
69
    private static final int FIRST =0;
70
    private static final int SECOND =1;
71
    public static final int WAITTIME = 10000;
72
    public static final int MAXWAITNUMBER = 180;
73
    private static final String HTTP = "http://";
74
    private static final String MNAPPENDIX = "/d1/mn";
75
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
76
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
77
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
78
    
79
    private static int waitingTime = WAITTIME;
80
    private static int maxAttempts = MAXWAITNUMBER;
81
    
82
    private SolrIndex solrIndex = null;
83
    //private SystemMetadataEventListener systemMetadataListener = null;
84
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
85
    private IMap<Identifier, String> objectPathMap;
86
    private Log log = LogFactory.getLog(IndexGenerator.class);
87
    //private MNode mNode = null;
88
    private static List<String> resourceMapNamespaces = null;
89
    
90
    /**
91
     * Constructor
92
     * @param solrIndex
93
     * @param systemMetadataListener
94
     */
95
    public IndexGenerator(SolrIndex solrIndex) {
96
        this.solrIndex = solrIndex;
97
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
98
        //this.systemMetadataListener = systemMetadataListener;
99
        //this.mNode = new MNode(buildMNBaseURL());
100
        try {
101
            waitingTime = Settings.getConfiguration().getInt(WAITIMEPOPERTYNAME);
102
            maxAttempts = Settings.getConfiguration().getInt(MAXATTEMPTSPROPERTYNAME);
103
        } catch (Exception e) {
104
            log.warn("IndexGenerator.constructor - couldn't read the waiting time or maxattempts from the metacat.properties file since : "+e.getMessage()+". Default values will be used");
105
            waitingTime = WAITTIME;
106
            maxAttempts = MAXWAITNUMBER;
107
        }
108
    }
109
    
110
    /**
111
     * Build the index for all documents in Metacat without overwriting.
112
     * @throws SolrServerException 
113
     * @throws ServiceFailure 
114
     * @throws NotImplemented 
115
     * @throws NotAuthorized 
116
     * @throws InvalidToken 
117
     * @throws InvalidRequest 
118
     */
119
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
120
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
121
        boolean force = false;
122
        indexAll(force);
123
    }
124
    
125
    /**
126
     * Build the index for all documents. If force is true, the existed index for documents
127
     * will be overwritten. 
128
     * @param force
129
     * @throws SolrServerException 
130
     * @throws ServiceFailure 
131
     * @throws NotImplemented 
132
     * @throws NotAuthorized 
133
     * @throws InvalidToken 
134
     * @throws InvalidRequest 
135
     */
136
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
137
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
138
        Date since = null;
139
        Date until = null;
140
        index(since, until, force);
141
    }
142
    
143
    /**
144
     * Build the index for the docs which have been modified since the specified date.
145
     * @param since
146
     * @param force 
147
     * @throws SolrServerException 
148
     * @throws ServiceFailure 
149
     * @throws NotImplemented 
150
     * @throws NotAuthorized 
151
     * @throws InvalidToken 
152
     * @throws InvalidRequest 
153
     */
154
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
155
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
156
        Date until = null;
157
        index(since, until, force);
158
    }
159
    
160
    /**
161
     *  Build the index for the docs which have been modified between the specified date.s
162
     * @param since
163
     * @param until
164
     * @param force
165
     * @throws SolrServerException 
166
     * @throws ServiceFailure 
167
     * @throws NotImplemented 
168
     * @throws NotAuthorized 
169
     * @throws InvalidToken 
170
     * @throws InvalidRequest 
171
     * @throws FileNotFoundException 
172
     */
173
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
174
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
175
        List<String> solrIds = null;
176
        initSystemMetadataMap();
177
        initObjectPathMap();
178
        List[] allMetacatIds = getMetacatIds(since, until);
179
        List<String> otherMetacatIds = allMetacatIds[FIRST];
180
        List<String> resourceMapIds = allMetacatIds[SECOND];
181
        log.info("the metacat ids (exception resource map -----------------------------"+otherMetacatIds);
182
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
183
        if(!force) {
184
            solrIds = getSolrDocIds();
185
        }
186
        log.info("the solr ids -----------------------------"+solrIds);
187
        index(otherMetacatIds, solrIds, force);
188
        index(resourceMapIds, solrIds, force);
189
       
190
        
191
    }
192
    
193
    /*
194
     * Doing index when it is necessary when compare metacatids and solrids.
195
     */
196
    private void index(List<String> metacatIds, List<String>solrIds, boolean force) {
197
        if(metacatIds != null) {
198
            for(String metacatId : metacatIds) {
199
                if(metacatId != null) {
200
                    boolean buildIndex = true;
201
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
202
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
203
                        buildIndex = false;
204
                    }
205
                    if(buildIndex) {
206
                        try {
207
                            generateIndex(metacatId);
208
                        } catch (Exception e) {
209
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
210
                        }
211
                        
212
                    }
213
                }
214
            }
215
        }
216
    }
217
    
218
    public void run() {
219
        try {
220
            indexAll();
221
        } catch (InvalidRequest e) {
222
            // TODO Auto-generated catch block
223
            //e.printStackTrace();
224
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
225
        } catch (InvalidToken e) {
226
            // TODO Auto-generated catch block
227
            //e.printStackTrace();
228
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
229
        } catch (NotAuthorized e) {
230
            // TODO Auto-generated catch block
231
            //e.printStackTrace();
232
        } catch (NotImplemented e) {
233
            // TODO Auto-generated catch block
234
            //e.printStackTrace();
235
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
236
        } catch (ServiceFailure e) {
237
            // TODO Auto-generated catch block
238
            //e.printStackTrace();
239
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
240
        } catch (SolrServerException e) {
241
            // TODO Auto-generated catch block
242
            //e.printStackTrace();
243
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
244
        } catch (FileNotFoundException e) {
245
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
246
        }
247
    }
248
    
249
    /*
250
     * Get the indexed ids list from the solr server.
251
     * An empty list will be returned if there is no ids.
252
     */
253
    private List<String> getSolrDocIds() throws SolrServerException {
254
        List<String> ids = solrIndex.getSolrIds();
255
        return ids;
256
    }
257
    
258
    /*
259
     * Get an array of the list of ids of the metacat. If since and util are null, it will return all of them.
260
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
261
     * The reason to split them is when we index the resource map, we need the index of the document in the resource map ready.
262
     */
263
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
264
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
265
        
266
        List<String> resourceMapIds = new ArrayList();
267
        List<String> otherIds = new ArrayList();
268
        List[] ids = new List[2];
269
        ids[FIRST]= otherIds;
270
        ids[SECOND] = resourceMapIds;
271
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
272
        if(metacatIds != null) {
273
            for(Identifier identifier : metacatIds) {
274
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
275
                    SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
276
                    if(sysmeta != null && !sysmeta.getArchived()) {
277
                        ObjectFormatIdentifier formatId =sysmeta.getFormatId();
278
                        //System.out.println("the object format id is "+formatId.getValue());
279
                        //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
280
                        boolean correctTimeRange = false;
281
                        Date sysDate = sysmeta.getDateSysMetadataModified();
282
                        if(since == null && until == null) {
283
                            correctTimeRange = true;
284
                        } else if (since != null && until == null) {
285
                            if(sysDate.getTime() >= since.getTime()) {
286
                                correctTimeRange = true;
287
                            }
288
                        } else if (since == null && until != null) {
289
                            if(sysDate.getTime() <= until.getTime()) {
290
                                correctTimeRange = true;
291
                            }
292
                        } else if (since != null && until != null) {
293
                            if(sysDate.getTime() >= since.getTime() && sysDate.getTime() <= until.getTime()) {
294
                                correctTimeRange = true;
295
                            }
296
                        }
297
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
298
                            resourceMapIds.add(identifier.getValue());
299
                        } else {
300
                            otherIds.add(identifier.getValue());
301
                        }
302
                    }
303
                }
304
            }
305
        }
306
        /*if(objects != null) {
307
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
308
            if(objectInfoList != null) {
309
                for(ObjectInfo info : objectInfoList) {
310
                    if(info != null) {
311
                        Identifier identifier = info.getIdentifier();
312
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
313
                            SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
314
                            if(sysmeta != null && !sysmeta.getArchived()) {
315
                                ObjectFormatIdentifier formatId =sysmeta.getFormatId();
316
                                //System.out.println("the object format id is "+formatId.getValue());
317
                                //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
318
                                if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
319
                                    resourceMapIds.add(identifier.getValue());
320
                                } else {
321
                                    otherIds.add(identifier.getValue());
322
                                }
323
                            }
324
                            //ids.add(identifier.getValue());
325
                        }
326
                    }
327
                }
328
            }
329
        }*/
330
        return ids;
331
    }
332
    
333
    /*
334
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
335
     */
336
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
337
        boolean isResourceMap = false;
338
        if(formatId != null && resourceMapNamespaces != null) {
339
            for(String namespace : resourceMapNamespaces) {
340
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
341
                    isResourceMap = true;
342
                    break;
343
                }
344
            }
345
        }
346
        return isResourceMap;
347
    }
348
    
349
   
350
    
351
    /*
352
     * Generate index for the id.
353
     */
354
    private void generateIndex(String id) throws Exception {
355
        if(id != null)  {
356
                SystemMetadata sysmeta = getSystemMetadata(id);
357
                //only update none-archived id.
358
                if(sysmeta != null && !sysmeta.getArchived()) {
359
                        InputStream data = getDataObject(id);
360
                        Identifier obsolete = sysmeta.getObsoletes();
361
                        List<String> obsoleteChain = null;
362
                        if(obsolete != null) {
363
                            obsoleteChain = getObsoletes(id);
364
                        } 
365
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
366
                } else {
367
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
368
                }
369
           
370
        }
371
    }
372
    
373
    /*
374
     * Initialize the system metadata map
375
     */
376
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
377
        int times = 0;
378
        if(systemMetadataMap == null) {
379
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
380
            /*while(true) {
381
                try {
382
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
383
                    break;
384
                } catch (FileNotFoundException e) {
385
                    throw e;
386
                } catch (ServiceFailure e) {
387
                    if(times <= maxAttempts) {
388
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
389
                                         +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
390
                        try {
391
                            Thread.sleep(waitingTime);
392
                        } catch (Exception ee) {
393
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for "+waitingTime/1000+" seconds to wait the hazelcast service");
394
                        }
395
                       
396
                    } else {
397
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the system metadata from it and the building index can't happen this time");
398
                    }
399
                }
400
                times++;
401
            }*/
402
        }
403
    }
404
    
405
    /*
406
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
407
     */
408
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
409
        if(objectPathMap == null) {
410
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
411
        }
412
    }
413
    /**
414
     * Get an InputStream as the data object for the specific pid.
415
     * @param pid
416
     * @return
417
     * @throws FileNotFoundException
418
     */
419
    private InputStream getDataObject(String pid) throws FileNotFoundException {
420
        Identifier identifier = new Identifier();
421
        identifier.setValue(pid);
422
        String objectPath = objectPathMap.get(identifier);
423
        InputStream data = null;
424
        data = new FileInputStream(objectPath);
425
        return data;
426

    
427
    }
428
    
429
    /**
430
     * Get the SystemMetadata for the specified id from the distributed Map.
431
     * The null maybe is returned if there is no system metadata found.
432
     * @param id  the specified id.
433
     * @return the SystemMetadata associated with the id.
434
     */
435
    private SystemMetadata getSystemMetadata(String id) {
436
        SystemMetadata metadata = null;
437
        if(systemMetadataMap != null && id != null) {
438
            Identifier identifier = new Identifier();
439
            identifier.setValue(id);
440
            metadata = systemMetadataMap.get(identifier);
441
        }
442
        return metadata;
443
    }
444
    
445
    /**
446
     * Get the obsoletes chain of the specified id. The returned list doesn't include
447
     * the specified id itself. The newer version has the lower index number in the list.
448
     * Empty list will be returned if there is no document to be obsoleted by this id.
449
     * @param id
450
     * @return
451
     */
452
    private List<String> getObsoletes(String id) {
453
        List<String> obsoletes = new ArrayList<String>();
454
        while (id != null) {
455
            SystemMetadata metadata = getSystemMetadata(id);
456
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
457
            if(metadata != null) {
458
                Identifier identifier = metadata.getObsoletes();
459
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
460
                    obsoletes.add(identifier.getValue());
461
                    id = identifier.getValue();
462
                } 
463
            } 
464
        }
465
        return obsoletes;
466
    }
467
    
468
    /**
469
     * Overwrite and do nothing
470
     */
471
    public boolean cancel() {
472
        return true;
473
    }
474

    
475
}
(3-3/6)