Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates the solr index in bulk for the
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.FileInputStream;
30
import java.io.FileNotFoundException;
31
import java.io.InputStream;
32
import java.util.ArrayList;
33
import java.util.Date;
34
import java.util.List;
35
import java.util.TimerTask;
36
import java.util.Vector;
37

    
38
import org.apache.commons.logging.Log;
39
import org.apache.commons.logging.LogFactory;
40
import org.apache.solr.client.solrj.SolrServerException;
41
import org.dataone.client.MNode;
42
import org.dataone.configuration.Settings;
43
import org.dataone.service.exceptions.InvalidRequest;
44
import org.dataone.service.exceptions.InvalidToken;
45
import org.dataone.service.exceptions.NotAuthorized;
46
import org.dataone.service.exceptions.NotImplemented;
47
import org.dataone.service.exceptions.ServiceFailure;
48
import org.dataone.service.types.v1.Identifier;
49
import org.dataone.service.types.v1.ObjectFormatIdentifier;
50
import org.dataone.service.types.v1.ObjectInfo;
51
import org.dataone.service.types.v1.ObjectList;
52
import org.dataone.service.types.v1.SystemMetadata;
53

    
54
import com.hazelcast.core.IMap;
55

    
56

    
57
/**
 * A class which generates solr indexes in bulk.
 * This can happen during an upgrade of Metacat (generating the index for all existing documents)
 * or when regenerating the index for those documents
 * which failed to be indexed during an insert or update.
 * 
 * @author tao
 *
 */
66
public class IndexGenerator extends TimerTask {
67
    
68
    private static final int FIRST =0;
69
    private static final int SECOND =1;
70
    public static final int WAITTIME = 10000;
71
    public static final int MAXWAITNUMBER = 180;
72
    private static final String HTTP = "http://";
73
    private static final String MNAPPENDIX = "/d1/mn";
74
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
75
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
76
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
77
    
78
    private static int waitingTime = WAITTIME;
79
    private static int maxAttempts = MAXWAITNUMBER;
80
    
81
    private SolrIndex solrIndex = null;
82
    //private SystemMetadataEventListener systemMetadataListener = null;
83
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
84
    private IMap<Identifier, String> objectPathMap;
85
    private Log log = LogFactory.getLog(IndexGenerator.class);
86
    private MNode mNode = null;
87
    private static List<String> resourceMapNamespaces = null;
88
    
89
    /**
90
     * Constructor
91
     * @param solrIndex
92
     * @param systemMetadataListener
93
     */
94
    public IndexGenerator(SolrIndex solrIndex) {
95
        this.solrIndex = solrIndex;
96
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
97
        //this.systemMetadataListener = systemMetadataListener;
98
        this.mNode = new MNode(buildMNBaseURL());
99
        try {
100
            waitingTime = Settings.getConfiguration().getInt(WAITIMEPOPERTYNAME);
101
            maxAttempts = Settings.getConfiguration().getInt(MAXATTEMPTSPROPERTYNAME);
102
        } catch (Exception e) {
103
            log.warn("IndexGenerator.constructor - couldn't read the waiting time or maxattempts from the metacat.properties file since : "+e.getMessage()+". Default values will be used");
104
            waitingTime = WAITTIME;
105
            maxAttempts = MAXWAITNUMBER;
106
        }
107
    }
108
    
109
    /**
110
     * Build the index for all documents in Metacat without overwriting.
111
     * @throws SolrServerException 
112
     * @throws ServiceFailure 
113
     * @throws NotImplemented 
114
     * @throws NotAuthorized 
115
     * @throws InvalidToken 
116
     * @throws InvalidRequest 
117
     */
118
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
119
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
120
        boolean force = false;
121
        indexAll(force);
122
    }
123
    
124
    /**
125
     * Build the index for all documents. If force is true, the existed index for documents
126
     * will be overwritten. 
127
     * @param force
128
     * @throws SolrServerException 
129
     * @throws ServiceFailure 
130
     * @throws NotImplemented 
131
     * @throws NotAuthorized 
132
     * @throws InvalidToken 
133
     * @throws InvalidRequest 
134
     */
135
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
136
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
137
        Date since = null;
138
        Date until = null;
139
        index(since, until, force);
140
    }
141
    
142
    /**
143
     * Build the index for the docs which have been modified since the specified date.
144
     * @param since
145
     * @param force 
146
     * @throws SolrServerException 
147
     * @throws ServiceFailure 
148
     * @throws NotImplemented 
149
     * @throws NotAuthorized 
150
     * @throws InvalidToken 
151
     * @throws InvalidRequest 
152
     */
153
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
154
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
155
        Date until = null;
156
        index(since, until, force);
157
    }
158
    
159
    /**
160
     *  Build the index for the docs which have been modified between the specified date.s
161
     * @param since
162
     * @param until
163
     * @param force
164
     * @throws SolrServerException 
165
     * @throws ServiceFailure 
166
     * @throws NotImplemented 
167
     * @throws NotAuthorized 
168
     * @throws InvalidToken 
169
     * @throws InvalidRequest 
170
     * @throws FileNotFoundException 
171
     */
172
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
173
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
174
        List<String> solrIds = null;
175
        initSystemMetadataMap();
176
        initObjectPathMap();
177
        List[] allMetacatIds = getMetadataIds(since, until);
178
        List<String> otherMetacatIds = allMetacatIds[FIRST];
179
        List<String> resourceMapIds = allMetacatIds[SECOND];
180
        log.info("the metacat ids (exception resource map -----------------------------"+otherMetacatIds);
181
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
182
        if(!force) {
183
            solrIds = getSolrDocIds();
184
        }
185
        log.info("the solr ids -----------------------------"+solrIds);
186
        index(otherMetacatIds, solrIds, force);
187
        index(resourceMapIds, solrIds, force);
188
       
189
        
190
    }
191
    
192
    /*
193
     * Doing index when it is necessary when compare metacatids and solrids.
194
     */
195
    private void index(List<String> metacatIds, List<String>solrIds, boolean force) {
196
        if(metacatIds != null) {
197
            for(String metacatId : metacatIds) {
198
                if(metacatId != null) {
199
                    boolean buildIndex = true;
200
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
201
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
202
                        buildIndex = false;
203
                    }
204
                    if(buildIndex) {
205
                        try {
206
                            generateIndex(metacatId);
207
                        } catch (Exception e) {
208
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
209
                        }
210
                        
211
                    }
212
                }
213
            }
214
        }
215
    }
216
    
217
    public void run() {
218
        try {
219
            indexAll();
220
        } catch (InvalidRequest e) {
221
            // TODO Auto-generated catch block
222
            //e.printStackTrace();
223
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
224
        } catch (InvalidToken e) {
225
            // TODO Auto-generated catch block
226
            //e.printStackTrace();
227
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
228
        } catch (NotAuthorized e) {
229
            // TODO Auto-generated catch block
230
            //e.printStackTrace();
231
        } catch (NotImplemented e) {
232
            // TODO Auto-generated catch block
233
            //e.printStackTrace();
234
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
235
        } catch (ServiceFailure e) {
236
            // TODO Auto-generated catch block
237
            //e.printStackTrace();
238
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
239
        } catch (SolrServerException e) {
240
            // TODO Auto-generated catch block
241
            //e.printStackTrace();
242
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
243
        } catch (FileNotFoundException e) {
244
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
245
        }
246
    }
247
    
248
    /*
249
     * Get the indexed ids list from the solr server.
250
     * An empty list will be returned if there is no ids.
251
     */
252
    private List<String> getSolrDocIds() throws SolrServerException {
253
        List<String> ids = solrIndex.getSolrIds();
254
        return ids;
255
    }
256
    
257
    /*
258
     * Get an array of the list of ids of the metacat. If since and util are null, it will return all of them.
259
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
260
     * The reason to split them is when we index the resource map, we need the index of the document in the resource map ready.
261
     */
262
    private List[] getMetadataIds(Date since, Date until) throws InvalidRequest, 
263
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
264
        
265
        List<String> resourceMapIds = new ArrayList();
266
        List<String> otherIds = new ArrayList();
267
        List[] ids = new List[2];
268
        ids[FIRST]= otherIds;
269
        ids[SECOND] = resourceMapIds;
270
        ObjectList objects = null;
271
        int times = 0;
272
        while (true) {
273
            try {
274
                mNode.ping();
275
                break;
276
            } catch (Exception e) {
277
                if(times <= maxAttempts) {
278
                    log.warn("IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
279
                                    " is not ready :" +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
280
                    try {
281
                        Thread.sleep(waitingTime);
282
                    } catch (Exception ee) {
283
                        log.warn("IndexGenerator.getMetadataIds - the thread can't sleep for "+ waitingTime/1000+" seconds to wait the MNode");
284
                    }
285
                   
286
                } else {
287
                    throw new ServiceFailure("0000", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
288
                                    " is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the objects list from it and the building index can't happen this time");
289
                }
290
                
291
            }
292
            times++;
293
        }
294
        if(since == null && until == null) {
295
            objects = mNode.listObjects();
296
        } else {
297
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
298
        }
299
        if(objects != null) {
300
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
301
            if(objectInfoList != null) {
302
                for(ObjectInfo info : objectInfoList) {
303
                    if(info != null) {
304
                        Identifier identifier = info.getIdentifier();
305
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
306
                            SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
307
                            if(sysmeta != null && !sysmeta.getArchived()) {
308
                                ObjectFormatIdentifier formatId =sysmeta.getFormatId();
309
                                //System.out.println("the object format id is "+formatId.getValue());
310
                                //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
311
                                if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
312
                                    resourceMapIds.add(identifier.getValue());
313
                                } else {
314
                                    otherIds.add(identifier.getValue());
315
                                }
316
                            }
317
                            //ids.add(identifier.getValue());
318
                        }
319
                    }
320
                }
321
            }
322
        }
323
        return ids;
324
    }
325
    
326
    /*
327
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
328
     */
329
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
330
        boolean isResourceMap = false;
331
        if(formatId != null && resourceMapNamespaces != null) {
332
            for(String namespace : resourceMapNamespaces) {
333
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
334
                    isResourceMap = true;
335
                    break;
336
                }
337
            }
338
        }
339
        return isResourceMap;
340
    }
341
    
342
    /*
343
     * Build up the mn base url
344
     */
345
    private String buildMNBaseURL() {
346
        String httpPort = Settings.getConfiguration().getString("server.httpPort");
347
        String serverURL = "http://";
348
        if(httpPort.equals("443") || httpPort.equals("8443"))
349
        {
350
            serverURL = "https://";
351
        }
352
        serverURL = serverURL+Settings.getConfiguration().getString("server.name");
353
        if (!httpPort.equals("80")) {
354
            serverURL += ":" + httpPort;
355
        }
356
        serverURL = serverURL +"/"+ Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
357
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+serverURL);
358
        return serverURL;
359
    }
360
    
361
    
362
    /*
363
     * Generate index for the id.
364
     */
365
    private void generateIndex(String id) throws Exception {
366
        if(id != null)  {
367
                SystemMetadata sysmeta = getSystemMetadata(id);
368
                //only update none-archived id.
369
                if(sysmeta != null && !sysmeta.getArchived()) {
370
                        InputStream data = getDataObject(id);
371
                        Identifier obsolete = sysmeta.getObsoletes();
372
                        List<String> obsoleteChain = null;
373
                        if(obsolete != null) {
374
                            obsoleteChain = getObsoletes(id);
375
                        } 
376
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
377
                } else {
378
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
379
                }
380
           
381
        }
382
    }
383
    
384
    /*
385
     * Initialize the system metadata map
386
     */
387
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
388
        int times = 0;
389
        if(systemMetadataMap == null) {
390
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
391
            /*while(true) {
392
                try {
393
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
394
                    break;
395
                } catch (FileNotFoundException e) {
396
                    throw e;
397
                } catch (ServiceFailure e) {
398
                    if(times <= maxAttempts) {
399
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
400
                                         +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
401
                        try {
402
                            Thread.sleep(waitingTime);
403
                        } catch (Exception ee) {
404
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for "+waitingTime/1000+" seconds to wait the hazelcast service");
405
                        }
406
                       
407
                    } else {
408
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the system metadata from it and the building index can't happen this time");
409
                    }
410
                }
411
                times++;
412
            }*/
413
        }
414
    }
415
    
416
    /*
417
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
418
     */
419
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
420
        if(objectPathMap == null) {
421
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
422
        }
423
    }
424
    /**
425
     * Get an InputStream as the data object for the specific pid.
426
     * @param pid
427
     * @return
428
     * @throws FileNotFoundException
429
     */
430
    private InputStream getDataObject(String pid) throws FileNotFoundException {
431
        Identifier identifier = new Identifier();
432
        identifier.setValue(pid);
433
        String objectPath = objectPathMap.get(identifier);
434
        InputStream data = null;
435
        data = new FileInputStream(objectPath);
436
        return data;
437

    
438
    }
439
    
440
    /**
441
     * Get the SystemMetadata for the specified id from the distributed Map.
442
     * The null maybe is returned if there is no system metadata found.
443
     * @param id  the specified id.
444
     * @return the SystemMetadata associated with the id.
445
     */
446
    private SystemMetadata getSystemMetadata(String id) {
447
        SystemMetadata metadata = null;
448
        if(systemMetadataMap != null && id != null) {
449
            Identifier identifier = new Identifier();
450
            identifier.setValue(id);
451
            metadata = systemMetadataMap.get(identifier);
452
        }
453
        return metadata;
454
    }
455
    
456
    /**
457
     * Get the obsoletes chain of the specified id. The returned list doesn't include
458
     * the specified id itself. The newer version has the lower index number in the list.
459
     * Empty list will be returned if there is no document to be obsoleted by this id.
460
     * @param id
461
     * @return
462
     */
463
    private List<String> getObsoletes(String id) {
464
        List<String> obsoletes = new ArrayList<String>();
465
        while (id != null) {
466
            SystemMetadata metadata = getSystemMetadata(id);
467
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
468
            if(metadata != null) {
469
                Identifier identifier = metadata.getObsoletes();
470
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
471
                    obsoletes.add(identifier.getValue());
472
                    id = identifier.getValue();
473
                } 
474
            } 
475
        }
476
        return obsoletes;
477
    }
478
    
479
    /**
480
     * Overwrite and do nothing
481
     */
482
    public boolean cancel() {
483
        return true;
484
    }
485

    
486
}
(3-3/6)