Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates Solr indexes in bulk for the
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServerException;
import org.dataone.client.MNode;
import org.dataone.configuration.Settings;
import org.dataone.service.exceptions.InvalidRequest;
import org.dataone.service.exceptions.InvalidToken;
import org.dataone.service.exceptions.NotAuthorized;
import org.dataone.service.exceptions.NotImplemented;
import org.dataone.service.exceptions.ServiceFailure;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.ObjectFormatIdentifier;
import org.dataone.service.types.v1.ObjectInfo;
import org.dataone.service.types.v1.ObjectList;
import org.dataone.service.types.v1.SystemMetadata;

import com.hazelcast.core.IMap;
54

    
55

    
56
/**
57
 * Generates Solr index entries in bulk.
 * This is needed when Metacat is upgraded (an index is generated for all existing documents)
 * or when the index has to be regenerated for documents
 * whose indexing failed during insert or update.
61
 * 
62
 * @author tao
63
 *
64
 */
65
public class IndexGenerator implements Runnable {
66
    
67
    private static final int FIRST =0;
68
    private static final int SECOND =1;
69
    public static final int WAITTIME = 10000;
70
    public static final int MAXWAITNUMBER = 180;
71
    private static final String HTTP = "http://";
72
    private static final String MNAPPENDIX = "/d1/mn";
73
    private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace";
74
    public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime";
75
    public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts";
76
    
77
    private static int waitingTime = WAITTIME;
78
    private static int maxAttempts = MAXWAITNUMBER;
79
    
80
    private SolrIndex solrIndex = null;
81
    //private SystemMetadataEventListener systemMetadataListener = null;
82
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
83
    private IMap<Identifier, String> objectPathMap;
84
    private Log log = LogFactory.getLog(IndexGenerator.class);
85
    private MNode mNode = null;
86
    private static List<String> resourceMapNamespaces = null;
87
    
88
    /**
89
     * Constructor
90
     * @param solrIndex
91
     * @param systemMetadataListener
92
     */
93
    public IndexGenerator(SolrIndex solrIndex) {
94
        this.solrIndex = solrIndex;
95
        resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
96
        //this.systemMetadataListener = systemMetadataListener;
97
        this.mNode = new MNode(buildMNBaseURL());
98
        try {
99
            waitingTime = Settings.getConfiguration().getInt(WAITIMEPOPERTYNAME);
100
            maxAttempts = Settings.getConfiguration().getInt(MAXATTEMPTSPROPERTYNAME);
101
        } catch (Exception e) {
102
            log.warn("IndexGenerator.constructor - couldn't read the waiting time or maxattempts from the metacat.properties file since : "+e.getMessage()+". Default values will be used");
103
            waitingTime = WAITTIME;
104
            maxAttempts = MAXWAITNUMBER;
105
        }
106
    }
107
    
108
    /**
109
     * Build the index for all documents in Metacat without overwriting.
110
     * @throws SolrServerException 
111
     * @throws ServiceFailure 
112
     * @throws NotImplemented 
113
     * @throws NotAuthorized 
114
     * @throws InvalidToken 
115
     * @throws InvalidRequest 
116
     */
117
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
118
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
119
        boolean force = false;
120
        indexAll(force);
121
    }
122
    
123
    /**
124
     * Build the index for all documents. If force is true, the existed index for documents
125
     * will be overwritten. 
126
     * @param force
127
     * @throws SolrServerException 
128
     * @throws ServiceFailure 
129
     * @throws NotImplemented 
130
     * @throws NotAuthorized 
131
     * @throws InvalidToken 
132
     * @throws InvalidRequest 
133
     */
134
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
135
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
136
        Date since = null;
137
        Date until = null;
138
        index(since, until, force);
139
    }
140
    
141
    /**
142
     * Build the index for the docs which have been modified since the specified date.
143
     * @param since
144
     * @param force 
145
     * @throws SolrServerException 
146
     * @throws ServiceFailure 
147
     * @throws NotImplemented 
148
     * @throws NotAuthorized 
149
     * @throws InvalidToken 
150
     * @throws InvalidRequest 
151
     */
152
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
153
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
154
        Date until = null;
155
        index(since, until, force);
156
    }
157
    
158
    /**
159
     *  Build the index for the docs which have been modified between the specified date.s
160
     * @param since
161
     * @param until
162
     * @param force
163
     * @throws SolrServerException 
164
     * @throws ServiceFailure 
165
     * @throws NotImplemented 
166
     * @throws NotAuthorized 
167
     * @throws InvalidToken 
168
     * @throws InvalidRequest 
169
     * @throws FileNotFoundException 
170
     */
171
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
172
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
173
        List<String> solrIds = null;
174
        initSystemMetadataMap();
175
        initObjectPathMap();
176
        List[] allMetacatIds = getMetadataIds(since, until);
177
        List<String> otherMetacatIds = allMetacatIds[FIRST];
178
        List<String> resourceMapIds = allMetacatIds[SECOND];
179
        log.info("the metacat ids (exception resource map -----------------------------"+otherMetacatIds);
180
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
181
        if(!force) {
182
            solrIds = getSolrDocIds();
183
        }
184
        log.info("the solr ids -----------------------------"+solrIds);
185
        index(otherMetacatIds, solrIds, force);
186
        index(resourceMapIds, solrIds, force);
187
       
188
        
189
    }
190
    
191
    /*
192
     * Doing index when it is necessary when compare metacatids and solrids.
193
     */
194
    private void index(List<String> metacatIds, List<String>solrIds, boolean force) {
195
        if(metacatIds != null) {
196
            for(String metacatId : metacatIds) {
197
                if(metacatId != null) {
198
                    boolean buildIndex = true;
199
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
200
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
201
                        buildIndex = false;
202
                    }
203
                    if(buildIndex) {
204
                        try {
205
                            generateIndex(metacatId);
206
                        } catch (Exception e) {
207
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
208
                        }
209
                        
210
                    }
211
                }
212
            }
213
        }
214
    }
215
    
216
    public void run() {
217
        try {
218
            indexAll();
219
        } catch (InvalidRequest e) {
220
            // TODO Auto-generated catch block
221
            //e.printStackTrace();
222
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
223
        } catch (InvalidToken e) {
224
            // TODO Auto-generated catch block
225
            //e.printStackTrace();
226
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
227
        } catch (NotAuthorized e) {
228
            // TODO Auto-generated catch block
229
            //e.printStackTrace();
230
        } catch (NotImplemented e) {
231
            // TODO Auto-generated catch block
232
            //e.printStackTrace();
233
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
234
        } catch (ServiceFailure e) {
235
            // TODO Auto-generated catch block
236
            //e.printStackTrace();
237
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
238
        } catch (SolrServerException e) {
239
            // TODO Auto-generated catch block
240
            //e.printStackTrace();
241
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
242
        } catch (FileNotFoundException e) {
243
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
244
        }
245
    }
246
    
247
    /*
248
     * Get the indexed ids list from the solr server.
249
     * An empty list will be returned if there is no ids.
250
     */
251
    private List<String> getSolrDocIds() throws SolrServerException {
252
        List<String> ids = solrIndex.getSolrIds();
253
        return ids;
254
    }
255
    
256
    /*
257
     * Get an array of the list of ids of the metacat. If since and util are null, it will return all of them.
258
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
259
     * The reason to split them is when we index the resource map, we need the index of the document in the resource map ready.
260
     */
261
    private List[] getMetadataIds(Date since, Date until) throws InvalidRequest, 
262
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
263
        
264
        List<String> resourceMapIds = new ArrayList();
265
        List<String> otherIds = new ArrayList();
266
        List[] ids = new List[2];
267
        ids[FIRST]= otherIds;
268
        ids[SECOND] = resourceMapIds;
269
        ObjectList objects = null;
270
        int times = 0;
271
        while (true) {
272
            try {
273
                mNode.ping();
274
                break;
275
            } catch (Exception e) {
276
                if(times <= maxAttempts) {
277
                    log.warn("IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
278
                                    " is not ready :" +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
279
                    try {
280
                        Thread.sleep(waitingTime);
281
                    } catch (Exception ee) {
282
                        log.warn("IndexGenerator.getMetadataIds - the thread can't sleep for "+ waitingTime/1000+" seconds to wait the MNode");
283
                    }
284
                   
285
                } else {
286
                    throw new ServiceFailure("0000", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
287
                                    " is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the objects list from it and the building index can't happen this time");
288
                }
289
                
290
            }
291
            times++;
292
        }
293
        if(since == null && until == null) {
294
            objects = mNode.listObjects();
295
        } else {
296
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
297
        }
298
        if(objects != null) {
299
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
300
            if(objectInfoList != null) {
301
                for(ObjectInfo info : objectInfoList) {
302
                    if(info != null) {
303
                        Identifier identifier = info.getIdentifier();
304
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
305
                            SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
306
                            if(sysmeta != null && !sysmeta.getArchived()) {
307
                                ObjectFormatIdentifier formatId =sysmeta.getFormatId();
308
                                //System.out.println("the object format id is "+formatId.getValue());
309
                                //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
310
                                if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
311
                                    resourceMapIds.add(identifier.getValue());
312
                                } else {
313
                                    otherIds.add(identifier.getValue());
314
                                }
315
                            }
316
                            //ids.add(identifier.getValue());
317
                        }
318
                    }
319
                }
320
            }
321
        }
322
        return ids;
323
    }
324
    
325
    /*
326
     * If the specified ObjectFormatIdentifier is a resrouce map namespace.
327
     */
328
    public static boolean isResourceMap(ObjectFormatIdentifier formatId) {
329
        boolean isResourceMap = false;
330
        if(formatId != null && resourceMapNamespaces != null) {
331
            for(String namespace : resourceMapNamespaces) {
332
                if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) {
333
                    isResourceMap = true;
334
                    break;
335
                }
336
            }
337
        }
338
        return isResourceMap;
339
    }
340
    
341
    /*
342
     * Build up the mn base url
343
     */
344
    private String buildMNBaseURL() {
345
        String httpPort = Settings.getConfiguration().getString("server.httpPort");
346
        String serverURL = "http://";
347
        if(httpPort.equals("443") || httpPort.equals("8443"))
348
        {
349
            serverURL = "https://";
350
        }
351
        serverURL = serverURL+Settings.getConfiguration().getString("server.name");
352
        if (!httpPort.equals("80")) {
353
            serverURL += ":" + httpPort;
354
        }
355
        serverURL = serverURL +"/"+ Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
356
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+serverURL);
357
        return serverURL;
358
    }
359
    
360
    
361
    /*
362
     * Generate index for the id.
363
     */
364
    private void generateIndex(String id) throws Exception {
365
        if(id != null)  {
366
                SystemMetadata sysmeta = getSystemMetadata(id);
367
                //only update none-archived id.
368
                if(sysmeta != null && !sysmeta.getArchived()) {
369
                        InputStream data = getDataObject(id);
370
                        Identifier obsolete = sysmeta.getObsoletes();
371
                        List<String> obsoleteChain = null;
372
                        if(obsolete != null) {
373
                            obsoleteChain = getObsoletes(id);
374
                        } 
375
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
376
                } else {
377
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
378
                }
379
           
380
        }
381
    }
382
    
383
    /*
384
     * Initialize the system metadata map
385
     */
386
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
387
        int times = 0;
388
        if(systemMetadataMap == null) {
389
            systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
390
            /*while(true) {
391
                try {
392
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
393
                    break;
394
                } catch (FileNotFoundException e) {
395
                    throw e;
396
                } catch (ServiceFailure e) {
397
                    if(times <= maxAttempts) {
398
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
399
                                         +e.getMessage()+"\nWe will try to access it "+waitingTime/1000+" seconds later ");
400
                        try {
401
                            Thread.sleep(waitingTime);
402
                        } catch (Exception ee) {
403
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for "+waitingTime/1000+" seconds to wait the hazelcast service");
404
                        }
405
                       
406
                    } else {
407
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for "+maxAttempts*waitingTime/1000+" seconds. We can't get the system metadata from it and the building index can't happen this time");
408
                    }
409
                }
410
                times++;
411
            }*/
412
        }
413
    }
414
    
415
    /*
416
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
417
     */
418
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
419
        if(objectPathMap == null) {
420
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
421
        }
422
    }
423
    /**
424
     * Get an InputStream as the data object for the specific pid.
425
     * @param pid
426
     * @return
427
     * @throws FileNotFoundException
428
     */
429
    private InputStream getDataObject(String pid) throws FileNotFoundException {
430
        Identifier identifier = new Identifier();
431
        identifier.setValue(pid);
432
        String objectPath = objectPathMap.get(identifier);
433
        InputStream data = null;
434
        data = new FileInputStream(objectPath);
435
        return data;
436

    
437
    }
438
    
439
    /**
440
     * Get the SystemMetadata for the specified id from the distributed Map.
441
     * The null maybe is returned if there is no system metadata found.
442
     * @param id  the specified id.
443
     * @return the SystemMetadata associated with the id.
444
     */
445
    private SystemMetadata getSystemMetadata(String id) {
446
        SystemMetadata metadata = null;
447
        if(systemMetadataMap != null && id != null) {
448
            Identifier identifier = new Identifier();
449
            identifier.setValue(id);
450
            metadata = systemMetadataMap.get(identifier);
451
        }
452
        return metadata;
453
    }
454
    
455
    /**
456
     * Get the obsoletes chain of the specified id. The returned list doesn't include
457
     * the specified id itself. The newer version has the lower index number in the list.
458
     * Empty list will be returned if there is no document to be obsoleted by this id.
459
     * @param id
460
     * @return
461
     */
462
    private List<String> getObsoletes(String id) {
463
        List<String> obsoletes = new ArrayList<String>();
464
        while (id != null) {
465
            SystemMetadata metadata = getSystemMetadata(id);
466
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
467
            if(metadata != null) {
468
                Identifier identifier = metadata.getObsoletes();
469
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
470
                    obsoletes.add(identifier.getValue());
471
                    id = identifier.getValue();
472
                } 
473
            } 
474
        }
475
        return obsoletes;
476
    }
477

    
478
}
(3-3/6)