Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates Solr indexes in bulk for the
4
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServerException;
import org.dataone.client.MNode;
import org.dataone.configuration.Settings;
import org.dataone.service.exceptions.InvalidRequest;
import org.dataone.service.exceptions.InvalidToken;
import org.dataone.service.exceptions.NotAuthorized;
import org.dataone.service.exceptions.NotImplemented;
import org.dataone.service.exceptions.ServiceFailure;
import org.dataone.service.types.v1.Identifier;
import org.dataone.service.types.v1.ObjectInfo;
import org.dataone.service.types.v1.ObjectList;
import org.dataone.service.types.v1.SystemMetadata;

import com.hazelcast.core.IMap;
53

    
54

    
55
/**
56
 * Generates Solr indexes in bulk.
57
 * This can happen during an update of Metacat (generating the index for all existing documents)
58
 * or when regenerating the index for documents
59
 * whose indexing failed during an insert or update.
60
 * 
61
 * @author tao
62
 *
63
 */
64
public class IndexGenerator implements Runnable {
65
    
66
    private static final int WAITTIME = 10000;
67
    private static final int MAXWAITNUMBER = 180;
68
    private static final String HTTP = "http://";
69
    private static final String MNAPPENDIX = "/d1/mn";
70
    private SolrIndex solrIndex = null;
71
    //private SystemMetadataEventListener systemMetadataListener = null;
72
    private IMap<Identifier, SystemMetadata> systemMetadataMap;
73
    
74
    private IMap<Identifier, String> objectPathMap;
75
    private Log log = LogFactory.getLog(IndexGenerator.class);
76
    private MNode mNode = null;
77
    
78
    /**
     * Creates a generator which writes into the given index and which reads
     * the object list from the member node located at the url returned by
     * buildMNBaseURL().
     * @param solrIndex  the index to be updated by this generator
     */
    public IndexGenerator(SolrIndex solrIndex) {
        String mnBaseUrl = buildMNBaseURL();
        this.mNode = new MNode(mnBaseUrl);
        this.solrIndex = solrIndex;
    }
88
    
89
    /**
90
     * Build the index for all documents in Metacat without overwriting.
91
     * @throws SolrServerException 
92
     * @throws ServiceFailure 
93
     * @throws NotImplemented 
94
     * @throws NotAuthorized 
95
     * @throws InvalidToken 
96
     * @throws InvalidRequest 
97
     */
98
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
99
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
100
        boolean force = false;
101
        indexAll(force);
102
    }
103
    
104
    /**
105
     * Build the index for all documents. If force is true, the existed index for documents
106
     * will be overwritten. 
107
     * @param force
108
     * @throws SolrServerException 
109
     * @throws ServiceFailure 
110
     * @throws NotImplemented 
111
     * @throws NotAuthorized 
112
     * @throws InvalidToken 
113
     * @throws InvalidRequest 
114
     */
115
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
116
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
117
        Date since = null;
118
        Date until = null;
119
        index(since, until, force);
120
    }
121
    
122
    /**
123
     * Build the index for the docs which have been modified since the specified date.
124
     * @param since
125
     * @param force 
126
     * @throws SolrServerException 
127
     * @throws ServiceFailure 
128
     * @throws NotImplemented 
129
     * @throws NotAuthorized 
130
     * @throws InvalidToken 
131
     * @throws InvalidRequest 
132
     */
133
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
134
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
135
        Date until = null;
136
        index(since, until, force);
137
    }
138
    
139
    /**
140
     *  Build the index for the docs which have been modified between the specified date.s
141
     * @param since
142
     * @param until
143
     * @param force
144
     * @throws SolrServerException 
145
     * @throws ServiceFailure 
146
     * @throws NotImplemented 
147
     * @throws NotAuthorized 
148
     * @throws InvalidToken 
149
     * @throws InvalidRequest 
150
     * @throws FileNotFoundException 
151
     */
152
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
153
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
154
        List<String> solrIds = null;
155
        List<String> metacatIds = null;
156
        metacatIds = getMetadataIds(since, until);
157
        log.info("the metacat ids -----------------------------"+metacatIds);
158
        if(!force) {
159
            solrIds = getSolrDocIds();
160
        }
161
        log.info("the solr ids -----------------------------"+solrIds);
162
        
163
        initSystemMetadataMap();
164
        initObjectPathMap();
165
        if(metacatIds != null) {
166
            for(String metacatId : metacatIds) {
167
                if(metacatId != null) {
168
                    boolean buildIndex = true;
169
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
170
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
171
                        buildIndex = false;
172
                    }
173
                    if(buildIndex) {
174
                        try {
175
                            generateIndex(metacatId);
176
                        } catch (Exception e) {
177
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
178
                        }
179
                        
180
                    }
181
                }
182
            }
183
        }
184
    }
185
    
186
    public void run() {
187
        try {
188
            indexAll();
189
        } catch (InvalidRequest e) {
190
            // TODO Auto-generated catch block
191
            //e.printStackTrace();
192
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
193
        } catch (InvalidToken e) {
194
            // TODO Auto-generated catch block
195
            //e.printStackTrace();
196
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
197
        } catch (NotAuthorized e) {
198
            // TODO Auto-generated catch block
199
            //e.printStackTrace();
200
        } catch (NotImplemented e) {
201
            // TODO Auto-generated catch block
202
            //e.printStackTrace();
203
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
204
        } catch (ServiceFailure e) {
205
            // TODO Auto-generated catch block
206
            //e.printStackTrace();
207
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
208
        } catch (SolrServerException e) {
209
            // TODO Auto-generated catch block
210
            //e.printStackTrace();
211
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
212
        } catch (FileNotFoundException e) {
213
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
214
        }
215
    }
216
    
217
    /*
218
     * Get the indexed ids list from the solr server.
219
     * An empty list will be returned if there is no ids.
220
     */
221
    private List<String> getSolrDocIds() throws SolrServerException {
222
        List<String> ids = solrIndex.getSolrIds();
223
        return ids;
224
    }
225
    
226
    /*
227
     * Get the ids of the metacat. If since and util are null, it will return all of them
228
     */
229
    private List<String> getMetadataIds(Date since, Date until) throws InvalidRequest, 
230
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
231
        List<String> ids = new ArrayList();
232
        ObjectList objects = null;
233
        int times = 0;
234
        while (true) {
235
            try {
236
                mNode.ping();
237
                break;
238
            } catch (Exception e) {
239
                if(times <= MAXWAITNUMBER) {
240
                    log.warn("IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
241
                                    " is not ready :" +e.getMessage()+"\nWe will try to access it 10 seconds later ");
242
                    try {
243
                        Thread.sleep(WAITTIME);
244
                    } catch (Exception ee) {
245
                        log.warn("IndexGenerator.getMetadataIds - the thread can't sleep for 10 seconds to wait the MNode");
246
                    }
247
                   
248
                } else {
249
                    throw new ServiceFailure("0000", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
250
                                    " is not ready even though Metacat-index wailted for 30 minutes. We can't get the objects list from it and the building index can't happen this time");
251
                }
252
                
253
            }
254
            times++;
255
        }
256
        if(since == null && until == null) {
257
            objects = mNode.listObjects();
258
        } else {
259
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
260
        }
261
        if(objects != null) {
262
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
263
            if(objectInfoList != null) {
264
                for(ObjectInfo info : objectInfoList) {
265
                    if(info != null) {
266
                        Identifier identifier = info.getIdentifier();
267
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
268
                            ids.add(identifier.getValue());
269
                        }
270
                    }
271
                }
272
            }
273
        }
274
        return ids;
275
    }
276
    
277
    /*
278
     * Build up the mn base url
279
     */
280
    private String buildMNBaseURL() {
281
        String httpPort = Settings.getConfiguration().getString("server.httpPort");
282
        String serverURL = "http://";
283
        if(httpPort.equals("443") || httpPort.equals("8443"))
284
        {
285
            serverURL = "https://";
286
        }
287
        serverURL = serverURL+Settings.getConfiguration().getString("server.name");
288
        if (!httpPort.equals("80")) {
289
            serverURL += ":" + httpPort;
290
        }
291
        serverURL = serverURL +"/"+ Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
292
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+serverURL);
293
        return serverURL;
294
    }
295
    
296
    
297
    /*
298
     * Generate index for the id.
299
     */
300
    private void generateIndex(String id) throws Exception {
301
        if(id != null)  {
302
                SystemMetadata sysmeta = getSystemMetadata(id);
303
                //only update none-archived id.
304
                if(sysmeta != null && !sysmeta.getArchived()) {
305
                        InputStream data = getDataObject(id);
306
                        Identifier obsolete = sysmeta.getObsoletes();
307
                        List<String> obsoleteChain = null;
308
                        if(obsolete != null) {
309
                            obsoleteChain = getObsoletes(id);
310
                        } 
311
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
312
                } else {
313
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
314
                }
315
           
316
        }
317
    }
318
    
319
    /*
320
     * Initialize the system metadata map
321
     */
322
    private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
323
        int times = 0;
324
        if(systemMetadataMap == null) {
325
            while(true) {
326
                try {
327
                    systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
328
                    break;
329
                } catch (FileNotFoundException e) {
330
                    throw e;
331
                } catch (ServiceFailure e) {
332
                    if(times <= MAXWAITNUMBER) {
333
                        log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
334
                                         +e.getMessage()+"\nWe will try to access it 10 seconds later ");
335
                        try {
336
                            Thread.sleep(WAITTIME);
337
                        } catch (Exception ee) {
338
                            log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for 10 seconds to wait the hazelcast service");
339
                        }
340
                       
341
                    } else {
342
                        throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for 30 minutes. We can't get the system metadata from it and the building index can't happen this time");
343
                    }
344
                }
345
                times++;
346
            }
347
        }
348
    }
349
    
350
    /*
351
     * We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
352
     */
353
    private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
354
        if(objectPathMap == null) {
355
            objectPathMap = DistributedMapsFactory.getObjectPathMap();
356
        }
357
    }
358
    /**
359
     * Get an InputStream as the data object for the specific pid.
360
     * @param pid
361
     * @return
362
     * @throws FileNotFoundException
363
     */
364
    private InputStream getDataObject(String pid) throws FileNotFoundException {
365
        Identifier identifier = new Identifier();
366
        identifier.setValue(pid);
367
        String objectPath = objectPathMap.get(identifier);
368
        InputStream data = null;
369
        data = new FileInputStream(objectPath);
370
        return data;
371

    
372
    }
373
    
374
    /**
375
     * Get the SystemMetadata for the specified id from the distributed Map.
376
     * The null maybe is returned if there is no system metadata found.
377
     * @param id  the specified id.
378
     * @return the SystemMetadata associated with the id.
379
     */
380
    private SystemMetadata getSystemMetadata(String id) {
381
        SystemMetadata metadata = null;
382
        if(systemMetadataMap != null && id != null) {
383
            Identifier identifier = new Identifier();
384
            identifier.setValue(id);
385
            metadata = systemMetadataMap.get(identifier);
386
        }
387
        return metadata;
388
    }
389
    
390
    /**
391
     * Get the obsoletes chain of the specified id. The returned list doesn't include
392
     * the specified id itself. The newer version has the lower index number in the list.
393
     * Empty list will be returned if there is no document to be obsoleted by this id.
394
     * @param id
395
     * @return
396
     */
397
    private List<String> getObsoletes(String id) {
398
        List<String> obsoletes = new ArrayList<String>();
399
        while (id != null) {
400
            SystemMetadata metadata = getSystemMetadata(id);
401
            id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
402
            if(metadata != null) {
403
                Identifier identifier = metadata.getObsoletes();
404
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
405
                    obsoletes.add(identifier.getValue());
406
                    id = identifier.getValue();
407
                } 
408
            } 
409
        }
410
        return obsoletes;
411
    }
412

    
413
}
(3-3/6)