Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates Solr indexes in bulk for the
4
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
34

    
35
import org.apache.commons.logging.Log;
36
import org.apache.commons.logging.LogFactory;
37
import org.apache.solr.client.solrj.SolrServerException;
38
import org.dataone.client.MNode;
39
import org.dataone.configuration.Settings;
40
import org.dataone.service.exceptions.InvalidRequest;
41
import org.dataone.service.exceptions.InvalidToken;
42
import org.dataone.service.exceptions.NotAuthorized;
43
import org.dataone.service.exceptions.NotImplemented;
44
import org.dataone.service.exceptions.ServiceFailure;
45
import org.dataone.service.types.v1.Identifier;
46
import org.dataone.service.types.v1.ObjectInfo;
47
import org.dataone.service.types.v1.ObjectList;
48
import org.dataone.service.types.v1.SystemMetadata;
49

    
50

    
51
/**
52
 * A class represents the object to generate massive solr indexes.
53
 * This can happen during an update of Metacat (generating index for all existing documents)
54
 * or regenerate index for those documents
55
 * failing to build index during the insert or update.
56
 * 
57
 * @author tao
58
 *
59
 */
60
public class IndexGenerator implements Runnable {
61
    
62
    private static final int WAITTIME = 10000;
63
    private static final int MAXWAITNUMBER = 180;
64
    private static final String HTTP = "http://";
65
    private static final String MNAPPENDIX = "/d1/mn";
66
    private SolrIndex solrIndex = null;
67
    private SystemMetadataEventListener systemMetadataListener = null;
68
    private Log log = LogFactory.getLog(IndexGenerator.class);
69
    private MNode mNode = null;
70
    
71
    /**
72
     * Constructor
73
     * @param solrIndex
74
     * @param systemMetadataListener
75
     */
76
    public IndexGenerator(SolrIndex solrIndex, SystemMetadataEventListener systemMetadataListener) {
77
        this.solrIndex = solrIndex;
78
        this.systemMetadataListener = systemMetadataListener;
79
        this.mNode = new MNode(buildMNBaseURL());
80
    }
81
    
82
    /**
83
     * Build the index for all documents in Metacat without overwriting.
84
     * @throws SolrServerException 
85
     * @throws ServiceFailure 
86
     * @throws NotImplemented 
87
     * @throws NotAuthorized 
88
     * @throws InvalidToken 
89
     * @throws InvalidRequest 
90
     */
91
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
92
                            NotImplemented, ServiceFailure, SolrServerException {
93
        boolean force = false;
94
        indexAll(force);
95
    }
96
    
97
    /**
98
     * Build the index for all documents. If force is true, the existed index for documents
99
     * will be overwritten. 
100
     * @param force
101
     * @throws SolrServerException 
102
     * @throws ServiceFailure 
103
     * @throws NotImplemented 
104
     * @throws NotAuthorized 
105
     * @throws InvalidToken 
106
     * @throws InvalidRequest 
107
     */
108
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
109
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
110
        Date since = null;
111
        Date until = null;
112
        index(since, until, force);
113
    }
114
    
115
    /**
116
     * Build the index for the docs which have been modified since the specified date.
117
     * @param since
118
     * @param force 
119
     * @throws SolrServerException 
120
     * @throws ServiceFailure 
121
     * @throws NotImplemented 
122
     * @throws NotAuthorized 
123
     * @throws InvalidToken 
124
     * @throws InvalidRequest 
125
     */
126
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
127
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
128
        Date until = null;
129
        index(since, until, force);
130
    }
131
    
132
    /**
133
     *  Build the index for the docs which have been modified between the specified date.s
134
     * @param since
135
     * @param until
136
     * @param force
137
     * @throws SolrServerException 
138
     * @throws ServiceFailure 
139
     * @throws NotImplemented 
140
     * @throws NotAuthorized 
141
     * @throws InvalidToken 
142
     * @throws InvalidRequest 
143
     */
144
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
145
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
146
        List<String> solrIds = null;
147
        List<String> metacatIds = null;
148
        if(!force) {
149
            solrIds = getSolrDocIds();
150
        }
151
        log.info("the solr ids -----------------------------"+solrIds);
152
        metacatIds = getMetadataIds(since, until);
153
        log.info("the metacat ids -----------------------------"+metacatIds);
154
        if(metacatIds != null) {
155
            for(String metacatId : metacatIds) {
156
                if(metacatId != null) {
157
                    boolean buildIndex = true;
158
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
159
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
160
                        buildIndex = false;
161
                    }
162
                    if(buildIndex) {
163
                        try {
164
                            generateIndex(metacatId);
165
                        } catch (Exception e) {
166
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
167
                        }
168
                        
169
                    }
170
                }
171
            }
172
        }
173
    }
174
    
175
    public void run() {
176
        try {
177
            indexAll();
178
        } catch (InvalidRequest e) {
179
            // TODO Auto-generated catch block
180
            //e.printStackTrace();
181
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
182
        } catch (InvalidToken e) {
183
            // TODO Auto-generated catch block
184
            //e.printStackTrace();
185
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
186
        } catch (NotAuthorized e) {
187
            // TODO Auto-generated catch block
188
            //e.printStackTrace();
189
        } catch (NotImplemented e) {
190
            // TODO Auto-generated catch block
191
            //e.printStackTrace();
192
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
193
        } catch (ServiceFailure e) {
194
            // TODO Auto-generated catch block
195
            //e.printStackTrace();
196
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
197
        } catch (SolrServerException e) {
198
            // TODO Auto-generated catch block
199
            //e.printStackTrace();
200
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
201
        }
202
    }
203
    
204
    /*
205
     * Get the indexed ids list from the solr server.
206
     * An empty list will be returned if there is no ids.
207
     */
208
    private List<String> getSolrDocIds() throws SolrServerException {
209
        List<String> ids = solrIndex.getSolrIds();
210
        return ids;
211
    }
212
    
213
    /*
214
     * Get the ids of the metacat. If since and util are null, it will return all of them
215
     */
216
    private List<String> getMetadataIds(Date since, Date until) throws InvalidRequest, 
217
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
218
        List<String> ids = new ArrayList();
219
        ObjectList objects = null;
220
        int times = 0;
221
        while (true) {
222
            try {
223
                mNode.ping();
224
                break;
225
            } catch (Exception e) {
226
                if(times <= MAXWAITNUMBER) {
227
                    log.warn("IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
228
                                    " is not ready :" +e.getMessage()+"\nWe will try to access it 10 seconds later ");
229
                    try {
230
                        Thread.sleep(WAITTIME);
231
                    } catch (Exception ee) {
232
                        log.warn("IndexGenerator.getMetadataIds - the thread can't sleep for 10 seconds to wait the MNode");
233
                    }
234
                   
235
                } else {
236
                    throw new ServiceFailure("503", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
237
                                    " is not ready even though Metacat-index wailted for 30 minutes. We can't get the objects list from it and the building index can't happen this time");
238
                }
239
                
240
            }
241
            times++;
242
        }
243
        if(since == null && until == null) {
244
            objects = mNode.listObjects();
245
        } else {
246
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
247
        }
248
        if(objects != null) {
249
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
250
            if(objectInfoList != null) {
251
                for(ObjectInfo info : objectInfoList) {
252
                    if(info != null) {
253
                        Identifier identifier = info.getIdentifier();
254
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
255
                            ids.add(identifier.getValue());
256
                        }
257
                    }
258
                }
259
            }
260
        }
261
        return ids;
262
    }
263
    
264
    /*
265
     * Build up the mn base url
266
     */
267
    private String buildMNBaseURL() {
268
        String url = HTTP+Settings.getConfiguration().getString("server.name")+":"+
269
                     Settings.getConfiguration().getString("server.httpPort")+"/"+
270
                     Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
271
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+url);
272
        return url;
273
    }
274
    
275
    
276
    /*
277
     * Generate index for the id.
278
     */
279
    private void generateIndex(String id) throws Exception {
280
        if(id != null)  {
281
                SystemMetadata sysmeta = systemMetadataListener.getSystemMetadata(id);
282
                if(sysmeta != null) {
283
                        InputStream data = systemMetadataListener.getDataObject(id);
284
                        Identifier obsolete = sysmeta.getObsoletes();
285
                        List<String> obsoleteChain = null;
286
                        if(obsolete != null) {
287
                            obsoleteChain = systemMetadataListener.getObsoletes(id);
288
                        } 
289
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
290
                } else {
291
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
292
                }
293
           
294
        }
295
    }
296
}
(2-2/5)