Project

General

Profile

1 7606 tao
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates the solr indexes in bulk for the
4
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28
29
import java.io.IOException;
import java.io.InputStream;
30
import java.util.ArrayList;
31
import java.util.Date;
import java.util.HashSet;
32
import java.util.List;
import java.util.Set;
33
import java.util.Vector;
34
35
import org.apache.commons.logging.Log;
36
import org.apache.commons.logging.LogFactory;
37
import org.apache.solr.client.solrj.SolrServerException;
38
import org.dataone.client.MNode;
39
import org.dataone.configuration.Settings;
40
import org.dataone.service.exceptions.InvalidRequest;
41
import org.dataone.service.exceptions.InvalidToken;
42
import org.dataone.service.exceptions.NotAuthorized;
43
import org.dataone.service.exceptions.NotImplemented;
44
import org.dataone.service.exceptions.ServiceFailure;
45
import org.dataone.service.types.v1.Identifier;
46 7607 tao
import org.dataone.service.types.v1.ObjectInfo;
47 7606 tao
import org.dataone.service.types.v1.ObjectList;
48
import org.dataone.service.types.v1.SystemMetadata;
49
50
51
/**
52
 * A class represents the object to generate massive solr indexes.
53
 * This can happen during an update of Metacat (generating index for all existing documents)
54
 * or regenerate index for those documents
55
 * failing to build index during the insert or update.
56
 *
57
 * @author tao
58
 *
59
 */
60 7613 tao
public class IndexGenerator implements Runnable {
61 7606 tao
62 7687 tao
    private static final int WAITTIME = 10000;
63
    private static final int MAXWAITNUMBER = 180;
64 7606 tao
    private static final String HTTP = "http://";
65
    private static final String MNAPPENDIX = "/d1/mn";
66
    private SolrIndex solrIndex = null;
67
    private SystemMetadataEventListener systemMetadataListener = null;
68
    private Log log = LogFactory.getLog(IndexGenerator.class);
69
    private MNode mNode = null;
70
71
    /**
72
     * Constructor
73
     * @param solrIndex
74
     * @param systemMetadataListener
75
     */
76
    public IndexGenerator(SolrIndex solrIndex, SystemMetadataEventListener systemMetadataListener) {
77
        this.solrIndex = solrIndex;
78
        this.systemMetadataListener = systemMetadataListener;
79
        this.mNode = new MNode(buildMNBaseURL());
80
    }
81
82
    /**
83
     * Build the index for all documents in Metacat without overwriting.
84
     * @throws SolrServerException
85
     * @throws ServiceFailure
86
     * @throws NotImplemented
87
     * @throws NotAuthorized
88
     * @throws InvalidToken
89
     * @throws InvalidRequest
90
     */
91
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized,
92
                            NotImplemented, ServiceFailure, SolrServerException {
93
        boolean force = false;
94
        indexAll(force);
95
    }
96
97
    /**
98
     * Build the index for all documents. If force is true, the existed index for documents
99
     * will be overwritten.
100
     * @param force
101
     * @throws SolrServerException
102
     * @throws ServiceFailure
103
     * @throws NotImplemented
104
     * @throws NotAuthorized
105
     * @throws InvalidToken
106
     * @throws InvalidRequest
107
     */
108
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
109
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
110
        Date since = null;
111
        Date until = null;
112
        index(since, until, force);
113
    }
114
115
    /**
116
     * Build the index for the docs which have been modified since the specified date.
117
     * @param since
118
     * @param force
119
     * @throws SolrServerException
120
     * @throws ServiceFailure
121
     * @throws NotImplemented
122
     * @throws NotAuthorized
123
     * @throws InvalidToken
124
     * @throws InvalidRequest
125
     */
126
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken,
127
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
128
        Date until = null;
129
        index(since, until, force);
130
    }
131
132
    /**
133
     *  Build the index for the docs which have been modified between the specified date.s
134
     * @param since
135
     * @param until
136
     * @param force
137
     * @throws SolrServerException
138
     * @throws ServiceFailure
139
     * @throws NotImplemented
140
     * @throws NotAuthorized
141
     * @throws InvalidToken
142
     * @throws InvalidRequest
143
     */
144
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest,
145
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
146
        List<String> solrIds = null;
147
        List<String> metacatIds = null;
148
        if(!force) {
149
            solrIds = getSolrDocIds();
150
        }
151 7607 tao
        log.info("the solr ids -----------------------------"+solrIds);
152 7606 tao
        metacatIds = getMetadataIds(since, until);
153 7607 tao
        log.info("the metacat ids -----------------------------"+metacatIds);
154 7688 tao
        if(metacatIds != null) {
155 7606 tao
            for(String metacatId : metacatIds) {
156
                if(metacatId != null) {
157
                    boolean buildIndex = true;
158
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
159
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
160
                        buildIndex = false;
161
                    }
162
                    if(buildIndex) {
163
                        try {
164
                            generateIndex(metacatId);
165
                        } catch (Exception e) {
166
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
167
                        }
168
169
                    }
170
                }
171
            }
172 7688 tao
        }
173 7606 tao
    }
174
175 7613 tao
    public void run() {
176
        try {
177
            indexAll();
178
        } catch (InvalidRequest e) {
179
            // TODO Auto-generated catch block
180
            //e.printStackTrace();
181
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
182
        } catch (InvalidToken e) {
183
            // TODO Auto-generated catch block
184
            //e.printStackTrace();
185
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
186
        } catch (NotAuthorized e) {
187
            // TODO Auto-generated catch block
188
            //e.printStackTrace();
189
        } catch (NotImplemented e) {
190
            // TODO Auto-generated catch block
191
            //e.printStackTrace();
192
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
193
        } catch (ServiceFailure e) {
194
            // TODO Auto-generated catch block
195
            //e.printStackTrace();
196
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
197
        } catch (SolrServerException e) {
198
            // TODO Auto-generated catch block
199
            //e.printStackTrace();
200
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
201
        }
202
    }
203
204 7606 tao
    /*
205
     * Get the indexed ids list from the solr server.
206
     * An empty list will be returned if there is no ids.
207
     */
208
    private List<String> getSolrDocIds() throws SolrServerException {
209
        List<String> ids = solrIndex.getSolrIds();
210
        return ids;
211
    }
212
213
    /*
214
     * Get the ids of the metacat. If since and util are null, it will return all of them
215
     */
216
    private List<String> getMetadataIds(Date since, Date until) throws InvalidRequest,
217
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
218 7607 tao
        List<String> ids = new ArrayList();
219 7606 tao
        ObjectList objects = null;
220 7687 tao
        int times = 0;
221
        while (true) {
222
            try {
223
                mNode.ping();
224
                break;
225
            } catch (Exception e) {
226
                if(times <= MAXWAITNUMBER) {
227
                    log.warn("IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
228
                                    " is not ready :" +e.getMessage()+"\nWe will try to access it 10 seconds later ");
229
                    try {
230
                        Thread.sleep(WAITTIME);
231
                    } catch (Exception ee) {
232
                        log.warn("IndexGenerator.getMetadataIds - the thread can't sleep for 10 seconds to wait the MNode");
233
                    }
234
235
                } else {
236
                    throw new ServiceFailure("503", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
237
                                    " is not ready even though Metacat-index wailted for 30 minutes. We can't get the objects list from it and the building index can't happen this time");
238
                }
239
240
            }
241
            times++;
242
        }
243 7606 tao
        if(since == null && until == null) {
244
            objects = mNode.listObjects();
245
        } else {
246
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
247
        }
248 7607 tao
        if(objects != null) {
249
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
250
            if(objectInfoList != null) {
251
                for(ObjectInfo info : objectInfoList) {
252
                    if(info != null) {
253
                        Identifier identifier = info.getIdentifier();
254
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
255
                            ids.add(identifier.getValue());
256
                        }
257
                    }
258
                }
259
            }
260
        }
261 7606 tao
        return ids;
262
    }
263
264
    /*
265
     * Build up the mn base url
266
     */
267
    private String buildMNBaseURL() {
268 7607 tao
        String url = HTTP+Settings.getConfiguration().getString("server.name")+":"+
269 7606 tao
                     Settings.getConfiguration().getString("server.httpPort")+"/"+
270 7607 tao
                     Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
271 7606 tao
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+url);
272
        return url;
273
    }
274
275
276
    /*
277
     * Generate index for the id.
278
     */
279
    private void generateIndex(String id) throws Exception {
280
        if(id != null)  {
281
                SystemMetadata sysmeta = systemMetadataListener.getSystemMetadata(id);
282
                if(sysmeta != null) {
283
                        InputStream data = systemMetadataListener.getDataObject(id);
284
                        Identifier obsolete = sysmeta.getObsoletes();
285 7627 tao
                        List<String> obsoleteChain = null;
286 7606 tao
                        if(obsolete != null) {
287 7627 tao
                            obsoleteChain = systemMetadataListener.getObsoletes(id);
288
                        }
289
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
290 7606 tao
                } else {
291
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
292
                }
293
294
        }
295
    }
296
}