Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates solr indexes in bulk for the
4
 *             documents stored in Metacat
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

    
29
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
34

    
35
import org.apache.commons.logging.Log;
36
import org.apache.commons.logging.LogFactory;
37
import org.apache.solr.client.solrj.SolrServerException;
38
import org.dataone.client.MNode;
39
import org.dataone.configuration.Settings;
40
import org.dataone.service.exceptions.InvalidRequest;
41
import org.dataone.service.exceptions.InvalidToken;
42
import org.dataone.service.exceptions.NotAuthorized;
43
import org.dataone.service.exceptions.NotImplemented;
44
import org.dataone.service.exceptions.ServiceFailure;
45
import org.dataone.service.types.v1.Identifier;
46
import org.dataone.service.types.v1.ObjectInfo;
47
import org.dataone.service.types.v1.ObjectList;
48
import org.dataone.service.types.v1.SystemMetadata;
49

    
50

    
51
/**
52
 * A class represents the object to generate massive solr indexes.
53
 * This can happen during an update of Metacat (generating index for all existing documents)
54
 * or regenerate index for those documents
55
 * failing to build index during the insert or update.
56
 * 
57
 * @author tao
58
 *
59
 */
60
public class IndexGenerator implements Runnable {
61
    
62
    private static final String HTTP = "http://";
63
    private static final String MNAPPENDIX = "/d1/mn";
64
    private SolrIndex solrIndex = null;
65
    private SystemMetadataEventListener systemMetadataListener = null;
66
    private Log log = LogFactory.getLog(IndexGenerator.class);
67
    private MNode mNode = null;
68
    
69
    /**
70
     * Constructor
71
     * @param solrIndex
72
     * @param systemMetadataListener
73
     */
74
    public IndexGenerator(SolrIndex solrIndex, SystemMetadataEventListener systemMetadataListener) {
75
        this.solrIndex = solrIndex;
76
        this.systemMetadataListener = systemMetadataListener;
77
        this.mNode = new MNode(buildMNBaseURL());
78
    }
79
    
80
    /**
81
     * Build the index for all documents in Metacat without overwriting.
82
     * @throws SolrServerException 
83
     * @throws ServiceFailure 
84
     * @throws NotImplemented 
85
     * @throws NotAuthorized 
86
     * @throws InvalidToken 
87
     * @throws InvalidRequest 
88
     */
89
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
90
                            NotImplemented, ServiceFailure, SolrServerException {
91
        boolean force = false;
92
        indexAll(force);
93
    }
94
    
95
    /**
96
     * Build the index for all documents. If force is true, the existed index for documents
97
     * will be overwritten. 
98
     * @param force
99
     * @throws SolrServerException 
100
     * @throws ServiceFailure 
101
     * @throws NotImplemented 
102
     * @throws NotAuthorized 
103
     * @throws InvalidToken 
104
     * @throws InvalidRequest 
105
     */
106
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
107
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
108
        Date since = null;
109
        Date until = null;
110
        index(since, until, force);
111
    }
112
    
113
    /**
114
     * Build the index for the docs which have been modified since the specified date.
115
     * @param since
116
     * @param force 
117
     * @throws SolrServerException 
118
     * @throws ServiceFailure 
119
     * @throws NotImplemented 
120
     * @throws NotAuthorized 
121
     * @throws InvalidToken 
122
     * @throws InvalidRequest 
123
     */
124
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
125
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
126
        Date until = null;
127
        index(since, until, force);
128
    }
129
    
130
    /**
131
     *  Build the index for the docs which have been modified between the specified date.s
132
     * @param since
133
     * @param until
134
     * @param force
135
     * @throws SolrServerException 
136
     * @throws ServiceFailure 
137
     * @throws NotImplemented 
138
     * @throws NotAuthorized 
139
     * @throws InvalidToken 
140
     * @throws InvalidRequest 
141
     */
142
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
143
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
144
        List<String> solrIds = null;
145
        List<String> metacatIds = null;
146
        if(!force) {
147
            solrIds = getSolrDocIds();
148
        }
149
        log.info("the solr ids -----------------------------"+solrIds);
150
        metacatIds = getMetadataIds(since, until);
151
        log.info("the metacat ids -----------------------------"+metacatIds);
152
        if(metacatIds != null) {
153
            for(String metacatId : metacatIds) {
154
                if(metacatId != null) {
155
                    boolean buildIndex = true;
156
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
157
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
158
                        buildIndex = false;
159
                    }
160
                    if(buildIndex) {
161
                        try {
162
                            generateIndex(metacatId);
163
                        } catch (Exception e) {
164
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
165
                        }
166
                        
167
                    }
168
                }
169
            }
170
        }
171
    }
172
    
173
    public void run() {
174
        try {
175
            indexAll();
176
        } catch (InvalidRequest e) {
177
            // TODO Auto-generated catch block
178
            //e.printStackTrace();
179
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
180
        } catch (InvalidToken e) {
181
            // TODO Auto-generated catch block
182
            //e.printStackTrace();
183
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
184
        } catch (NotAuthorized e) {
185
            // TODO Auto-generated catch block
186
            //e.printStackTrace();
187
        } catch (NotImplemented e) {
188
            // TODO Auto-generated catch block
189
            //e.printStackTrace();
190
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
191
        } catch (ServiceFailure e) {
192
            // TODO Auto-generated catch block
193
            //e.printStackTrace();
194
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
195
        } catch (SolrServerException e) {
196
            // TODO Auto-generated catch block
197
            //e.printStackTrace();
198
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
199
        }
200
    }
201
    
202
    /*
203
     * Get the indexed ids list from the solr server.
204
     * An empty list will be returned if there is no ids.
205
     */
206
    private List<String> getSolrDocIds() throws SolrServerException {
207
        List<String> ids = solrIndex.getSolrIds();
208
        return ids;
209
    }
210
    
211
    /*
212
     * Get the ids of the metacat. If since and util are null, it will return all of them
213
     */
214
    private List<String> getMetadataIds(Date since, Date until) throws InvalidRequest, 
215
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
216
        List<String> ids = new ArrayList();
217
        ObjectList objects = null;
218
        if(since == null && until == null) {
219
            objects = mNode.listObjects();
220
        } else {
221
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
222
        }
223
        if(objects != null) {
224
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
225
            if(objectInfoList != null) {
226
                for(ObjectInfo info : objectInfoList) {
227
                    if(info != null) {
228
                        Identifier identifier = info.getIdentifier();
229
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
230
                            ids.add(identifier.getValue());
231
                        }
232
                    }
233
                }
234
            }
235
        }
236
        return ids;
237
    }
238
    
239
    /*
240
     * Build up the mn base url
241
     */
242
    private String buildMNBaseURL() {
243
        String url = HTTP+Settings.getConfiguration().getString("server.name")+":"+
244
                     Settings.getConfiguration().getString("server.httpPort")+"/"+
245
                     Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
246
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+url);
247
        return url;
248
    }
249
    
250
    
251
    /*
252
     * Generate index for the id.
253
     */
254
    private void generateIndex(String id) throws Exception {
255
        if(id != null)  {
256
                SystemMetadata sysmeta = systemMetadataListener.getSystemMetadata(id);
257
                if(sysmeta != null) {
258
                        InputStream data = systemMetadataListener.getDataObject(id);
259
                        Identifier obsolete = sysmeta.getObsoletes();
260
                        List<String> obsoleteChain = null;
261
                        if(obsolete != null) {
262
                            obsoleteChain = systemMetadataListener.getObsoletes(id);
263
                        } 
264
                        solrIndex.update(id, obsoleteChain, sysmeta, data);
265
                } else {
266
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
267
                }
268
           
269
        }
270
    }
271
}
(2-2/5)