Project

General

Profile

« Previous | Next » 

Revision 7606

Added by Jing Tao over 11 years ago

Add a new class which will generate index for the ids.

View differences:

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/SolrIndex.java
393 393
    }
394 394
    
395 395
    /**
396
     * Get all indexed ids in the solr server.
397
     * @return
396
     * Get all indexed ids in the solr server. 
397
     * @return an empty list if there is no index.
398 398
     * @throws SolrServerException
399 399
     */
400 400
    public List<String> getSolrIds() throws SolrServerException {
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGenerator.java
1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: A class that generates the solr index for documents
4
 *             in Metacat (all of them, or those modified in a date range)
5
 *  Copyright: 2000 Regents of the University of California and the
6
 *             National Center for Ecological Analysis and Synthesis
7
 *    Authors: Jivka Bojilova, Matt Jones
8
 *
9
 *   '$Author: leinfelder $'
10
 *     '$Date: 2011-11-02 20:40:12 -0700 (Wed, 02 Nov 2011) $'
11
 * '$Revision: 6595 $'
12
 *
13
 * This program is free software; you can redistribute it and/or modify
14
 * it under the terms of the GNU General Public License as published by
15
 * the Free Software Foundation; either version 2 of the License, or
16
 * (at your option) any later version.
17
 *
18
 * This program is distributed in the hope that it will be useful,
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21
 * GNU General Public License for more details.
22
 *
23
 * You should have received a copy of the GNU General Public License
24
 * along with this program; if not, write to the Free Software
25
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
package edu.ucsb.nceas.metacat.index;
28

  
29
import java.io.InputStream;
30
import java.util.ArrayList;
31
import java.util.Date;
32
import java.util.List;
33
import java.util.Vector;
34

  
35
import org.apache.commons.logging.Log;
36
import org.apache.commons.logging.LogFactory;
37
import org.apache.solr.client.solrj.SolrServerException;
38
import org.dataone.client.MNode;
39
import org.dataone.configuration.Settings;
40
import org.dataone.service.exceptions.InvalidRequest;
41
import org.dataone.service.exceptions.InvalidToken;
42
import org.dataone.service.exceptions.NotAuthorized;
43
import org.dataone.service.exceptions.NotImplemented;
44
import org.dataone.service.exceptions.ServiceFailure;
45
import org.dataone.service.types.v1.Identifier;
46
import org.dataone.service.types.v1.ObjectList;
47
import org.dataone.service.types.v1.SystemMetadata;
48

  
49

  
50
/**
51
 * A class represents the object to generate massive solr indexes.
52
 * This can happen during an update of Metacat (generating index for all existing documents)
53
 * or regenerate index for those documents
54
 * failing to build index during the insert or update.
55
 * 
56
 * @author tao
57
 *
58
 */
59
public class IndexGenerator {
60
    
61
    private static final String HTTP = "http://";
62
    private static final String MNAPPENDIX = "/d1/mn";
63
    private SolrIndex solrIndex = null;
64
    private SystemMetadataEventListener systemMetadataListener = null;
65
    private Log log = LogFactory.getLog(IndexGenerator.class);
66
    private MNode mNode = null;
67
    
68
    /**
69
     * Constructor
70
     * @param solrIndex
71
     * @param systemMetadataListener
72
     */
73
    public IndexGenerator(SolrIndex solrIndex, SystemMetadataEventListener systemMetadataListener) {
74
        this.solrIndex = solrIndex;
75
        this.systemMetadataListener = systemMetadataListener;
76
        this.mNode = new MNode(buildMNBaseURL());
77
    }
78
    
79
    /**
80
     * Build the index for all documents in Metacat without overwriting.
81
     * @throws SolrServerException 
82
     * @throws ServiceFailure 
83
     * @throws NotImplemented 
84
     * @throws NotAuthorized 
85
     * @throws InvalidToken 
86
     * @throws InvalidRequest 
87
     */
88
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
89
                            NotImplemented, ServiceFailure, SolrServerException {
90
        boolean force = false;
91
        indexAll(force);
92
    }
93
    
94
    /**
95
     * Build the index for all documents. If force is true, the existed index for documents
96
     * will be overwritten. 
97
     * @param force
98
     * @throws SolrServerException 
99
     * @throws ServiceFailure 
100
     * @throws NotImplemented 
101
     * @throws NotAuthorized 
102
     * @throws InvalidToken 
103
     * @throws InvalidRequest 
104
     */
105
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
106
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
107
        Date since = null;
108
        Date until = null;
109
        index(since, until, force);
110
    }
111
    
112
    /**
113
     * Build the index for the docs which have been modified since the specified date.
114
     * @param since
115
     * @param force 
116
     * @throws SolrServerException 
117
     * @throws ServiceFailure 
118
     * @throws NotImplemented 
119
     * @throws NotAuthorized 
120
     * @throws InvalidToken 
121
     * @throws InvalidRequest 
122
     */
123
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
124
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
125
        Date until = null;
126
        index(since, until, force);
127
    }
128
    
129
    /**
130
     *  Build the index for the docs which have been modified between the specified date.s
131
     * @param since
132
     * @param until
133
     * @param force
134
     * @throws SolrServerException 
135
     * @throws ServiceFailure 
136
     * @throws NotImplemented 
137
     * @throws NotAuthorized 
138
     * @throws InvalidToken 
139
     * @throws InvalidRequest 
140
     */
141
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
142
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
143
        List<String> solrIds = null;
144
        List<String> metacatIds = null;
145
        if(!force) {
146
            solrIds = getSolrDocIds();
147
        }
148
        metacatIds = getMetadataIds(since, until);
149
        if(metacatIds != null) {
150
            for(String metacatId : metacatIds) {
151
                if(metacatId != null) {
152
                    boolean buildIndex = true;
153
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
154
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
155
                        buildIndex = false;
156
                    }
157
                    if(buildIndex) {
158
                        try {
159
                            generateIndex(metacatId);
160
                        } catch (Exception e) {
161
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
162
                        }
163
                        
164
                    }
165
                }
166
            }
167
        }
168
    }
169
    
170
    /*
171
     * Get the indexed ids list from the solr server.
172
     * An empty list will be returned if there is no ids.
173
     */
174
    private List<String> getSolrDocIds() throws SolrServerException {
175
        List<String> ids = solrIndex.getSolrIds();
176
        return ids;
177
    }
178
    
179
    /*
180
     * Get the ids of the metacat. If since and util are null, it will return all of them
181
     */
182
    private List<String> getMetadataIds(Date since, Date until) throws InvalidRequest, 
183
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
184
        List<String> ids = null;
185
        ObjectList objects = null;
186
        if(since == null && until == null) {
187
            objects = mNode.listObjects();
188
        } else {
189
            objects = mNode.listObjects(since, until, null, true, 0, Integer.MAX_VALUE);
190
        }
191
        return ids;
192
    }
193
    
194
    /*
195
     * Build up the mn base url
196
     */
197
    private String buildMNBaseURL() {
198
        String url = Settings.getConfiguration().getString("server.name")+":"+
199
                     Settings.getConfiguration().getString("server.httpPort")+"/"+
200
                     Settings.getConfiguration().getString("application.contex")+MNAPPENDIX;
201
        log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+url);
202
        return url;
203
    }
204
    
205
    
206
    /*
207
     * Generate index for the id.
208
     */
209
    private void generateIndex(String id) throws Exception {
210
        if(id != null)  {
211
                SystemMetadata sysmeta = systemMetadataListener.getSystemMetadata(id);
212
                if(sysmeta != null) {
213
                        InputStream data = systemMetadataListener.getDataObject(id);
214
                        Identifier obsolete = sysmeta.getObsoletes();
215
                        if(obsolete != null) {
216
                            List<String> obsoleteChain = systemMetadataListener.getObsoletes(id);
217
                            solrIndex.update(id, obsoleteChain, sysmeta, data);
218
                        } else {
219
                            solrIndex.insert(id, sysmeta, data);
220
                        }
221
                } else {
222
                    throw new Exception("IndexGenerator.generate - there is no found SystemMetadata associated with the id "+id);
223
                }
224
           
225
        }
226
    }
227
}

Also available in: Unified diff