Project

General

Profile

« Previous | Next » 

Revision 7806

Added by Jing Tao over 11 years ago

The index is now built only for those objects that were modified after the marked time.

View differences:

metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/event/IndexEventFileLog.java
133 133
     * previous timed indexing (including the ones in the timed indexing).
134 134
     * @return the list of failure identifiers. The null will be returned if no failure. 
135 135
     */
136
    public List<Identifier> getFailedPids() throws IndexEventLogException {
136
    public List<String> getFailedPids() throws IndexEventLogException {
137 137
        return null;
138 138
    }
139 139
    
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/event/IndexEventLog.java
58 58
     * previous timed indexing (including the ones in the timed indexing).
59 59
     * @return the list of failure identifiers. The null will be returned if no failure. 
60 60
     */
61
    public List<Identifier> getFailedPids() throws IndexEventLogException;
61
    public List<String> getFailedPids() throws IndexEventLogException;
62 62
    
63 63
    
64 64
    /**
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGenerator.java
57 57

  
58 58
import edu.ucsb.nceas.metacat.index.event.EventlogFactory;
59 59
import edu.ucsb.nceas.metacat.index.event.IndexEvent;
60
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException;
60 61

  
61 62

  
62 63
/**
......
119 120
     * @throws NotAuthorized 
120 121
     * @throws InvalidToken 
121 122
     * @throws InvalidRequest 
123
     * @throws IndexEventLogException 
124
     * @throws IllegalAccessException 
125
     * @throws InstantiationException 
126
     * @throws ClassNotFoundException 
122 127
     */
123
    public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
124
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
128
    /*public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized, 
129
                            NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
125 130
        boolean force = false;
126 131
        indexAll(force);
127
    }
132
    }*/
128 133
    
129 134
    /**
130
     * Build the index for all documents. If force is true, the existed index for documents
131
     * will be overwritten. 
132
     * @param force
135
     * Build the index for all documents.
133 136
     * @throws SolrServerException 
134 137
     * @throws ServiceFailure 
135 138
     * @throws NotImplemented 
136 139
     * @throws NotAuthorized 
137 140
     * @throws InvalidToken 
138 141
     * @throws InvalidRequest 
142
     * @throws IndexEventLogException 
143
     * @throws IllegalAccessException 
144
     * @throws InstantiationException 
145
     * @throws ClassNotFoundException 
139 146
     */
140
    public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
141
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
147
    public void indexAll() throws InvalidRequest, InvalidToken,
148
                NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
142 149
        Date since = null;
143 150
        Date until = null;
144
        index(since, until, force);
151
        index(since, until);
145 152
    }
146 153
    
147 154
    /**
148 155
     * Build the index for the docs which have been modified since the specified date.
149 156
     * @param since
150
     * @param force 
151 157
     * @throws SolrServerException 
152 158
     * @throws ServiceFailure 
153 159
     * @throws NotImplemented 
154 160
     * @throws NotAuthorized 
155 161
     * @throws InvalidToken 
156 162
     * @throws InvalidRequest 
163
     * @throws IndexEventLogException 
164
     * @throws IllegalAccessException 
165
     * @throws InstantiationException 
166
     * @throws ClassNotFoundException 
157 167
     */
158
    public void index(Date since, boolean force) throws InvalidRequest, InvalidToken, 
159
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
168
    public void index(Date since) throws InvalidRequest, InvalidToken, 
169
                    NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
160 170
        Date until = null;
161
        index(since, until, force);
171
        index(since, until);
162 172
    }
163 173
    
164 174
    /**
165 175
     *  Build the index for the docs which have been modified between the specified dates.
166 176
     * @param since
167 177
     * @param until
168
     * @param force
169 178
     * @throws SolrServerException 
170 179
     * @throws ServiceFailure 
171 180
     * @throws NotImplemented 
......
173 182
     * @throws InvalidToken 
174 183
     * @throws InvalidRequest 
175 184
     * @throws FileNotFoundException 
185
     * @throws IndexEventLogException 
186
     * @throws IllegalAccessException 
187
     * @throws InstantiationException 
188
     * @throws ClassNotFoundException 
176 189
     */
177
    public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest, 
178
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
190
    public void index(Date since, Date until) throws SolrServerException, InvalidRequest, 
191
                                                InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException {
192
        Date processedDate = null;
179 193
        List<String> solrIds = null;
180 194
        initSystemMetadataMap();
181 195
        initObjectPathMap();
182
        List[] allMetacatIds = getMetacatIds(since, until);
183
        List<String> otherMetacatIds = allMetacatIds[FIRST];
184
        List<String> resourceMapIds = allMetacatIds[SECOND];
185
        log.info("the metacat ids (exception resource map -----------------------------"+otherMetacatIds);
186
        log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds);
187
        if(!force) {
188
            solrIds = getSolrDocIds();
196
        List[] metacatIds = getMetacatIds(since, until);
197
        List<String> otherMetacatIds = metacatIds[FIRST];
198
        List<String> resourceMapIds =  metacatIds[SECOND];
199
        
200
        //figure out the processedDate by comparing the last elements of otherMetacatIds and resourceMapIds.
201
        Date latestOtherId = null;
202
        if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) {
203
            int size = otherMetacatIds.size();
204
            String id = otherMetacatIds.get(size-1);
205
            SystemMetadata sysmeta = getSystemMetadata(id);
206
            latestOtherId = sysmeta.getDateSysMetadataModified();
189 207
        }
190
        log.info("the solr ids -----------------------------"+solrIds);
191
        index(otherMetacatIds, solrIds, force);
192
        index(resourceMapIds, solrIds, force);
208
        Date latestResourceId = null;
209
        if (resourceMapIds != null && !resourceMapIds.isEmpty()) {
210
            int size = resourceMapIds.size();
211
            String id = resourceMapIds.get(size-1);
212
            SystemMetadata sysmeta = getSystemMetadata(id);
213
            latestResourceId = sysmeta.getDateSysMetadataModified();
214
        }
215
        if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) {
216
            processedDate = latestOtherId;
217
        } else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime()  <= latestResourceId.getTime()) {
218
            processedDate = latestResourceId;
219
        } else if (latestOtherId == null && latestResourceId != null) {
220
            processedDate = latestResourceId;
221
        } else if (latestOtherId != null && latestResourceId == null) {
222
            processedDate = latestOtherId;
223
        }
224
        
225
        //add the failedPids 
226
        List<String> failedPids = EventlogFactory.createIndexEventLog().getFailedPids();
227
        List<String> failedOtherIds = new ArrayList<String>();
228
        List<String> failedResourceMapIds = new ArrayList<String>();
229
        if(failedPids != null) {
230
            for(String id : failedPids) {
231
                SystemMetadata sysmeta = getSystemMetadata(id);
232
                if(sysmeta != null && !sysmeta.getArchived()) {
233
                    ObjectFormatIdentifier formatId =sysmeta.getFormatId();
234
                    if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
235
                        failedResourceMapIds.add(id);
236
                    } else {
237
                        failedOtherIds.add(id);
238
                    }
239
                }
240
            }
241
        }
242
        
243
        if(!failedOtherIds.isEmpty()) {
244
            failedOtherIds.addAll(otherMetacatIds);
245
        } else {
246
            failedOtherIds = otherMetacatIds;
247
        }
248
        
249
        if(!failedResourceMapIds.isEmpty()) {
250
            failedResourceMapIds.addAll(resourceMapIds);
251
        } else {
252
            failedResourceMapIds = resourceMapIds;
253
        }
254
        
255
        log.info("the metacat ids (exception resource map -----------------------------"+failedOtherIds);
256
        log.info("the metacat resroucemap ids -----------------------------"+failedResourceMapIds);
257
        index(failedOtherIds);
258
        index(failedResourceMapIds);
193 259
       
260
        //record the timed index.
261
        if(processedDate != null) {
262
            EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate);
263
        }
194 264
        
195 265
    }
196 266
    
197 267
    /*
198
     * Doing index when it is necessary when compare metacatids and solrids.
268
     * Doing index
199 269
     */
200
    private void index(List<String> metacatIds, List<String>solrIds, boolean force) {
270
    private void index(List<String> metacatIds) {
201 271
        if(metacatIds != null) {
202 272
            for(String metacatId : metacatIds) {
203 273
                if(metacatId != null) {
204
                    boolean buildIndex = true;
205
                    if(!force && solrIds != null && solrIds.contains(metacatId)) {
206
                        //solr already indexs the id and we don't force it to rebuild it, so set the buildIndex to be false
207
                        buildIndex = false;
208
                    }
209
                    if(buildIndex) {
210 274
                        try {
211 275
                            generateIndex(metacatId);
212 276
                        } catch (Exception e) {
213 277
                            log.error("IndexGenerator.index - Metacat Index couldn't generate the index for the id - "+metacatId+" because "+e.getMessage());
214 278
                        }
215 279
                        
216
                    }
280
                   
217 281
                }
218 282
            }
219 283
        }
......
230 294
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing start event :"+e.getMessage());
231 295
        }*/
232 296
        try {
233
            
234
            indexAll();
297
            Date since = EventlogFactory.createIndexEventLog().getLastProcessDate();
298
            index(since);
235 299
        } catch (InvalidRequest e) {
236 300
            // TODO Auto-generated catch block
237 301
            //e.printStackTrace();
......
265 329
            EventlogFactory.createIndexEventLog().write(event);
266 330
        } catch (Exception e) {
267 331
            log.error("IndexGenerator.run - IndexEventLog can't log the timed indexing finish event :"+e.getMessage());
268
        }*/
332
        }*/ catch (ClassNotFoundException e) {
333
            // TODO Auto-generated catch block
334
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
335
        } catch (InstantiationException e) {
336
            // TODO Auto-generated catch block
337
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
338
        } catch (IllegalAccessException e) {
339
            // TODO Auto-generated catch block
340
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
341
        } catch (IndexEventLogException e) {
342
            // TODO Auto-generated catch block
343
            log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
344
        }
269 345
    }
270 346
    
271 347
    /*
......
278 354
    }
279 355
    
280 356
    /*
281
     * Get an array of the list of ids of the metacat. If since and util are null, it will return all of them.
357
     * Get an array of the list of ids of the metacat which has the systemmetadata modification in the range.
358
     * 
359
     * If since and until are null, it will return all of them.
282 360
     * The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map.
283
     * The reason to split them is when we index the resource map, we need the index of the document in the resource map ready.
361
     * The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready.
362
     * The last element in each list has the latest SystemMetadata modification date, but the lists are otherwise not sorted.
284 363
     */
285 364
    private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, 
286 365
                        InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
......
291 370
        ids[FIRST]= otherIds;
292 371
        ids[SECOND] = resourceMapIds;
293 372
        ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet();
373
        Date otherPreviousDate = null;
374
        Date resourceMapPreviousDate = null;
294 375
        if(metacatIds != null) {
295 376
            for(Identifier identifier : metacatIds) {
296 377
                if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
......
317 398
                            }
318 399
                        }
319 400
                        if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
320
                            resourceMapIds.add(identifier.getValue());
401
                            //for the resource map
402
                            if(!resourceMapIds.isEmpty()) {
403
                                if(sysDate.getTime() > resourceMapPreviousDate.getTime()) {
404
                                    resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one
405
                                } else {
406
                                    int size = resourceMapIds.size();//
407
                                    resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list.
408
                                }
409
                            } else {
410
                                resourceMapIds.add(identifier.getValue());
411
                            }
412
                            resourceMapPreviousDate = sysDate;
321 413
                        } else {
322
                            otherIds.add(identifier.getValue());
323
                        }
324
                    }
325
                }
326
            }
327
        }
328
        /*if(objects != null) {
329
            List<ObjectInfo> objectInfoList = objects.getObjectInfoList();
330
            if(objectInfoList != null) {
331
                for(ObjectInfo info : objectInfoList) {
332
                    if(info != null) {
333
                        Identifier identifier = info.getIdentifier();
334
                        if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) {
335
                            SystemMetadata sysmeta = getSystemMetadata(identifier.getValue());
336
                            if(sysmeta != null && !sysmeta.getArchived()) {
337
                                ObjectFormatIdentifier formatId =sysmeta.getFormatId();
338
                                //System.out.println("the object format id is "+formatId.getValue());
339
                                //System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces);
340
                                if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) {
341
                                    resourceMapIds.add(identifier.getValue());
414
                            if(!otherIds.isEmpty()) {
415
                                if(sysDate.getTime() > otherPreviousDate.getTime()) {
416
                                    otherIds.add(identifier.getValue());
342 417
                                } else {
343
                                    otherIds.add(identifier.getValue());
418
                                    int size = otherIds.size();
419
                                    otherIds.add(size-1, identifier.getValue());
344 420
                                }
421
                            } else {
422
                                otherIds.add(identifier.getValue());
345 423
                            }
346
                            //ids.add(identifier.getValue());
424
                            otherPreviousDate = sysDate;
347 425
                        }
426
                        
348 427
                    }
349 428
                }
350 429
            }
351
        }*/
430
        }
352 431
        return ids;
353 432
    }
354 433
    

Also available in: Unified diff