Revision 8352
Added by Jing Tao about 11 years ago
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGenerator.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright: 2013 Regents of the University of California and the |
|
3 |
* National Center for Ecological Analysis and Synthesis |
|
4 |
* |
|
5 |
* This program is free software; you can redistribute it and/or modify |
|
6 |
* it under the terms of the GNU General Public License as published by |
|
7 |
* the Free Software Foundation; either version 2 of the License, or |
|
8 |
* (at your option) any later version. |
|
9 |
* |
|
10 |
* This program is distributed in the hope that it will be useful, |
|
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 |
* GNU General Public License for more details. |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License |
|
16 |
* along with this program; if not, write to the Free Software |
|
17 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
18 |
*/ |
|
19 |
package edu.ucsb.nceas.metacat.index; |
|
20 |
|
|
21 |
import java.io.File; |
|
22 |
import java.io.FileInputStream; |
|
23 |
import java.io.FileNotFoundException; |
|
24 |
import java.io.IOException; |
|
25 |
import java.io.InputStream; |
|
26 |
import java.text.SimpleDateFormat; |
|
27 |
import java.util.ArrayList; |
|
28 |
import java.util.Calendar; |
|
29 |
import java.util.Collections; |
|
30 |
import java.util.Date; |
|
31 |
import java.util.List; |
|
32 |
import java.util.TimerTask; |
|
33 |
|
|
34 |
import javax.xml.parsers.ParserConfigurationException; |
|
35 |
import javax.xml.xpath.XPathExpressionException; |
|
36 |
|
|
37 |
import org.apache.commons.io.FileUtils; |
|
38 |
import org.apache.commons.logging.Log; |
|
39 |
import org.apache.commons.logging.LogFactory; |
|
40 |
import org.apache.solr.client.solrj.SolrServerException; |
|
41 |
import org.dataone.configuration.Settings; |
|
42 |
import org.dataone.service.exceptions.InvalidRequest; |
|
43 |
import org.dataone.service.exceptions.InvalidToken; |
|
44 |
import org.dataone.service.exceptions.NotAuthorized; |
|
45 |
import org.dataone.service.exceptions.NotFound; |
|
46 |
import org.dataone.service.exceptions.NotImplemented; |
|
47 |
import org.dataone.service.exceptions.ServiceFailure; |
|
48 |
import org.dataone.service.exceptions.UnsupportedType; |
|
49 |
import org.dataone.service.types.v1.Event; |
|
50 |
import org.dataone.service.types.v1.Identifier; |
|
51 |
import org.dataone.service.types.v1.ObjectFormatIdentifier; |
|
52 |
import org.dataone.service.types.v1.SystemMetadata; |
|
53 |
import org.dspace.foresite.OREParserException; |
|
54 |
import org.xml.sax.SAXException; |
|
55 |
|
|
56 |
import com.hazelcast.core.IMap; |
|
57 |
import com.hazelcast.core.ISet; |
|
58 |
|
|
59 |
import edu.ucsb.nceas.metacat.common.SolrServerFactory; |
|
60 |
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent; |
|
61 |
import edu.ucsb.nceas.metacat.index.event.EventlogFactory; |
|
62 |
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException; |
|
63 |
|
|
64 |
|
|
65 |
/**
 * Generates Solr index entries in bulk.
 * This is needed when Metacat is upgraded (building the index for all existing documents)
 * and when regenerating the index for documents
 * whose indexing failed during an insert or update.
 *
 * @author tao
 *
 */
|
74 |
public class IndexGenerator extends TimerTask { |
|
75 |
|
|
76 |
private static final int FIRST =0; |
|
77 |
private static final int SECOND =1; |
|
78 |
private static final int THIRD = 2; |
|
79 |
private static final int FOURTH = 3; |
|
80 |
public static final int WAITTIME = 10000; |
|
81 |
public static final int MAXWAITNUMBER = 180; |
|
82 |
private static final String HTTP = "http://"; |
|
83 |
private static final String MNAPPENDIX = "/d1/mn"; |
|
84 |
private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace"; |
|
85 |
public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime"; |
|
86 |
public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts"; |
|
87 |
|
|
88 |
|
|
89 |
private SolrIndex solrIndex = null; |
|
90 |
//private SystemMetadataEventListener systemMetadataListener = null; |
|
91 |
private IMap<Identifier, SystemMetadata> systemMetadataMap; |
|
92 |
private IMap<Identifier, String> objectPathMap; |
|
93 |
private ISet<SystemMetadata> indexQueue; |
|
94 |
private Log log = LogFactory.getLog(IndexGenerator.class); |
|
95 |
//private MNode mNode = null; |
|
96 |
private static List<String> resourceMapNamespaces = null; |
|
97 |
|
|
98 |
/**
 * Creates a generator that writes to the given Solr index.
 * As a side effect the static list of resource map namespaces is (re)loaded from the
 * {@code index.resourcemap.namespace} setting.
 *
 * @param solrIndex the index that receives an update for every processed id
 */
public IndexGenerator(SolrIndex solrIndex) {
    resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
    this.solrIndex = solrIndex;
}
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
/** |
|
114 |
* Build the index for all documents. |
|
115 |
* @throws SolrServerException |
|
116 |
* @throws ServiceFailure |
|
117 |
* @throws NotImplemented |
|
118 |
* @throws NotAuthorized |
|
119 |
* @throws InvalidToken |
|
120 |
* @throws InvalidRequest |
|
121 |
* @throws IndexEventLogException |
|
122 |
* @throws IllegalAccessException |
|
123 |
* @throws InstantiationException |
|
124 |
* @throws ClassNotFoundException |
|
125 |
* @throws ParserConfigurationException |
|
126 |
* @throws SAXException |
|
127 |
* @throws IOException |
|
128 |
* @throws UnsupportedType |
|
129 |
* @throws NotFound |
|
130 |
* @throws XPathExpressionException |
|
131 |
* @throws OREParserException |
|
132 |
*/ |
|
133 |
public void indexAll() throws InvalidRequest, InvalidToken, |
|
134 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException { |
|
135 |
Date since = null; |
|
136 |
Date until = null; |
|
137 |
index(since, until); |
|
138 |
} |
|
139 |
|
|
140 |
/** |
|
141 |
* Build the index for the docs which have been modified since the specified date. |
|
142 |
* @param since |
|
143 |
* @throws SolrServerException |
|
144 |
* @throws ServiceFailure |
|
145 |
* @throws NotImplemented |
|
146 |
* @throws NotAuthorized |
|
147 |
* @throws InvalidToken |
|
148 |
* @throws InvalidRequest |
|
149 |
* @throws IndexEventLogException |
|
150 |
* @throws IllegalAccessException |
|
151 |
* @throws InstantiationException |
|
152 |
* @throws ClassNotFoundException |
|
153 |
* @throws ParserConfigurationException |
|
154 |
* @throws SAXException |
|
155 |
* @throws IOException |
|
156 |
* @throws UnsupportedType |
|
157 |
* @throws NotFound |
|
158 |
* @throws XPathExpressionException |
|
159 |
* @throws OREParserException |
|
160 |
*/ |
|
161 |
public void index(Date since) throws InvalidRequest, InvalidToken, |
|
162 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException { |
|
163 |
Date until = null; |
|
164 |
index(since, until); |
|
165 |
} |
|
166 |
|
|
167 |
/** |
|
168 |
* Build the index for the docs which have been modified between the specified date.s |
|
169 |
* @param since |
|
170 |
* @param until |
|
171 |
* @throws SolrServerException |
|
172 |
* @throws ServiceFailure |
|
173 |
* @throws NotImplemented |
|
174 |
* @throws NotAuthorized |
|
175 |
* @throws InvalidToken |
|
176 |
* @throws InvalidRequest |
|
177 |
* @throws IndexEventLogException |
|
178 |
* @throws IllegalAccessException |
|
179 |
* @throws InstantiationException |
|
180 |
* @throws ClassNotFoundException |
|
181 |
* @throws ParserConfigurationException |
|
182 |
* @throws SAXException |
|
183 |
* @throws IOException |
|
184 |
* @throws UnsupportedType |
|
185 |
* @throws NotFound |
|
186 |
* @throws XPathExpressionException |
|
187 |
* @throws OREParserException |
|
188 |
*/ |
|
189 |
public void index(Date since, Date until) throws SolrServerException, InvalidRequest, |
|
190 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException { |
|
191 |
Date processedDate = null; |
|
192 |
List<String> solrIds = null; |
|
193 |
initSystemMetadataMap(); |
|
194 |
initObjectPathMap(); |
|
195 |
initIndexQueue(); |
|
196 |
List[] metacatIds = getMetacatIds(since, until); |
|
197 |
List<String> otherMetacatIds = metacatIds[FIRST]; |
|
198 |
List<String> resourceMapIds = metacatIds[SECOND]; |
|
199 |
//List<String> otherDeletedMetacatIds = metacatIds[THIRD]; |
|
200 |
//List<String> resourceMapDeletedIds = metacatIds[FOURTH]; |
|
201 |
|
|
202 |
//figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds. |
|
203 |
List<Long> maxCollection = new ArrayList<Long>(); |
|
204 |
Date latestOtherId = null; |
|
205 |
if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) { |
|
206 |
int size = otherMetacatIds.size(); |
|
207 |
String id = otherMetacatIds.get(size-1); |
|
208 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
209 |
latestOtherId = sysmeta.getDateSysMetadataModified(); |
|
210 |
maxCollection.add(new Long(latestOtherId.getTime())); |
|
211 |
} |
|
212 |
|
|
213 |
/*Date latestDeletedOtherIds = null; |
|
214 |
if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) { |
|
215 |
int size = otherDeletedMetacatIds.size(); |
|
216 |
String id = otherDeletedMetacatIds.get(size-1); |
|
217 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
218 |
latestDeletedOtherIds = sysmeta.getDateSysMetadataModified(); |
|
219 |
maxCollection.add(new Long(latestDeletedOtherIds.getTime())); |
|
220 |
}*/ |
|
221 |
|
|
222 |
Date latestResourceId = null; |
|
223 |
if (resourceMapIds != null && !resourceMapIds.isEmpty()) { |
|
224 |
int size = resourceMapIds.size(); |
|
225 |
String id = resourceMapIds.get(size-1); |
|
226 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
227 |
latestResourceId = sysmeta.getDateSysMetadataModified(); |
|
228 |
maxCollection.add(new Long(latestResourceId.getTime())); |
|
229 |
} |
|
230 |
|
|
231 |
/*Date latestDeletedResourceId = null; |
|
232 |
if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) { |
|
233 |
int size = resourceMapDeletedIds.size(); |
|
234 |
String id = resourceMapDeletedIds.get(size-1); |
|
235 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
236 |
latestDeletedResourceId = sysmeta.getDateSysMetadataModified(); |
|
237 |
maxCollection.add(new Long(latestDeletedResourceId.getTime())); |
|
238 |
}*/ |
|
239 |
|
|
240 |
if(!maxCollection.isEmpty()) { |
|
241 |
Long max = Collections.max(maxCollection); |
|
242 |
processedDate = new Date(max.longValue()); |
|
243 |
} |
|
244 |
/*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) { |
|
245 |
processedDate = latestOtherId; |
|
246 |
} else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime() <= latestResourceId.getTime()) { |
|
247 |
processedDate = latestResourceId; |
|
248 |
} else if (latestOtherId == null && latestResourceId != null) { |
|
249 |
processedDate = latestResourceId; |
|
250 |
} else if (latestOtherId != null && latestResourceId == null) { |
|
251 |
processedDate = latestOtherId; |
|
252 |
}*/ |
|
253 |
|
|
254 |
|
|
255 |
//add the failedPids |
|
256 |
List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null); |
|
257 |
List<String> failedOtherIds = new ArrayList<String>(); |
|
258 |
List<String> failedResourceMapIds = new ArrayList<String>(); |
|
259 |
if(failedEvents != null) { |
|
260 |
for(IndexEvent event : failedEvents) { |
|
261 |
String id = event.getIdentifier().getValue(); |
|
262 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
263 |
if(sysmeta != null) { |
|
264 |
ObjectFormatIdentifier formatId =sysmeta.getFormatId(); |
|
265 |
if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) { |
|
266 |
failedResourceMapIds.add(id); |
|
267 |
} else { |
|
268 |
failedOtherIds.add(id); |
|
269 |
} |
|
270 |
} |
|
271 |
} |
|
272 |
} |
|
273 |
//indexFailedIds(failedOtherIds); |
|
274 |
//indexFailedIds(failedResourceMapIds); |
|
275 |
|
|
276 |
index(failedOtherIds); |
|
277 |
index(failedResourceMapIds); |
|
278 |
|
|
279 |
/*if(!failedOtherIds.isEmpty()) { |
|
280 |
failedOtherIds.addAll(otherMetacatIds); |
|
281 |
} else { |
|
282 |
failedOtherIds = otherMetacatIds; |
|
283 |
} |
|
284 |
|
|
285 |
if(!failedResourceMapIds.isEmpty()) { |
|
286 |
failedResourceMapIds.addAll(resourceMapIds); |
|
287 |
} else { |
|
288 |
failedResourceMapIds = resourceMapIds; |
|
289 |
}*/ |
|
290 |
//log.info("the ids in index_event for reindex ( except the resourcemap)=====================================\n "+failedOtherIds); |
|
291 |
//log.info("the resourcemap ids in index_event for reindex =====================================\n "+failedResourceMapIds); |
|
292 |
log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds); |
|
293 |
//logFile(otherMetacatIds, "ids-for-timed-indexing-log"); |
|
294 |
//log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds); |
|
295 |
log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds); |
|
296 |
//logFile(resourceMapIds, "ids-for-timed-indexing-log"); |
|
297 |
//log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds); |
|
298 |
index(otherMetacatIds); |
|
299 |
//removeIndex(otherDeletedMetacatIds); |
|
300 |
index(resourceMapIds); |
|
301 |
//removeIndex(resourceMapDeletedIds); |
|
302 |
|
|
303 |
//record the timed index. |
|
304 |
if(processedDate != null) { |
|
305 |
EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate); |
|
306 |
} |
|
307 |
|
|
308 |
} |
|
309 |
|
|
310 |
/* |
|
311 |
* Write the docids which will be indexed into a file. |
|
312 |
*/ |
|
313 |
/*private void logFile(List<String> ids, String fileName) { |
|
314 |
if(ids != null) { |
|
315 |
try { |
|
316 |
String tempDir = System.getProperty("java.io.tmpdir"); |
|
317 |
log.info("the temp dir is ===================== "+tempDir); |
|
318 |
File idsForIndex = new File(tempDir, fileName); |
|
319 |
if(!idsForIndex.exists()) { |
|
320 |
idsForIndex.createNewFile(); |
|
321 |
} |
|
322 |
|
|
323 |
Date date = Calendar.getInstance().getTime(); |
|
324 |
SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z"); |
|
325 |
String dateStr = format.format(date); |
|
326 |
List<String> dateList = new ArrayList<String>(); |
|
327 |
dateList.add(dateStr); |
|
328 |
Boolean append = true; |
|
329 |
FileUtils.writeLines(idsForIndex, dateList, append);//write time string |
|
330 |
FileUtils.writeLines(idsForIndex, ids, append); |
|
331 |
} catch (Exception e) { |
|
332 |
log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage()); |
|
333 |
} |
|
334 |
|
|
335 |
} |
|
336 |
}*/ |
|
337 |
/* |
|
338 |
* Doing index |
|
339 |
*/ |
|
340 |
private void index(List<String> metacatIds) { |
|
341 |
if(metacatIds != null) { |
|
342 |
for(String metacatId : metacatIds) { |
|
343 |
if(metacatId != null) { |
|
344 |
generateIndex(metacatId); |
|
345 |
} |
|
346 |
} |
|
347 |
} |
|
348 |
} |
|
349 |
|
|
350 |
/* |
|
351 |
* Index those ids which failed in the process (We got them from the EventLog) |
|
352 |
*/ |
|
353 |
/*private void indexFailedIds(List<IndexEvent> events) { |
|
354 |
if(events != null) { |
|
355 |
for(IndexEvent event : events) { |
|
356 |
if(event != null) { |
|
357 |
Identifier identifier = event.getIdentifier(); |
|
358 |
if(identifier != null) { |
|
359 |
String id = identifier.getValue(); |
|
360 |
if(id != null) { |
|
361 |
Event action = event.getAction(); |
|
362 |
//if (action != null && action.equals(Event.CREATE)) { |
|
363 |
try { |
|
364 |
generateIndex(id); |
|
365 |
EventlogFactory.createIndexEventLog().remove(identifier); |
|
366 |
} catch (Exception e) { |
|
367 |
log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage()); |
|
368 |
} |
|
369 |
|
|
370 |
} |
|
371 |
} |
|
372 |
} |
|
373 |
} |
|
374 |
} |
|
375 |
}*/ |
|
376 |
|
|
377 |
public void run() { |
|
378 |
|
|
379 |
try { |
|
380 |
Date since = EventlogFactory.createIndexEventLog().getLastProcessDate(); |
|
381 |
index(since); |
|
382 |
} catch (InvalidRequest e) { |
|
383 |
// TODO Auto-generated catch block |
|
384 |
//e.printStackTrace(); |
|
385 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
386 |
} catch (InvalidToken e) { |
|
387 |
// TODO Auto-generated catch block |
|
388 |
//e.printStackTrace(); |
|
389 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
390 |
} catch (NotAuthorized e) { |
|
391 |
// TODO Auto-generated catch block |
|
392 |
//e.printStackTrace(); |
|
393 |
} catch (NotImplemented e) { |
|
394 |
// TODO Auto-generated catch block |
|
395 |
//e.printStackTrace(); |
|
396 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
397 |
} catch (ServiceFailure e) { |
|
398 |
// TODO Auto-generated catch block |
|
399 |
//e.printStackTrace(); |
|
400 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
401 |
} catch (SolrServerException e) { |
|
402 |
// TODO Auto-generated catch block |
|
403 |
//e.printStackTrace(); |
|
404 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
405 |
} catch (FileNotFoundException e) { |
|
406 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
407 |
} catch (ClassNotFoundException e) { |
|
408 |
// TODO Auto-generated catch block |
|
409 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
410 |
} catch (InstantiationException e) { |
|
411 |
// TODO Auto-generated catch block |
|
412 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
413 |
} catch (IllegalAccessException e) { |
|
414 |
// TODO Auto-generated catch block |
|
415 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
416 |
} catch (IndexEventLogException e) { |
|
417 |
// TODO Auto-generated catch block |
|
418 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
419 |
} catch (XPathExpressionException e) { |
|
420 |
// TODO Auto-generated catch block |
|
421 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
422 |
} catch (NotFound e) { |
|
423 |
// TODO Auto-generated catch block |
|
424 |
e.printStackTrace(); |
|
425 |
} catch (UnsupportedType e) { |
|
426 |
// TODO Auto-generated catch block |
|
427 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
428 |
} catch (IOException e) { |
|
429 |
// TODO Auto-generated catch block |
|
430 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
431 |
} catch (SAXException e) { |
|
432 |
// TODO Auto-generated catch block |
|
433 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
434 |
} catch (ParserConfigurationException e) { |
|
435 |
// TODO Auto-generated catch block |
|
436 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
437 |
} catch (OREParserException e) { |
|
438 |
// TODO Auto-generated catch block |
|
439 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
440 |
} |
|
441 |
} |
|
442 |
|
|
443 |
|
|
444 |
|
|
445 |
/* |
|
446 |
* Get an array of the list of ids of the metacat which has the systemmetadata modification in the range. |
|
447 |
* |
|
448 |
* If since and util are null, it will return all of them. |
|
449 |
* The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map. |
|
450 |
* The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready. |
|
451 |
* The last element in the each list has the latest SystemMetadata modification date. But they are not sorted. |
|
452 |
*/ |
|
453 |
private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, |
|
454 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException { |
|
455 |
String fileName = "ids-from-hazelcast"; |
|
456 |
List<String> resourceMapIds = new ArrayList(); |
|
457 |
//List<String> resourceMapDeletedIds = new ArrayList(); |
|
458 |
List<String> otherIds = new ArrayList(); |
|
459 |
//List<String> otherDeletedIds = new ArrayList(); |
|
460 |
List[] ids = new List[2]; |
|
461 |
ids[FIRST]= otherIds; |
|
462 |
ids[SECOND] = resourceMapIds; |
|
463 |
//ids[THIRD] = otherDeletedIds; |
|
464 |
//ids[FOURTH] = resourceMapDeletedIds; |
|
465 |
ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet(); |
|
466 |
Date otherPreviousDate = null; |
|
467 |
Date otherDeletedPreviousDate = null; |
|
468 |
Date resourceMapPreviousDate = null; |
|
469 |
Date resourceMapDeletedPreviousDate = null; |
|
470 |
if(metacatIds != null) { |
|
471 |
for(Identifier identifier : metacatIds) { |
|
472 |
if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) { |
|
473 |
List<String> idLog = new ArrayList<String>(); |
|
474 |
idLog.add(identifier.getValue()); |
|
475 |
//logFile(idLog, fileName); |
|
476 |
SystemMetadata sysmeta = getSystemMetadata(identifier.getValue()); |
|
477 |
if(sysmeta != null) { |
|
478 |
ObjectFormatIdentifier formatId =sysmeta.getFormatId(); |
|
479 |
//System.out.println("the object format id is "+formatId.getValue()); |
|
480 |
//System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces); |
|
481 |
boolean correctTimeRange = false; |
|
482 |
Date sysDate = sysmeta.getDateSysMetadataModified(); |
|
483 |
if(since == null && until == null) { |
|
484 |
correctTimeRange = true; |
|
485 |
} else if (since != null && until == null) { |
|
486 |
if(sysDate.getTime() > since.getTime()) { |
|
487 |
correctTimeRange = true; |
|
488 |
} |
|
489 |
} else if (since == null && until != null) { |
|
490 |
if(sysDate.getTime() < until.getTime()) { |
|
491 |
correctTimeRange = true; |
|
492 |
} |
|
493 |
} else if (since != null && until != null) { |
|
494 |
if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) { |
|
495 |
correctTimeRange = true; |
|
496 |
} |
|
497 |
} |
|
498 |
if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) { |
|
499 |
//for the resource map |
|
500 |
/*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) { |
|
501 |
//archived ids |
|
502 |
if(!resourceMapDeletedIds.isEmpty()) { |
|
503 |
if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) { |
|
504 |
resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
505 |
resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one |
|
506 |
} else { |
|
507 |
int size = resourceMapDeletedIds.size();// |
|
508 |
resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list. |
|
509 |
} |
|
510 |
} else { |
|
511 |
resourceMapDeletedIds.add(identifier.getValue()); |
|
512 |
resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate |
|
513 |
} |
|
514 |
} else {*/ |
|
515 |
// for all ids |
|
516 |
if(!resourceMapIds.isEmpty()) { |
|
517 |
if(sysDate.getTime() > resourceMapPreviousDate.getTime()) { |
|
518 |
resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
519 |
resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one |
|
520 |
} else { |
|
521 |
int size = resourceMapIds.size();// |
|
522 |
resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list. |
|
523 |
} |
|
524 |
} else { |
|
525 |
resourceMapIds.add(identifier.getValue()); |
|
526 |
resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate |
|
527 |
} |
|
528 |
//} |
|
529 |
} else if (correctTimeRange) { |
|
530 |
/*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) { |
|
531 |
//for the archived ids |
|
532 |
if(!otherDeletedIds.isEmpty()) { |
|
533 |
if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) { |
|
534 |
otherDeletedIds.add(identifier.getValue()); |
|
535 |
otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one |
|
536 |
} else { |
|
537 |
int size = otherDeletedIds.size(); |
|
538 |
otherDeletedIds.add(size-1, identifier.getValue()); |
|
539 |
} |
|
540 |
} else { |
|
541 |
otherDeletedIds.add(identifier.getValue()); |
|
542 |
otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate |
|
543 |
} |
|
544 |
} else {*/ |
|
545 |
//for all ids |
|
546 |
if(!otherIds.isEmpty()) { |
|
547 |
if(sysDate.getTime() > otherPreviousDate.getTime()) { |
|
548 |
otherIds.add(identifier.getValue()); |
|
549 |
otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one |
|
550 |
} else { |
|
551 |
int size = otherIds.size(); |
|
552 |
otherIds.add(size-1, identifier.getValue()); |
|
553 |
} |
|
554 |
} else { |
|
555 |
otherIds.add(identifier.getValue()); |
|
556 |
otherPreviousDate = sysDate;//init otherPreviousDate |
|
557 |
} |
|
558 |
//} |
|
559 |
} |
|
560 |
|
|
561 |
} |
|
562 |
} |
|
563 |
} |
|
564 |
} |
|
565 |
return ids; |
|
566 |
} |
|
567 |
|
|
568 |
/* |
|
569 |
* If the specified ObjectFormatIdentifier is a resrouce map namespace. |
|
570 |
*/ |
|
571 |
public static boolean isResourceMap(ObjectFormatIdentifier formatId) { |
|
572 |
boolean isResourceMap = false; |
|
573 |
if(formatId != null && resourceMapNamespaces != null) { |
|
574 |
for(String namespace : resourceMapNamespaces) { |
|
575 |
if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) { |
|
576 |
isResourceMap = true; |
|
577 |
break; |
|
578 |
} |
|
579 |
} |
|
580 |
} |
|
581 |
return isResourceMap; |
|
582 |
} |
|
583 |
|
|
584 |
|
|
585 |
|
|
586 |
/* |
|
587 |
* Generate index for the id. |
|
588 |
*/ |
|
589 |
private void generateIndex(String id) { |
|
590 |
//if id is null and sysmeta will be null. If sysmeta is null, it will be caught in solrIndex.update |
|
591 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
592 |
Identifier pid = new Identifier(); |
|
593 |
pid.setValue(id); |
|
594 |
solrIndex.update(pid, sysmeta); |
|
595 |
|
|
596 |
} |
|
597 |
|
|
598 |
/* |
|
599 |
* Remove the solr index for the list of ids |
|
600 |
*/ |
|
601 |
/*private void removeIndex(List<String> ids) { |
|
602 |
if(ids!= null) { |
|
603 |
for(String id :ids) { |
|
604 |
try { |
|
605 |
removeIndex(id); |
|
606 |
} catch (Exception e) { |
|
607 |
IndexEvent event = new IndexEvent(); |
|
608 |
Identifier pid = new Identifier(); |
|
609 |
pid.setValue(id); |
|
610 |
event.setIdentifier(pid); |
|
611 |
event.setDate(Calendar.getInstance().getTime()); |
|
612 |
event.setAction(Event.DELETE); |
|
613 |
String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage(); |
|
614 |
event.setDescription(error); |
|
615 |
try { |
|
616 |
EventlogFactory.createIndexEventLog().write(event); |
|
617 |
} catch (Exception ee) { |
|
618 |
log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage()); |
|
619 |
} |
|
620 |
log.error(error); |
|
621 |
} |
|
622 |
|
|
623 |
} |
|
624 |
} |
|
625 |
}*/ |
|
626 |
|
|
627 |
/* |
|
628 |
* Remove the index for the id |
|
629 |
*/ |
|
630 |
/*private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException { |
|
631 |
if(id != null) { |
|
632 |
//solrIndex.remove(id); |
|
633 |
} |
|
634 |
}*/ |
|
635 |
|
|
636 |
/* |
|
637 |
* Initialize the system metadata map |
|
638 |
*/ |
|
639 |
private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{ |
|
640 |
int times = 0; |
|
641 |
if(systemMetadataMap == null) { |
|
642 |
systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap(); |
|
643 |
} |
|
644 |
} |
|
645 |
|
|
646 |
/* |
|
647 |
* We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service |
|
648 |
*/ |
|
649 |
private void initObjectPathMap() throws FileNotFoundException, ServiceFailure { |
|
650 |
if(objectPathMap == null) { |
|
651 |
objectPathMap = DistributedMapsFactory.getObjectPathMap(); |
|
652 |
} |
|
653 |
} |
|
654 |
|
|
655 |
|
|
656 |
|
|
657 |
/* |
|
658 |
* Initialize the index queue |
|
659 |
*/ |
|
660 |
private void initIndexQueue() throws FileNotFoundException, ServiceFailure { |
|
661 |
if(indexQueue == null) { |
|
662 |
indexQueue = DistributedMapsFactory.getIndexQueue(); |
|
663 |
} |
|
664 |
} |
|
665 |
/** |
|
666 |
* Get an InputStream as the data object for the specific pid. |
|
667 |
* @param pid |
|
668 |
* @return |
|
669 |
* @throws FileNotFoundException |
|
670 |
*/ |
|
671 |
private InputStream getDataObject(String pid) throws FileNotFoundException { |
|
672 |
Identifier identifier = new Identifier(); |
|
673 |
identifier.setValue(pid); |
|
674 |
String objectPath = objectPathMap.get(identifier); |
|
675 |
InputStream data = null; |
|
676 |
data = new FileInputStream(objectPath); |
|
677 |
return data; |
|
678 |
|
|
679 |
} |
|
680 |
|
|
681 |
/** |
|
682 |
* Get the SystemMetadata for the specified id from the distributed Map. |
|
683 |
* The null maybe is returned if there is no system metadata found. |
|
684 |
* @param id the specified id. |
|
685 |
* @return the SystemMetadata associated with the id. |
|
686 |
*/ |
|
687 |
private SystemMetadata getSystemMetadata(String id) { |
|
688 |
SystemMetadata metadata = null; |
|
689 |
if(systemMetadataMap != null && id != null) { |
|
690 |
Identifier identifier = new Identifier(); |
|
691 |
identifier.setValue(id); |
|
692 |
metadata = systemMetadataMap.get(identifier); |
|
693 |
} |
|
694 |
return metadata; |
|
695 |
} |
|
696 |
|
|
697 |
/** |
|
698 |
* Get the obsoletes chain of the specified id. The returned list doesn't include |
|
699 |
* the specified id itself. The newer version has the lower index number in the list. |
|
700 |
* Empty list will be returned if there is no document to be obsoleted by this id. |
|
701 |
* @param id |
|
702 |
* @return |
|
703 |
*/ |
|
704 |
private List<String> getObsoletes(String id) { |
|
705 |
List<String> obsoletes = new ArrayList<String>(); |
|
706 |
while (id != null) { |
|
707 |
SystemMetadata metadata = getSystemMetadata(id); |
|
708 |
id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code. |
|
709 |
if(metadata != null) { |
|
710 |
Identifier identifier = metadata.getObsoletes(); |
|
711 |
if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) { |
|
712 |
obsoletes.add(identifier.getValue()); |
|
713 |
id = identifier.getValue(); |
|
714 |
} |
|
715 |
} |
|
716 |
} |
|
717 |
return obsoletes; |
|
718 |
} |
|
719 |
|
|
720 |
/** |
|
721 |
* Overwrite and do nothing |
|
722 |
*/ |
|
723 |
public boolean cancel() { |
|
724 |
return true; |
|
725 |
} |
|
726 |
|
|
727 |
} |
metacat-index/src/test/java/edu/ucsb/nceas/metacat/index/IndexGeneratorIT.java | ||
---|---|---|
36 | 36 |
SolrIndex solrIndex = generateSolrIndex(); |
37 | 37 |
SystemMetadataEventListener systeMetaListener = new SystemMetadataEventListener(solrIndex); |
38 | 38 |
systeMetaListener.start(); |
39 |
IndexGenerator generator = new IndexGenerator(solrIndex);
|
|
39 |
IndexGeneratorTimerTask generator = new IndexGeneratorTimerTask(solrIndex);
|
|
40 | 40 |
generator.indexAll(); |
41 | 41 |
String result = SolrIndexIT.doQuery(solrIndex.getSolrServer()); |
42 | 42 |
systeMetaListener.stop(); |
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/IndexGeneratorTimerTask.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright: 2013 Regents of the University of California and the |
|
3 |
* National Center for Ecological Analysis and Synthesis |
|
4 |
* |
|
5 |
* This program is free software; you can redistribute it and/or modify |
|
6 |
* it under the terms of the GNU General Public License as published by |
|
7 |
* the Free Software Foundation; either version 2 of the License, or |
|
8 |
* (at your option) any later version. |
|
9 |
* |
|
10 |
* This program is distributed in the hope that it will be useful, |
|
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 |
* GNU General Public License for more details. |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License |
|
16 |
* along with this program; if not, write to the Free Software |
|
17 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
18 |
*/ |
|
19 |
package edu.ucsb.nceas.metacat.index; |
|
20 |
|
|
21 |
import java.io.File; |
|
22 |
import java.io.FileInputStream; |
|
23 |
import java.io.FileNotFoundException; |
|
24 |
import java.io.IOException; |
|
25 |
import java.io.InputStream; |
|
26 |
import java.text.SimpleDateFormat; |
|
27 |
import java.util.ArrayList; |
|
28 |
import java.util.Calendar; |
|
29 |
import java.util.Collections; |
|
30 |
import java.util.Date; |
|
31 |
import java.util.List; |
|
32 |
import java.util.TimerTask; |
|
33 |
|
|
34 |
import javax.xml.parsers.ParserConfigurationException; |
|
35 |
import javax.xml.xpath.XPathExpressionException; |
|
36 |
|
|
37 |
import org.apache.commons.io.FileUtils; |
|
38 |
import org.apache.commons.logging.Log; |
|
39 |
import org.apache.commons.logging.LogFactory; |
|
40 |
import org.apache.solr.client.solrj.SolrServerException; |
|
41 |
import org.dataone.configuration.Settings; |
|
42 |
import org.dataone.service.exceptions.InvalidRequest; |
|
43 |
import org.dataone.service.exceptions.InvalidToken; |
|
44 |
import org.dataone.service.exceptions.NotAuthorized; |
|
45 |
import org.dataone.service.exceptions.NotFound; |
|
46 |
import org.dataone.service.exceptions.NotImplemented; |
|
47 |
import org.dataone.service.exceptions.ServiceFailure; |
|
48 |
import org.dataone.service.exceptions.UnsupportedType; |
|
49 |
import org.dataone.service.types.v1.Event; |
|
50 |
import org.dataone.service.types.v1.Identifier; |
|
51 |
import org.dataone.service.types.v1.ObjectFormatIdentifier; |
|
52 |
import org.dataone.service.types.v1.SystemMetadata; |
|
53 |
import org.dspace.foresite.OREParserException; |
|
54 |
import org.xml.sax.SAXException; |
|
55 |
|
|
56 |
import com.hazelcast.core.IMap; |
|
57 |
import com.hazelcast.core.ISet; |
|
58 |
|
|
59 |
import edu.ucsb.nceas.metacat.common.SolrServerFactory; |
|
60 |
import edu.ucsb.nceas.metacat.common.index.event.IndexEvent; |
|
61 |
import edu.ucsb.nceas.metacat.index.event.EventlogFactory; |
|
62 |
import edu.ucsb.nceas.metacat.index.event.IndexEventLogException; |
|
63 |
|
|
64 |
|
|
65 |
/** |
|
66 |
* A class represents the object to generate massive solr indexes. |
|
67 |
* This can happen during an update of Metacat (generating index for all existing documents) |
|
68 |
* or regenerate index for those documents |
|
69 |
* failing to build index during the insert or update. |
|
70 |
* |
|
71 |
* @author tao |
|
72 |
* |
|
73 |
*/ |
|
74 |
public class IndexGeneratorTimerTask extends TimerTask { |
|
75 |
|
|
76 |
private static final int FIRST =0; |
|
77 |
private static final int SECOND =1; |
|
78 |
private static final int THIRD = 2; |
|
79 |
private static final int FOURTH = 3; |
|
80 |
public static final int WAITTIME = 10000; |
|
81 |
public static final int MAXWAITNUMBER = 180; |
|
82 |
private static final String HTTP = "http://"; |
|
83 |
private static final String MNAPPENDIX = "/d1/mn"; |
|
84 |
private static final String RESOURCEMAPPROPERYNAME = "index.resourcemap.namespace"; |
|
85 |
public static final String WAITIMEPOPERTYNAME = "index.regenerate.start.waitingtime"; |
|
86 |
public static final String MAXATTEMPTSPROPERTYNAME = "index.regenerate.start.maxattempts"; |
|
87 |
|
|
88 |
|
|
89 |
private SolrIndex solrIndex = null; |
|
90 |
//private SystemMetadataEventListener systemMetadataListener = null; |
|
91 |
private IMap<Identifier, SystemMetadata> systemMetadataMap; |
|
92 |
private IMap<Identifier, String> objectPathMap; |
|
93 |
private ISet<SystemMetadata> indexQueue; |
|
94 |
private Log log = LogFactory.getLog(IndexGeneratorTimerTask.class); |
|
95 |
//private MNode mNode = null; |
|
96 |
private static List<String> resourceMapNamespaces = null; |
|
97 |
|
|
98 |
/** |
|
99 |
* Constructor |
|
100 |
* @param solrIndex |
|
101 |
* @param systemMetadataListener |
|
102 |
*/ |
|
103 |
/**
 * Creates a task that generates solr index entries through the given SolrIndex.
 * As a side effect the class-wide resourceMapNamespaces list is (re)loaded
 * from the configuration property named by RESOURCEMAPPROPERYNAME.
 *
 * @param solrIndex the index that receives the generated entries
 */
public IndexGeneratorTimerTask(SolrIndex solrIndex) {
    resourceMapNamespaces = Settings.getConfiguration().getList(RESOURCEMAPPROPERYNAME);
    this.solrIndex = solrIndex;
}
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
/** |
|
114 |
* Build the index for all documents. |
|
115 |
* @throws SolrServerException |
|
116 |
* @throws ServiceFailure |
|
117 |
* @throws NotImplemented |
|
118 |
* @throws NotAuthorized |
|
119 |
* @throws InvalidToken |
|
120 |
* @throws InvalidRequest |
|
121 |
* @throws IndexEventLogException |
|
122 |
* @throws IllegalAccessException |
|
123 |
* @throws InstantiationException |
|
124 |
* @throws ClassNotFoundException |
|
125 |
* @throws ParserConfigurationException |
|
126 |
* @throws SAXException |
|
127 |
* @throws IOException |
|
128 |
* @throws UnsupportedType |
|
129 |
* @throws NotFound |
|
130 |
* @throws XPathExpressionException |
|
131 |
* @throws OREParserException |
|
132 |
*/ |
|
133 |
public void indexAll() throws InvalidRequest, InvalidToken, |
|
134 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException { |
|
135 |
Date since = null; |
|
136 |
Date until = null; |
|
137 |
index(since, until); |
|
138 |
} |
|
139 |
|
|
140 |
/** |
|
141 |
* Build the index for the docs which have been modified since the specified date. |
|
142 |
* @param since |
|
143 |
* @throws SolrServerException |
|
144 |
* @throws ServiceFailure |
|
145 |
* @throws NotImplemented |
|
146 |
* @throws NotAuthorized |
|
147 |
* @throws InvalidToken |
|
148 |
* @throws InvalidRequest |
|
149 |
* @throws IndexEventLogException |
|
150 |
* @throws IllegalAccessException |
|
151 |
* @throws InstantiationException |
|
152 |
* @throws ClassNotFoundException |
|
153 |
* @throws ParserConfigurationException |
|
154 |
* @throws SAXException |
|
155 |
* @throws IOException |
|
156 |
* @throws UnsupportedType |
|
157 |
* @throws NotFound |
|
158 |
* @throws XPathExpressionException |
|
159 |
* @throws OREParserException |
|
160 |
*/ |
|
161 |
public void index(Date since) throws InvalidRequest, InvalidToken, |
|
162 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException { |
|
163 |
Date until = null; |
|
164 |
index(since, until); |
|
165 |
} |
|
166 |
|
|
167 |
/** |
|
168 |
* Build the index for the docs which have been modified between the specified date.s |
|
169 |
* @param since |
|
170 |
* @param until |
|
171 |
* @throws SolrServerException |
|
172 |
* @throws ServiceFailure |
|
173 |
* @throws NotImplemented |
|
174 |
* @throws NotAuthorized |
|
175 |
* @throws InvalidToken |
|
176 |
* @throws InvalidRequest |
|
177 |
* @throws IndexEventLogException |
|
178 |
* @throws IllegalAccessException |
|
179 |
* @throws InstantiationException |
|
180 |
* @throws ClassNotFoundException |
|
181 |
* @throws ParserConfigurationException |
|
182 |
* @throws SAXException |
|
183 |
* @throws IOException |
|
184 |
* @throws UnsupportedType |
|
185 |
* @throws NotFound |
|
186 |
* @throws XPathExpressionException |
|
187 |
* @throws OREParserException |
|
188 |
*/ |
|
189 |
public void index(Date since, Date until) throws SolrServerException, InvalidRequest, |
|
190 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, ClassNotFoundException, InstantiationException, IllegalAccessException, IndexEventLogException, XPathExpressionException, NotFound, UnsupportedType, IOException, SAXException, ParserConfigurationException, OREParserException { |
|
191 |
Date processedDate = null; |
|
192 |
List<String> solrIds = null; |
|
193 |
initSystemMetadataMap(); |
|
194 |
initObjectPathMap(); |
|
195 |
initIndexQueue(); |
|
196 |
List[] metacatIds = getMetacatIds(since, until); |
|
197 |
List<String> otherMetacatIds = metacatIds[FIRST]; |
|
198 |
List<String> resourceMapIds = metacatIds[SECOND]; |
|
199 |
//List<String> otherDeletedMetacatIds = metacatIds[THIRD]; |
|
200 |
//List<String> resourceMapDeletedIds = metacatIds[FOURTH]; |
|
201 |
|
|
202 |
//figure out the procesedDate by comparing the last element of otherMetacatIds and resourceMapIds. |
|
203 |
List<Long> maxCollection = new ArrayList<Long>(); |
|
204 |
Date latestOtherId = null; |
|
205 |
if (otherMetacatIds != null && !otherMetacatIds.isEmpty()) { |
|
206 |
int size = otherMetacatIds.size(); |
|
207 |
String id = otherMetacatIds.get(size-1); |
|
208 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
209 |
latestOtherId = sysmeta.getDateSysMetadataModified(); |
|
210 |
maxCollection.add(new Long(latestOtherId.getTime())); |
|
211 |
} |
|
212 |
|
|
213 |
/*Date latestDeletedOtherIds = null; |
|
214 |
if (otherDeletedMetacatIds != null && !otherDeletedMetacatIds.isEmpty()) { |
|
215 |
int size = otherDeletedMetacatIds.size(); |
|
216 |
String id = otherDeletedMetacatIds.get(size-1); |
|
217 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
218 |
latestDeletedOtherIds = sysmeta.getDateSysMetadataModified(); |
|
219 |
maxCollection.add(new Long(latestDeletedOtherIds.getTime())); |
|
220 |
}*/ |
|
221 |
|
|
222 |
Date latestResourceId = null; |
|
223 |
if (resourceMapIds != null && !resourceMapIds.isEmpty()) { |
|
224 |
int size = resourceMapIds.size(); |
|
225 |
String id = resourceMapIds.get(size-1); |
|
226 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
227 |
latestResourceId = sysmeta.getDateSysMetadataModified(); |
|
228 |
maxCollection.add(new Long(latestResourceId.getTime())); |
|
229 |
} |
|
230 |
|
|
231 |
/*Date latestDeletedResourceId = null; |
|
232 |
if(resourceMapDeletedIds != null && !resourceMapDeletedIds.isEmpty()) { |
|
233 |
int size = resourceMapDeletedIds.size(); |
|
234 |
String id = resourceMapDeletedIds.get(size-1); |
|
235 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
236 |
latestDeletedResourceId = sysmeta.getDateSysMetadataModified(); |
|
237 |
maxCollection.add(new Long(latestDeletedResourceId.getTime())); |
|
238 |
}*/ |
|
239 |
|
|
240 |
if(!maxCollection.isEmpty()) { |
|
241 |
Long max = Collections.max(maxCollection); |
|
242 |
processedDate = new Date(max.longValue()); |
|
243 |
} |
|
244 |
/*if(latestOtherId != null && latestResourceId != null && latestOtherId.getTime() > latestResourceId.getTime()) { |
|
245 |
processedDate = latestOtherId; |
|
246 |
} else if (latestOtherId != null && latestResourceId != null && latestOtherId.getTime() <= latestResourceId.getTime()) { |
|
247 |
processedDate = latestResourceId; |
|
248 |
} else if (latestOtherId == null && latestResourceId != null) { |
|
249 |
processedDate = latestResourceId; |
|
250 |
} else if (latestOtherId != null && latestResourceId == null) { |
|
251 |
processedDate = latestOtherId; |
|
252 |
}*/ |
|
253 |
|
|
254 |
|
|
255 |
//add the failedPids |
|
256 |
List<IndexEvent> failedEvents = EventlogFactory.createIndexEventLog().getEvents(null, null, null, null); |
|
257 |
List<String> failedOtherIds = new ArrayList<String>(); |
|
258 |
List<String> failedResourceMapIds = new ArrayList<String>(); |
|
259 |
if(failedEvents != null) { |
|
260 |
for(IndexEvent event : failedEvents) { |
|
261 |
String id = event.getIdentifier().getValue(); |
|
262 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
263 |
if(sysmeta != null) { |
|
264 |
ObjectFormatIdentifier formatId =sysmeta.getFormatId(); |
|
265 |
if(formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) { |
|
266 |
failedResourceMapIds.add(id); |
|
267 |
} else { |
|
268 |
failedOtherIds.add(id); |
|
269 |
} |
|
270 |
} |
|
271 |
} |
|
272 |
} |
|
273 |
//indexFailedIds(failedOtherIds); |
|
274 |
//indexFailedIds(failedResourceMapIds); |
|
275 |
|
|
276 |
index(failedOtherIds); |
|
277 |
index(failedResourceMapIds); |
|
278 |
|
|
279 |
/*if(!failedOtherIds.isEmpty()) { |
|
280 |
failedOtherIds.addAll(otherMetacatIds); |
|
281 |
} else { |
|
282 |
failedOtherIds = otherMetacatIds; |
|
283 |
} |
|
284 |
|
|
285 |
if(!failedResourceMapIds.isEmpty()) { |
|
286 |
failedResourceMapIds.addAll(resourceMapIds); |
|
287 |
} else { |
|
288 |
failedResourceMapIds = resourceMapIds; |
|
289 |
}*/ |
|
290 |
//log.info("the ids in index_event for reindex ( except the resourcemap)=====================================\n "+failedOtherIds); |
|
291 |
//log.info("the resourcemap ids in index_event for reindex =====================================\n "+failedResourceMapIds); |
|
292 |
log.info("the metacat ids (except the resource map ids)-----------------------------"+otherMetacatIds); |
|
293 |
//logFile(otherMetacatIds, "ids-for-timed-indexing-log"); |
|
294 |
//log.info("the deleted metacat ids (except the resource map ids)-----------------------------"+otherDeletedMetacatIds); |
|
295 |
log.info("the metacat resroucemap ids -----------------------------"+resourceMapIds); |
|
296 |
//logFile(resourceMapIds, "ids-for-timed-indexing-log"); |
|
297 |
//log.info("the deleted metacat resroucemap ids -----------------------------"+resourceMapDeletedIds); |
|
298 |
index(otherMetacatIds); |
|
299 |
//removeIndex(otherDeletedMetacatIds); |
|
300 |
index(resourceMapIds); |
|
301 |
//removeIndex(resourceMapDeletedIds); |
|
302 |
|
|
303 |
//record the timed index. |
|
304 |
if(processedDate != null) { |
|
305 |
EventlogFactory.createIndexEventLog().setLastProcessDate(processedDate); |
|
306 |
} |
|
307 |
|
|
308 |
} |
|
309 |
|
|
310 |
/* |
|
311 |
* Write the docids which will be indexed into a file. |
|
312 |
*/ |
|
313 |
/*private void logFile(List<String> ids, String fileName) { |
|
314 |
if(ids != null) { |
|
315 |
try { |
|
316 |
String tempDir = System.getProperty("java.io.tmpdir"); |
|
317 |
log.info("the temp dir is ===================== "+tempDir); |
|
318 |
File idsForIndex = new File(tempDir, fileName); |
|
319 |
if(!idsForIndex.exists()) { |
|
320 |
idsForIndex.createNewFile(); |
|
321 |
} |
|
322 |
|
|
323 |
Date date = Calendar.getInstance().getTime(); |
|
324 |
SimpleDateFormat format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z"); |
|
325 |
String dateStr = format.format(date); |
|
326 |
List<String> dateList = new ArrayList<String>(); |
|
327 |
dateList.add(dateStr); |
|
328 |
Boolean append = true; |
|
329 |
FileUtils.writeLines(idsForIndex, dateList, append);//write time string |
|
330 |
FileUtils.writeLines(idsForIndex, ids, append); |
|
331 |
} catch (Exception e) { |
|
332 |
log.warn("IndexGenerator.logFile - Couldn't log the ids which will be indexed since - "+e.getMessage()); |
|
333 |
} |
|
334 |
|
|
335 |
} |
|
336 |
}*/ |
|
337 |
/* |
|
338 |
* Doing index |
|
339 |
*/ |
|
340 |
private void index(List<String> metacatIds) { |
|
341 |
if(metacatIds != null) { |
|
342 |
for(String metacatId : metacatIds) { |
|
343 |
if(metacatId != null) { |
|
344 |
generateIndex(metacatId); |
|
345 |
} |
|
346 |
} |
|
347 |
} |
|
348 |
} |
|
349 |
|
|
350 |
/* |
|
351 |
* Index those ids which failed in the process (We got them from the EventLog) |
|
352 |
*/ |
|
353 |
/*private void indexFailedIds(List<IndexEvent> events) { |
|
354 |
if(events != null) { |
|
355 |
for(IndexEvent event : events) { |
|
356 |
if(event != null) { |
|
357 |
Identifier identifier = event.getIdentifier(); |
|
358 |
if(identifier != null) { |
|
359 |
String id = identifier.getValue(); |
|
360 |
if(id != null) { |
|
361 |
Event action = event.getAction(); |
|
362 |
//if (action != null && action.equals(Event.CREATE)) { |
|
363 |
try { |
|
364 |
generateIndex(id); |
|
365 |
EventlogFactory.createIndexEventLog().remove(identifier); |
|
366 |
} catch (Exception e) { |
|
367 |
log.error("IndexGenerator.indexFailedIds - Metacat Index couldn't generate the index for the id - "+id+" because "+e.getMessage()); |
|
368 |
} |
|
369 |
|
|
370 |
} |
|
371 |
} |
|
372 |
} |
|
373 |
} |
|
374 |
} |
|
375 |
}*/ |
|
376 |
|
|
377 |
public void run() { |
|
378 |
|
|
379 |
try { |
|
380 |
Date since = EventlogFactory.createIndexEventLog().getLastProcessDate(); |
|
381 |
index(since); |
|
382 |
} catch (InvalidRequest e) { |
|
383 |
// TODO Auto-generated catch block |
|
384 |
//e.printStackTrace(); |
|
385 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
386 |
} catch (InvalidToken e) { |
|
387 |
// TODO Auto-generated catch block |
|
388 |
//e.printStackTrace(); |
|
389 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
390 |
} catch (NotAuthorized e) { |
|
391 |
// TODO Auto-generated catch block |
|
392 |
//e.printStackTrace(); |
|
393 |
} catch (NotImplemented e) { |
|
394 |
// TODO Auto-generated catch block |
|
395 |
//e.printStackTrace(); |
|
396 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
397 |
} catch (ServiceFailure e) { |
|
398 |
// TODO Auto-generated catch block |
|
399 |
//e.printStackTrace(); |
|
400 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
401 |
} catch (SolrServerException e) { |
|
402 |
// TODO Auto-generated catch block |
|
403 |
//e.printStackTrace(); |
|
404 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
405 |
} catch (FileNotFoundException e) { |
|
406 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
407 |
} catch (ClassNotFoundException e) { |
|
408 |
// TODO Auto-generated catch block |
|
409 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
410 |
} catch (InstantiationException e) { |
|
411 |
// TODO Auto-generated catch block |
|
412 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
413 |
} catch (IllegalAccessException e) { |
|
414 |
// TODO Auto-generated catch block |
|
415 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
416 |
} catch (IndexEventLogException e) { |
|
417 |
// TODO Auto-generated catch block |
|
418 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
419 |
} catch (XPathExpressionException e) { |
|
420 |
// TODO Auto-generated catch block |
|
421 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
422 |
} catch (NotFound e) { |
|
423 |
// TODO Auto-generated catch block |
|
424 |
e.printStackTrace(); |
|
425 |
} catch (UnsupportedType e) { |
|
426 |
// TODO Auto-generated catch block |
|
427 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
428 |
} catch (IOException e) { |
|
429 |
// TODO Auto-generated catch block |
|
430 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
431 |
} catch (SAXException e) { |
|
432 |
// TODO Auto-generated catch block |
|
433 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
434 |
} catch (ParserConfigurationException e) { |
|
435 |
// TODO Auto-generated catch block |
|
436 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
437 |
} catch (OREParserException e) { |
|
438 |
// TODO Auto-generated catch block |
|
439 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage()); |
|
440 |
} |
|
441 |
} |
|
442 |
|
|
443 |
|
|
444 |
|
|
445 |
/* |
|
446 |
* Get an array of the list of ids of the metacat which has the systemmetadata modification in the range. |
|
447 |
* |
|
448 |
* If since and util are null, it will return all of them. |
|
449 |
* The first element of the list is the ids except the resource map. The second elements of the list is the ids of the resource map. |
|
450 |
* The reason to split them is when we index the resource map, we need the index of the documents in the resource map ready. |
|
451 |
* The last element in the each list has the latest SystemMetadata modification date. But they are not sorted. |
|
452 |
*/ |
|
453 |
private List[] getMetacatIds(Date since, Date until) throws InvalidRequest, |
|
454 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException { |
|
455 |
String fileName = "ids-from-hazelcast"; |
|
456 |
List<String> resourceMapIds = new ArrayList(); |
|
457 |
//List<String> resourceMapDeletedIds = new ArrayList(); |
|
458 |
List<String> otherIds = new ArrayList(); |
|
459 |
//List<String> otherDeletedIds = new ArrayList(); |
|
460 |
List[] ids = new List[2]; |
|
461 |
ids[FIRST]= otherIds; |
|
462 |
ids[SECOND] = resourceMapIds; |
|
463 |
//ids[THIRD] = otherDeletedIds; |
|
464 |
//ids[FOURTH] = resourceMapDeletedIds; |
|
465 |
ISet<Identifier> metacatIds = DistributedMapsFactory.getIdentifiersSet(); |
|
466 |
Date otherPreviousDate = null; |
|
467 |
Date otherDeletedPreviousDate = null; |
|
468 |
Date resourceMapPreviousDate = null; |
|
469 |
Date resourceMapDeletedPreviousDate = null; |
|
470 |
if(metacatIds != null) { |
|
471 |
for(Identifier identifier : metacatIds) { |
|
472 |
if(identifier != null && identifier.getValue() != null && !identifier.getValue().equals("")) { |
|
473 |
List<String> idLog = new ArrayList<String>(); |
|
474 |
idLog.add(identifier.getValue()); |
|
475 |
//logFile(idLog, fileName); |
|
476 |
SystemMetadata sysmeta = getSystemMetadata(identifier.getValue()); |
|
477 |
if(sysmeta != null) { |
|
478 |
ObjectFormatIdentifier formatId =sysmeta.getFormatId(); |
|
479 |
//System.out.println("the object format id is "+formatId.getValue()); |
|
480 |
//System.out.println("the ============ resourcMapNamespaces"+resourceMapNamespaces); |
|
481 |
boolean correctTimeRange = false; |
|
482 |
Date sysDate = sysmeta.getDateSysMetadataModified(); |
|
483 |
if(since == null && until == null) { |
|
484 |
correctTimeRange = true; |
|
485 |
} else if (since != null && until == null) { |
|
486 |
if(sysDate.getTime() > since.getTime()) { |
|
487 |
correctTimeRange = true; |
|
488 |
} |
|
489 |
} else if (since == null && until != null) { |
|
490 |
if(sysDate.getTime() < until.getTime()) { |
|
491 |
correctTimeRange = true; |
|
492 |
} |
|
493 |
} else if (since != null && until != null) { |
|
494 |
if(sysDate.getTime() > since.getTime() && sysDate.getTime() < until.getTime()) { |
|
495 |
correctTimeRange = true; |
|
496 |
} |
|
497 |
} |
|
498 |
if(correctTimeRange && formatId != null && formatId.getValue() != null && resourceMapNamespaces != null && isResourceMap(formatId)) { |
|
499 |
//for the resource map |
|
500 |
/*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) { |
|
501 |
//archived ids |
|
502 |
if(!resourceMapDeletedIds.isEmpty()) { |
|
503 |
if(sysDate.getTime() > resourceMapDeletedPreviousDate.getTime()) { |
|
504 |
resourceMapDeletedIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
505 |
resourceMapDeletedPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one |
|
506 |
} else { |
|
507 |
int size = resourceMapDeletedIds.size();// |
|
508 |
resourceMapDeletedIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list. |
|
509 |
} |
|
510 |
} else { |
|
511 |
resourceMapDeletedIds.add(identifier.getValue()); |
|
512 |
resourceMapDeletedPreviousDate = sysDate;//init resourcemapPreviousDate |
|
513 |
} |
|
514 |
} else {*/ |
|
515 |
// for all ids |
|
516 |
if(!resourceMapIds.isEmpty()) { |
|
517 |
if(sysDate.getTime() > resourceMapPreviousDate.getTime()) { |
|
518 |
resourceMapIds.add(identifier.getValue());//append to the end of the list if current is later than the previous one |
|
519 |
resourceMapPreviousDate = sysDate;//reset resourceMapPreviousDate to the bigger one |
|
520 |
} else { |
|
521 |
int size = resourceMapIds.size();// |
|
522 |
resourceMapIds.add(size -1, identifier.getValue());//keep the previous one at the end of the list. |
|
523 |
} |
|
524 |
} else { |
|
525 |
resourceMapIds.add(identifier.getValue()); |
|
526 |
resourceMapPreviousDate = sysDate;//init resourcemapPreviousDate |
|
527 |
} |
|
528 |
//} |
|
529 |
} else if (correctTimeRange) { |
|
530 |
/*if(sysmeta.getArchived() || sysmeta.getObsoletedBy() != null) { |
|
531 |
//for the archived ids |
|
532 |
if(!otherDeletedIds.isEmpty()) { |
|
533 |
if(sysDate.getTime() > otherDeletedPreviousDate.getTime()) { |
|
534 |
otherDeletedIds.add(identifier.getValue()); |
|
535 |
otherDeletedPreviousDate = sysDate;//reset otherDeletedPreviousDate to the bigger one |
|
536 |
} else { |
|
537 |
int size = otherDeletedIds.size(); |
|
538 |
otherDeletedIds.add(size-1, identifier.getValue()); |
|
539 |
} |
|
540 |
} else { |
|
541 |
otherDeletedIds.add(identifier.getValue()); |
|
542 |
otherDeletedPreviousDate = sysDate;//init otherDeletedPreviousDate |
|
543 |
} |
|
544 |
} else {*/ |
|
545 |
//for all ids |
|
546 |
if(!otherIds.isEmpty()) { |
|
547 |
if(sysDate.getTime() > otherPreviousDate.getTime()) { |
|
548 |
otherIds.add(identifier.getValue()); |
|
549 |
otherPreviousDate = sysDate;//reset otherPreviousDate to the bigger one |
|
550 |
} else { |
|
551 |
int size = otherIds.size(); |
|
552 |
otherIds.add(size-1, identifier.getValue()); |
|
553 |
} |
|
554 |
} else { |
|
555 |
otherIds.add(identifier.getValue()); |
|
556 |
otherPreviousDate = sysDate;//init otherPreviousDate |
|
557 |
} |
|
558 |
//} |
|
559 |
} |
|
560 |
|
|
561 |
} |
|
562 |
} |
|
563 |
} |
|
564 |
} |
|
565 |
return ids; |
|
566 |
} |
|
567 |
|
|
568 |
/* |
|
569 |
* If the specified ObjectFormatIdentifier is a resrouce map namespace. |
|
570 |
*/ |
|
571 |
public static boolean isResourceMap(ObjectFormatIdentifier formatId) { |
|
572 |
boolean isResourceMap = false; |
|
573 |
if(formatId != null && resourceMapNamespaces != null) { |
|
574 |
for(String namespace : resourceMapNamespaces) { |
|
575 |
if(namespace != null && formatId.getValue() != null && !formatId.getValue().trim().equals("") && formatId.getValue().equals(namespace)) { |
|
576 |
isResourceMap = true; |
|
577 |
break; |
|
578 |
} |
|
579 |
} |
|
580 |
} |
|
581 |
return isResourceMap; |
|
582 |
} |
|
583 |
|
|
584 |
|
|
585 |
|
|
586 |
/* |
|
587 |
* Generate index for the id. |
|
588 |
*/ |
|
589 |
private void generateIndex(String id) { |
|
590 |
//if id is null and sysmeta will be null. If sysmeta is null, it will be caught in solrIndex.update |
|
591 |
SystemMetadata sysmeta = getSystemMetadata(id); |
|
592 |
Identifier pid = new Identifier(); |
|
593 |
pid.setValue(id); |
|
594 |
solrIndex.update(pid, sysmeta); |
|
595 |
|
|
596 |
} |
|
597 |
|
|
598 |
/* |
|
599 |
* Remove the solr index for the list of ids |
|
600 |
*/ |
|
601 |
/*private void removeIndex(List<String> ids) { |
|
602 |
if(ids!= null) { |
|
603 |
for(String id :ids) { |
|
604 |
try { |
|
605 |
removeIndex(id); |
|
606 |
} catch (Exception e) { |
|
607 |
IndexEvent event = new IndexEvent(); |
|
608 |
Identifier pid = new Identifier(); |
|
609 |
pid.setValue(id); |
|
610 |
event.setIdentifier(pid); |
|
611 |
event.setDate(Calendar.getInstance().getTime()); |
|
612 |
event.setAction(Event.DELETE); |
|
613 |
String error = "IndexGenerator.index - Metacat Index couldn't remove the index for the id - "+id+" because "+e.getMessage(); |
|
614 |
event.setDescription(error); |
|
615 |
try { |
|
616 |
EventlogFactory.createIndexEventLog().write(event); |
|
617 |
} catch (Exception ee) { |
|
618 |
log.error("SolrIndex.insertToIndex - IndexEventLog can't log the index deleting event :"+ee.getMessage()); |
|
619 |
} |
|
620 |
log.error(error); |
|
621 |
} |
|
622 |
|
|
623 |
} |
|
624 |
} |
|
625 |
}*/ |
|
626 |
|
|
627 |
/* |
|
628 |
* Remove the index for the id |
|
629 |
*/ |
|
630 |
/*private void removeIndex(String id) throws ServiceFailure, XPathExpressionException, NotImplemented, NotFound, UnsupportedType, IOException, SolrServerException, SAXException, ParserConfigurationException, OREParserException { |
|
631 |
if(id != null) { |
|
632 |
//solrIndex.remove(id); |
|
633 |
} |
|
634 |
}*/ |
|
635 |
|
|
636 |
/* |
|
637 |
* Initialize the system metadata map |
|
638 |
*/ |
|
639 |
private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{ |
|
640 |
int times = 0; |
|
641 |
if(systemMetadataMap == null) { |
|
642 |
systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap(); |
|
643 |
} |
|
644 |
} |
|
645 |
|
|
646 |
/* |
|
647 |
* We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service |
|
648 |
*/ |
|
649 |
private void initObjectPathMap() throws FileNotFoundException, ServiceFailure { |
|
650 |
if(objectPathMap == null) { |
|
651 |
objectPathMap = DistributedMapsFactory.getObjectPathMap(); |
|
652 |
} |
|
653 |
} |
|
654 |
|
|
655 |
|
|
656 |
|
|
657 |
/* |
|
658 |
* Initialize the index queue |
|
659 |
*/ |
|
660 |
private void initIndexQueue() throws FileNotFoundException, ServiceFailure { |
|
661 |
if(indexQueue == null) { |
|
662 |
indexQueue = DistributedMapsFactory.getIndexQueue(); |
|
663 |
} |
|
664 |
} |
|
665 |
/** |
|
666 |
* Get an InputStream as the data object for the specific pid. |
|
667 |
* @param pid |
|
668 |
* @return |
|
669 |
* @throws FileNotFoundException |
|
670 |
*/ |
|
671 |
private InputStream getDataObject(String pid) throws FileNotFoundException { |
|
672 |
Identifier identifier = new Identifier(); |
|
673 |
identifier.setValue(pid); |
|
674 |
String objectPath = objectPathMap.get(identifier); |
|
675 |
InputStream data = null; |
|
676 |
data = new FileInputStream(objectPath); |
|
677 |
return data; |
|
678 |
|
|
679 |
} |
|
680 |
|
|
681 |
/** |
|
682 |
* Get the SystemMetadata for the specified id from the distributed Map. |
|
683 |
* The null maybe is returned if there is no system metadata found. |
|
684 |
* @param id the specified id. |
|
685 |
* @return the SystemMetadata associated with the id. |
|
686 |
*/ |
|
687 |
private SystemMetadata getSystemMetadata(String id) { |
|
688 |
SystemMetadata metadata = null; |
|
689 |
if(systemMetadataMap != null && id != null) { |
|
690 |
Identifier identifier = new Identifier(); |
|
691 |
identifier.setValue(id); |
|
692 |
metadata = systemMetadataMap.get(identifier); |
|
693 |
} |
|
694 |
return metadata; |
|
695 |
} |
|
696 |
|
|
697 |
/** |
|
698 |
* Get the obsoletes chain of the specified id. The returned list doesn't include |
|
699 |
* the specified id itself. The newer version has the lower index number in the list. |
|
700 |
* Empty list will be returned if there is no document to be obsoleted by this id. |
|
701 |
* @param id |
|
702 |
* @return |
|
703 |
*/ |
|
704 |
private List<String> getObsoletes(String id) { |
|
705 |
List<String> obsoletes = new ArrayList<String>(); |
|
706 |
while (id != null) { |
|
707 |
SystemMetadata metadata = getSystemMetadata(id); |
|
708 |
id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code. |
|
709 |
if(metadata != null) { |
|
710 |
Identifier identifier = metadata.getObsoletes(); |
|
711 |
if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) { |
|
712 |
obsoletes.add(identifier.getValue()); |
|
713 |
id = identifier.getValue(); |
|
714 |
} |
|
715 |
} |
|
716 |
} |
|
717 |
return obsoletes; |
|
718 |
} |
|
719 |
|
|
720 |
/** |
|
721 |
* Overwrite and do nothing |
|
722 |
*/ |
|
723 |
public boolean cancel() { |
|
724 |
return true; |
|
725 |
} |
|
726 |
|
|
727 |
} |
metacat-index/src/main/java/edu/ucsb/nceas/metacat/index/ApplicationController.java | ||
---|---|---|
55 | 55 |
private static ApplicationContext context = null; |
56 | 56 |
private String springConfigFileURL = "/index-processor-context.xml"; |
57 | 57 |
private String metacatPropertiesFile = null; |
58 |
private static int waitingTime = IndexGenerator.WAITTIME; |
|
59 |
private static int maxAttempts = IndexGenerator.MAXWAITNUMBER; |
|
58 |
private static int waitingTime = IndexGeneratorTimerTask.WAITTIME;
|
|
59 |
private static int maxAttempts = IndexGeneratorTimerTask.MAXWAITNUMBER;
|
|
60 | 60 |
private static long period = DEFAULTINTERVAL; |
61 | 61 |
Log log = LogFactory.getLog(ApplicationController.class); |
62 | 62 |
|
... | ... | |
206 | 206 |
if(period > 0) { |
207 | 207 |
SolrIndex index = solrIndexes.get(FIRST); |
208 | 208 |
//SystemMetadataEventListener listener = sysmetaListeners.get(FIRST); |
209 |
IndexGenerator generator = new IndexGenerator(index);
|
|
209 |
IndexGeneratorTimerTask generator = new IndexGeneratorTimerTask(index);
|
|
210 | 210 |
//Thread indexThread = new Thread(generator); |
211 | 211 |
//indexThread.start(); |
212 | 212 |
Timer indexTimer = new Timer(); |
Also available in: Unified diff
Rename the IndexGenerator to IndexGeneratorTimerTask.