26 |
26 |
*/
|
27 |
27 |
package edu.ucsb.nceas.metacat.index;
|
28 |
28 |
|
|
29 |
import java.io.FileInputStream;
|
|
30 |
import java.io.FileNotFoundException;
|
29 |
31 |
import java.io.InputStream;
|
30 |
32 |
import java.util.ArrayList;
|
31 |
33 |
import java.util.Date;
|
... | ... | |
47 |
49 |
import org.dataone.service.types.v1.ObjectList;
|
48 |
50 |
import org.dataone.service.types.v1.SystemMetadata;
|
49 |
51 |
|
|
52 |
import com.hazelcast.core.IMap;
|
50 |
53 |
|
|
54 |
|
51 |
55 |
/**
|
52 |
56 |
* A class represents the object to generate massive solr indexes.
|
53 |
57 |
* This can happen during an update of Metacat (generating index for all existing documents)
|
... | ... | |
64 |
68 |
private static final String HTTP = "http://";
|
65 |
69 |
private static final String MNAPPENDIX = "/d1/mn";
|
66 |
70 |
private SolrIndex solrIndex = null;
|
67 |
|
private SystemMetadataEventListener systemMetadataListener = null;
|
|
71 |
//private SystemMetadataEventListener systemMetadataListener = null;
|
|
72 |
private IMap<Identifier, SystemMetadata> systemMetadataMap;
|
|
73 |
|
|
74 |
private IMap<Identifier, String> objectPathMap;
|
68 |
75 |
private Log log = LogFactory.getLog(IndexGenerator.class);
|
69 |
76 |
private MNode mNode = null;
|
70 |
77 |
|
... | ... | |
73 |
80 |
* @param solrIndex
|
74 |
81 |
* @param systemMetadataListener
|
75 |
82 |
*/
|
76 |
|
public IndexGenerator(SolrIndex solrIndex) {
    // The generator no longer takes a SystemMetadataEventListener: system metadata
    // and object paths are read from the distributed maps, which index(...)
    // initializes before use.
    this.solrIndex = solrIndex;
    // The member node client points at the base URL derived from the configuration.
    this.mNode = new MNode(buildMNBaseURL());
}
|
81 |
88 |
|
... | ... | |
89 |
96 |
* @throws InvalidRequest
|
90 |
97 |
*/
|
91 |
98 |
public void indexAll() throws InvalidRequest, InvalidToken, NotAuthorized,
|
92 |
|
NotImplemented, ServiceFailure, SolrServerException {
|
|
99 |
NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
|
93 |
100 |
boolean force = false;
|
94 |
101 |
indexAll(force);
|
95 |
102 |
}
|
... | ... | |
106 |
113 |
* @throws InvalidRequest
|
107 |
114 |
*/
|
108 |
115 |
public void indexAll(boolean force) throws InvalidRequest, InvalidToken,
|
109 |
|
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
|
|
116 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
|
110 |
117 |
Date since = null;
|
111 |
118 |
Date until = null;
|
112 |
119 |
index(since, until, force);
|
... | ... | |
124 |
131 |
* @throws InvalidRequest
|
125 |
132 |
*/
|
126 |
133 |
public void index(Date since, boolean force) throws InvalidRequest, InvalidToken,
|
127 |
|
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException {
|
|
134 |
NotAuthorized, NotImplemented, ServiceFailure, SolrServerException, FileNotFoundException {
|
128 |
135 |
Date until = null;
|
129 |
136 |
index(since, until, force);
|
130 |
137 |
}
|
... | ... | |
140 |
147 |
* @throws NotAuthorized
|
141 |
148 |
* @throws InvalidToken
|
142 |
149 |
* @throws InvalidRequest
|
|
150 |
* @throws FileNotFoundException
|
143 |
151 |
*/
|
144 |
152 |
public void index(Date since, Date until, boolean force) throws SolrServerException, InvalidRequest,
|
145 |
|
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure {
|
|
153 |
InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, FileNotFoundException {
|
146 |
154 |
List<String> solrIds = null;
|
147 |
155 |
List<String> metacatIds = null;
|
148 |
156 |
if(!force) {
|
... | ... | |
151 |
159 |
log.info("the solr ids -----------------------------"+solrIds);
|
152 |
160 |
metacatIds = getMetadataIds(since, until);
|
153 |
161 |
log.info("the metacat ids -----------------------------"+metacatIds);
|
|
162 |
initSystemMetadataMap();
|
|
163 |
initObjectPathMap();
|
154 |
164 |
if(metacatIds != null) {
|
155 |
165 |
for(String metacatId : metacatIds) {
|
156 |
166 |
if(metacatId != null) {
|
... | ... | |
198 |
208 |
// TODO Auto-generated catch block
|
199 |
209 |
//e.printStackTrace();
|
200 |
210 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
|
|
211 |
} catch (FileNotFoundException e) {
|
|
212 |
log.error("IndexGenerator.run - Metadata-Index couldn't generate indexes for those documents which haven't been indexed : "+e.getMessage());
|
201 |
213 |
}
|
202 |
214 |
}
|
203 |
215 |
|
... | ... | |
233 |
245 |
}
|
234 |
246 |
|
235 |
247 |
} else {
|
236 |
|
throw new ServiceFailure("503", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
|
|
248 |
throw new ServiceFailure("0000", "IndexGenerator.getMetadataIds - the mnode "+ mNode.getNodeBaseServiceUrl()+
|
237 |
249 |
" is not ready even though Metacat-index wailted for 30 minutes. We can't get the objects list from it and the building index can't happen this time");
|
238 |
250 |
}
|
239 |
251 |
|
... | ... | |
265 |
277 |
* Build up the mn base url
|
266 |
278 |
*/
|
267 |
279 |
private String buildMNBaseURL() {
|
268 |
|
String url = HTTP+Settings.getConfiguration().getString("server.name")+":"+
|
269 |
|
Settings.getConfiguration().getString("server.httpPort")+"/"+
|
270 |
|
Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
|
271 |
|
log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+url);
|
272 |
|
return url;
|
|
280 |
String httpPort = Settings.getConfiguration().getString("server.httpPort");
|
|
281 |
String serverURL = "http://";
|
|
282 |
if(httpPort.equals("443") || httpPort.equals("8443"))
|
|
283 |
{
|
|
284 |
serverURL = "https://";
|
|
285 |
}
|
|
286 |
serverURL = serverURL+Settings.getConfiguration().getString("server.name");
|
|
287 |
if (!httpPort.equals("80")) {
|
|
288 |
serverURL += ":" + httpPort;
|
|
289 |
}
|
|
290 |
serverURL = serverURL +"/"+ Settings.getConfiguration().getString("application.context")+MNAPPENDIX;
|
|
291 |
log.info("IndexGenerator.buildMNBaseURL - the base url of MNode is "+serverURL);
|
|
292 |
return serverURL;
|
273 |
293 |
}
|
274 |
294 |
|
275 |
295 |
|
... | ... | |
278 |
298 |
*/
|
279 |
299 |
private void generateIndex(String id) throws Exception {
|
280 |
300 |
if(id != null) {
|
281 |
|
SystemMetadata sysmeta = systemMetadataListener.getSystemMetadata(id);
|
282 |
|
if(sysmeta != null) {
|
283 |
|
InputStream data = systemMetadataListener.getDataObject(id);
|
|
301 |
SystemMetadata sysmeta = getSystemMetadata(id);
|
|
302 |
//only update none-archived id.
|
|
303 |
if(sysmeta != null && !sysmeta.getArchived()) {
|
|
304 |
InputStream data = getDataObject(id);
|
284 |
305 |
Identifier obsolete = sysmeta.getObsoletes();
|
285 |
306 |
List<String> obsoleteChain = null;
|
286 |
307 |
if(obsolete != null) {
|
287 |
|
obsoleteChain = systemMetadataListener.getObsoletes(id);
|
|
308 |
obsoleteChain = getObsoletes(id);
|
288 |
309 |
}
|
289 |
310 |
solrIndex.update(id, obsoleteChain, sysmeta, data);
|
290 |
311 |
} else {
|
... | ... | |
293 |
314 |
|
294 |
315 |
}
|
295 |
316 |
}
|
|
317 |
|
|
318 |
/*
|
|
319 |
* Initialize the system metadata map
|
|
320 |
*/
|
|
321 |
private void initSystemMetadataMap() throws FileNotFoundException, ServiceFailure{
|
|
322 |
int times = 0;
|
|
323 |
if(systemMetadataMap == null) {
|
|
324 |
while(true) {
|
|
325 |
try {
|
|
326 |
systemMetadataMap = DistributedMapsFactory.getSystemMetadataMap();
|
|
327 |
break;
|
|
328 |
} catch (FileNotFoundException e) {
|
|
329 |
throw e;
|
|
330 |
} catch (ServiceFailure e) {
|
|
331 |
if(times <= MAXWAITNUMBER) {
|
|
332 |
log.warn("IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready : "
|
|
333 |
+e.getMessage()+"\nWe will try to access it 10 seconds later ");
|
|
334 |
try {
|
|
335 |
Thread.sleep(WAITTIME);
|
|
336 |
} catch (Exception ee) {
|
|
337 |
log.warn("IndexGenerator.initSystemMetadataMap - the thread can't sleep for 10 seconds to wait the hazelcast service");
|
|
338 |
}
|
|
339 |
|
|
340 |
} else {
|
|
341 |
throw new ServiceFailure("0000", "IndexGenerator.initSystemMetadataMap - the hazelcast service is not ready even though Metacat-index wailted for 30 minutes. We can't get the system metadata from it and the building index can't happen this time");
|
|
342 |
}
|
|
343 |
}
|
|
344 |
times++;
|
|
345 |
}
|
|
346 |
}
|
|
347 |
}
|
|
348 |
|
|
349 |
/*
|
|
350 |
* We should call this method after calling initSystemMetadataMap since this method doesn't have the mechanism to wait the readiness of the hazelcast service
|
|
351 |
*/
|
|
352 |
private void initObjectPathMap() throws FileNotFoundException, ServiceFailure {
|
|
353 |
if(objectPathMap == null) {
|
|
354 |
objectPathMap = DistributedMapsFactory.getObjectPathMap();
|
|
355 |
}
|
|
356 |
}
|
|
357 |
/**
|
|
358 |
* Get an InputStream as the data object for the specific pid.
|
|
359 |
* @param pid
|
|
360 |
* @return
|
|
361 |
* @throws FileNotFoundException
|
|
362 |
*/
|
|
363 |
private InputStream getDataObject(String pid) throws FileNotFoundException {
|
|
364 |
Identifier identifier = new Identifier();
|
|
365 |
identifier.setValue(pid);
|
|
366 |
String objectPath = objectPathMap.get(identifier);
|
|
367 |
InputStream data = null;
|
|
368 |
data = new FileInputStream(objectPath);
|
|
369 |
return data;
|
|
370 |
|
|
371 |
}
|
|
372 |
|
|
373 |
/**
|
|
374 |
* Get the SystemMetadata for the specified id from the distributed Map.
|
|
375 |
* The null maybe is returned if there is no system metadata found.
|
|
376 |
* @param id the specified id.
|
|
377 |
* @return the SystemMetadata associated with the id.
|
|
378 |
*/
|
|
379 |
private SystemMetadata getSystemMetadata(String id) {
|
|
380 |
SystemMetadata metadata = null;
|
|
381 |
if(systemMetadataMap != null && id != null) {
|
|
382 |
Identifier identifier = new Identifier();
|
|
383 |
identifier.setValue(id);
|
|
384 |
metadata = systemMetadataMap.get(identifier);
|
|
385 |
}
|
|
386 |
return metadata;
|
|
387 |
}
|
|
388 |
|
|
389 |
/**
|
|
390 |
* Get the obsoletes chain of the specified id. The returned list doesn't include
|
|
391 |
* the specified id itself. The newer version has the lower index number in the list.
|
|
392 |
* Empty list will be returned if there is no document to be obsoleted by this id.
|
|
393 |
* @param id
|
|
394 |
* @return
|
|
395 |
*/
|
|
396 |
private List<String> getObsoletes(String id) {
|
|
397 |
List<String> obsoletes = new ArrayList<String>();
|
|
398 |
while (id != null) {
|
|
399 |
SystemMetadata metadata = getSystemMetadata(id);
|
|
400 |
id = null;//set it to be null in order to stop the while loop if the id can't be assinged to a new value in the following code.
|
|
401 |
if(metadata != null) {
|
|
402 |
Identifier identifier = metadata.getObsoletes();
|
|
403 |
if(identifier != null && identifier.getValue() != null && !identifier.getValue().trim().equals("")) {
|
|
404 |
obsoletes.add(identifier.getValue());
|
|
405 |
id = identifier.getValue();
|
|
406 |
}
|
|
407 |
}
|
|
408 |
}
|
|
409 |
return obsoletes;
|
|
410 |
}
|
|
411 |
|
296 |
412 |
}
|
Add code to wait for the hazelcast service to become ready.