Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: Implements a service for managing a Hazelcast cluster member
4
 *  Copyright: 2011 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Christopher Jones
7
 * 
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2012-05-30 17:53:27 -0700 (Wed, 30 May 2012) $'
10
 * '$Revision: 7206 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.dataone.hazelcast;
28

    
29
import java.io.FileNotFoundException;
30
import java.sql.SQLException;
31
import java.util.HashSet;
32
import java.util.Iterator;
33
import java.util.List;
34
import java.util.Set;
35
import java.util.concurrent.ExecutorService;
36
import java.util.concurrent.Executors;
37
import java.util.concurrent.locks.Lock;
38

    
39
import org.apache.log4j.Logger;
40
import org.dataone.service.exceptions.InvalidSystemMetadata;
41
import org.dataone.service.types.v1.Identifier;
42
import org.dataone.service.types.v1.Node;
43
import org.dataone.service.types.v1.NodeReference;
44
import org.dataone.service.types.v1.SystemMetadata;
45

    
46
import com.hazelcast.config.Config;
47
import com.hazelcast.config.FileSystemXmlConfig;
48
import com.hazelcast.core.EntryEvent;
49
import com.hazelcast.core.EntryListener;
50
import com.hazelcast.core.Hazelcast;
51
import com.hazelcast.core.HazelcastInstance;
52
import com.hazelcast.core.IMap;
53
import com.hazelcast.core.ISet;
54
import com.hazelcast.core.LifecycleEvent;
55
import com.hazelcast.core.LifecycleListener;
56
import com.hazelcast.core.Member;
57
import com.hazelcast.core.MembershipEvent;
58
import com.hazelcast.core.MembershipListener;
59
import com.hazelcast.partition.Partition;
60
import com.hazelcast.partition.PartitionService;
61

    
62
import edu.ucsb.nceas.metacat.IdentifierManager;
63
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
64
import edu.ucsb.nceas.metacat.properties.PropertyService;
65
import edu.ucsb.nceas.metacat.shared.BaseService;
66
import edu.ucsb.nceas.metacat.shared.ServiceException;
67
import edu.ucsb.nceas.metacat.util.DocumentUtil;
68
import edu.ucsb.nceas.utilities.FileUtil;
69
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
70
/**
71
 * The Hazelcast service enables Metacat as a Hazelcast cluster member
72
 */
73
public class HazelcastService extends BaseService
74
  implements EntryListener<Identifier, SystemMetadata>, MembershipListener, LifecycleListener {
75
  
76
  private static final String SINCE_PROPERTY = "dateSysMetadataModified";
77

    
78
/* The instance of the logging class */
79
  private static Logger logMetacat = Logger.getLogger(HazelcastService.class);
80
  
81
  /* The singleton instance of the hazelcast service */
82
  private static HazelcastService hzService = null;
83
  
84
  /* The Hazelcast configuration */
85
  private Config hzConfig;
86
  
87
  /* The instance of the Hazelcast client */
88
//  private HazelcastClient hzClient;
89

    
90
  /* The name of the DataONE Hazelcast cluster group */
91
  private String groupName;
92

    
93
  /* The password of the DataONE Hazelcast cluster group */
94
  private String groupPassword;
95
  
96
  /* The list of DataONE Hazelcast cluster instance addresses (read from the "instances" property) */
97
  private String addressList;
98
  
99
  /* The name of the node map */
100
  private String nodeMap;
101

    
102
  /* The name of the system metadata map */
103
  private String systemMetadataMap;
104
  
105
  /* The Hazelcast distributed task id generator namespace */
106
  private String taskIds;
107
  
108
  /* The Hazelcast distributed node map */
109
  private IMap<NodeReference, Node> nodes;
110

    
111
  /* The Hazelcast distributed system metadata map */
112
  private IMap<Identifier, SystemMetadata> systemMetadata;
113
  
114
  /* The name of the identifiers set */
115
  private String identifiersSet;
116
  
117
  /* The Hazelcast distributed identifiers set */
118
  private ISet<Identifier> identifiers;
119

    
120
  private HazelcastInstance hzInstance;
121
      
122
  /*
123
   * Constructor: Creates an instance of the hazelcast service. Since
124
   * this uses a singleton pattern, use getInstance() to gain the instance.
125
   */
126
  private HazelcastService() {
127
    
128
    super();
129
    _serviceName="HazelcastService";
130
    
131
    try {
132
      init();
133
      
134
    } catch (ServiceException se) {
135
      logMetacat.error("There was a problem creating the HazelcastService. " +
136
                       "The error message was: " + se.getMessage());
137
      
138
    }
139
    
140
  }
141
  
142
  /**
143
   *  Get the instance of the HazelcastService that has been instantiated,
144
   *  or instantiate one if it has not been already.
145
   *
146
   * @return hazelcastService - The instance of the hazelcast service
147
   */
148
  public static HazelcastService getInstance(){
149
    
150
    if ( hzService == null ) {
151
      
152
      hzService = new HazelcastService();
153
      
154
    }
155
    return hzService;
156
  }
157
  
158
  /**
159
   * Initializes the Hazelcast service
160
   */
161
  public void init() throws ServiceException {
162
    
163
    logMetacat.debug("HazelcastService.init() called.");
164
    
165
	String configFileName = null;
166
	Config config = null;
167
	try {
168
		configFileName = PropertyService.getProperty("dataone.hazelcast.configFilePath");
169
		config = new FileSystemXmlConfig(configFileName);
170
	} catch (Exception e) {
171
		configFileName = PropertyService.CONFIG_FILE_DIR + FileUtil.getFS() + "hazelcast.xml";
172
		logMetacat.warn("Custom Hazelcast configuration not defined, using default: " + configFileName);
173
		// make sure we have the config
174
		try {
175
			config = new FileSystemXmlConfig(configFileName);
176
		} catch (FileNotFoundException e1) {
177
			String msg = e.getMessage();
178
			logMetacat.error(msg);
179
			throw new ServiceException(msg);
180
		}
181
	}
182

    
183
	Hazelcast.init(config);
184
  this.hzInstance = Hazelcast.getDefaultInstance();
185
  
186
  	logMetacat.debug("Initialized hzInstance");
187

    
188
    // Get configuration properties on instantiation
189
    try {
190
      groupName = 
191
        PropertyService.getProperty("dataone.hazelcast.processCluster.groupName");
192
      groupPassword = 
193
        PropertyService.getProperty("dataone.hazelcast.processCluster.password");
194
      addressList = 
195
        PropertyService.getProperty("dataone.hazelcast.processCluster.instances");
196
      systemMetadataMap = 
197
        PropertyService.getProperty("dataone.hazelcast.storageCluster.systemMetadataMap");
198
      identifiersSet = PropertyService.getProperty("dataone.hazelcast.storageCluster.identifiersSet");
199
//    nodeMap = 
200
//    PropertyService.getProperty("dataone.hazelcast.processCluster.nodesMap");
201
      // Become a DataONE-process cluster client
202
//      String[] addresses = addressList.split(",");
203
//      hzClient = 
204
//        HazelcastClient.newHazelcastClient(this.groupName, this.groupPassword, addresses);
205
//      nodes = hzClient.getMap(nodeMap);
206
      
207
      // Get a reference to the shared system metadata map as a cluster member
208
      // NOTE: this loads the map from the backing store and can take a long time for large collections
209
      systemMetadata = Hazelcast.getMap(systemMetadataMap);
210
      
211
      logMetacat.debug("Initialized systemMetadata");
212

    
213
      // Get a reference to the shared identifiers set as a cluster member
214
      identifiers = Hazelcast.getSet(identifiersSet);
215
      identifiers.addAll(loadAllKeys());
216
      
217
      logMetacat.debug("Initialized identifiers");
218
      
219
      // Listen for changes to the system metadata map
220
      systemMetadata.addEntryListener(this, true);
221
      
222
      // Listen for members added/removed
223
      hzInstance.getCluster().addMembershipListener(this);
224
      
225
      // Listen for lifecycle state changes
226
      hzInstance.getLifecycleService().addLifecycleListener(this);
227
      
228
    } catch (PropertyNotFoundException e) {
229

    
230
      String msg = "Couldn't find Hazelcast properties for the DataONE clusters. " +
231
        "The error message was: " + e.getMessage();
232
      logMetacat.error(msg);
233
      
234
    }
235
    
236
    // make sure we have all metadata locally
237
    try {
238
        logMetacat.debug("Temporarily shutting down which resynching system metadata");
239

    
240
    	// pause to make us not own any keys
241
    	hzInstance.getLifecycleService().shutdown();
242
    	// synch on restart
243
        resynchInThread();
244
	} catch (Exception e) {
245
		String msg = "Problem resynchronizing system metadata. " + e.getMessage();
246
		logMetacat.error(msg, e);
247
	}
248
        
249
  }
250
  
251
  /**
252
   * Get the system metadata map
253
   * 
254
   * @return systemMetadata - the hazelcast map of system metadata
255
   * @param identifier - the identifier of the object as a string
256
   */
257
  public IMap<Identifier,SystemMetadata> getSystemMetadataMap() {
258
	  return systemMetadata;
259
  }
260
  
261
  /**
262
   * Get the identifiers set
263
   * @return identifiers - the set of unique DataONE identifiers in the cluster
264
   */
265
  public ISet<Identifier> getIdentifiers() {
266
      return identifiers;
267
      
268
  }
269

    
270
  /**
271
   * When Metacat changes the underlying store, we need to refresh the
272
   * in-memory representation of it.
273
   * @param guid
274
   */
275
  public void refreshSystemMetadataEntry(String guid) {
276
	Identifier identifier = new Identifier();
277
	identifier.setValue(guid);
278
	// force hazelcast to update system metadata in memory from the store
279
	HazelcastService.getInstance().getSystemMetadataMap().evict(identifier);
280
	
281
  }
282

    
283
  public Lock getLock(String identifier) {
284
    
285
    Lock lock = null;
286
    
287
    try {
288
        lock = getInstance().getHazelcastInstance().getLock(identifier);
289
        
290
    } catch (RuntimeException e) {
291
        logMetacat.info("Couldn't get a lock for identifier " + 
292
            identifier + " !!");
293
    }
294
    return lock;
295
      
296
  }
297
  
298
  /**
299
   * Get the DataONE hazelcast node map
300
   * @return nodes - the hazelcast map of nodes
301
   */
302
//  public IMap<NodeReference, Node> getNodesMap() {
303
//	  return nodes;
304
//  }
305
  
306
  /**
307
   * Indicate whether or not this service is refreshable.
308
   *
309
   * @return refreshable - the boolean refreshable status
310
   */
311
  public boolean refreshable() {
312
    // TODO: Determine the consequences of restarting the Hazelcast instance
313
    // Set this to true if it's okay to drop from the cluster, lose the maps,
314
    // and start back up again
315
    return false;
316
    
317
  }
318
  
319
  /**
320
   * Stop the HazelcastService. When stopped, the service will no longer
321
   * respond to requests.
322
   */
323
  public void stop() throws ServiceException {
324
    
325
    Hazelcast.getLifecycleService().shutdown();
326
    
327
  }
328

    
329
  public HazelcastInstance getHazelcastInstance() {
330
      return this.hzInstance;
331
      
332
  }
333
  
334
  /**
335
   * Refresh the Hazelcast service by restarting it
336
   */
337
  @Override
338
  protected void doRefresh() throws ServiceException {
339

    
340
    // TODO: verify that the correct config file is still used
341
    Hazelcast.getLifecycleService().restart();
342
    
343
  }
344
  
345
  /**
346
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
347
	 * added events in the hzSystemMetadata map. Evaluate the entry and create
348
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
349
	 * 
350
	 * @param event - The EntryEvent that occurred
351
	 */
352
	@Override
353
	public void entryAdded(EntryEvent<Identifier, SystemMetadata> event) {
354
	  
355
	  logMetacat.info("SystemMetadata entry added event on identifier " + 
356
	      event.getKey().getValue());
357
		// handle as update - that method will create if necessary
358
		entryUpdated(event);
359

    
360
	}
361

    
362
	/**
363
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
364
	 * evicted events in the hzSystemMetadata map.  Evaluate the entry and create
365
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
366
	 * 
367
	 * @param event - The EntryEvent that occurred
368
	 */
369
	@Override
370
	public void entryEvicted(EntryEvent<Identifier, SystemMetadata> event) {
371

    
372
      logMetacat.info("SystemMetadata entry evicted event on identifier " + 
373
          event.getKey().getValue());
374
      
375
	    // ensure identifiers are listed in the hzIdentifiers set
376
      if ( !identifiers.contains(event.getKey()) ) {
377
          identifiers.add(event.getKey());
378
      }
379
	  
380
	}
381
	
382
	/**
383
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
384
	 * removed events in the hzSystemMetadata map.  Evaluate the entry and create
385
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
386
	 * 
387
	 * @param event - The EntryEvent that occurred
388
	 */
389
	@Override
390
	public void entryRemoved(EntryEvent<Identifier, SystemMetadata> event) {
391
		
392
    logMetacat.info("SystemMetadata entry removed event on identifier " + 
393
        event.getKey().getValue());
394

    
395
	  // we typically don't remove objects in Metacat, but can remove System Metadata
396
		IdentifierManager.getInstance().deleteSystemMetadata(event.getValue().getIdentifier().getValue());
397

    
398
    // keep the hzIdentifiers set in sync with the systemmetadata table
399
    if ( identifiers.contains(event.getKey()) ) {
400
        identifiers.remove(event.getKey());
401
        
402
    }
403

    
404
	}
405
	
406
	/**
407
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
408
	 * updated events in the hzSystemMetadata map.  Evaluate the entry and create
409
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
410
	 * 
411
	 * @param event - The EntryEvent that occurred
412
	 */
413
	@Override
414
	public void entryUpdated(EntryEvent<Identifier, SystemMetadata> event) {
415

    
416
		logMetacat.debug("Entry added/updated to System Metadata map: " + event.getKey().getValue());
417
		PartitionService partitionService = Hazelcast.getPartitionService();
418
		Partition partition = partitionService.getPartition(event.getKey());
419
		Member ownerMember = partition.getOwner();
420
		SystemMetadata sysmeta = event.getValue();
421
		if (!ownerMember.localMember()) {
422
			if (sysmeta == null) {
423
				logMetacat.warn("No SystemMetadata provided in the event, getting from shared map: " + event.getKey().getValue());
424
				sysmeta = getSystemMetadataMap().get(event.getKey());
425
				if (sysmeta == null) {
426
					// this is a problem
427
					logMetacat.error("Could not find SystemMetadata in shared map for: " + event.getKey().getValue());
428
					// TODO: should probably return at this point since the save will fail
429
				}
430
			}
431
			// need to pull the entry into the local store
432
			saveLocally(event.getValue());
433
		}
434

    
435
		// ensure identifiers are listed in the hzIdentifiers set
436
		if (!identifiers.contains(event.getKey())) {
437
			identifiers.add(event.getKey());
438
		}
439

    
440
	}
441
	
442
	/**
443
	 * Save SystemMetadata to local store if needed
444
	 * @param sm
445
	 */
446
	private void saveLocally(SystemMetadata sm) {
447
		logMetacat.debug("Saving entry locally: " + sm.getIdentifier().getValue());
448
		try {
449

    
450
			IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
451

    
452
		} catch (McdbDocNotFoundException e) {
453
			logMetacat.error("Could not save System Metadata to local store.", e);
454
			
455
		} catch (SQLException e) {
456
	      logMetacat.error("Could not save System Metadata to local store.", e);
457
	      
458
	    } catch (InvalidSystemMetadata e) {
459
	        logMetacat.error("Could not save System Metadata to local store.", e);
460
	        
461
	    }
462
	}
463
	
464
	/**
465
	 * Checks the local backing store for missing SystemMetadata,
466
	 * retrieves those entries from the shared map if they exist,
467
	 * and saves them locally.
468
	 */
469
	private void synchronizeLocalStore() {
470
		List<String> localIds = IdentifierManager.getInstance().getLocalIdsWithNoSystemMetadata(true, -1);
471
		if (localIds != null) {
472
			logMetacat.debug("Member missing SystemMetadata entries, count = " + localIds.size());
473
			for (String localId: localIds) {
474
				logMetacat.debug("Processing system metadata for localId: " + localId);
475
				try {
476
					String docid = DocumentUtil.getSmartDocId(localId);
477
					int rev = DocumentUtil.getRevisionFromAccessionNumber(localId);
478
					String guid = IdentifierManager.getInstance().getGUID(docid, rev);
479
					logMetacat.debug("Found mapped guid: " + guid);
480
					Identifier pid = new Identifier();
481
					pid.setValue(guid);
482
					SystemMetadata sm = systemMetadata.get(pid);
483
					logMetacat.debug("Found shared system metadata for guid: " + guid);
484
					saveLocally(sm);
485
					logMetacat.debug("Saved shared system metadata locally for guid: " + guid);
486
				} catch (Exception e) {
487
					logMetacat.error("Could not save shared SystemMetadata entry locally, localId: " + localId, e);
488
				}
489
			}
490
		}
491
	}
492
	
493
	/**
494
	 * Make sure we have a copy of every entry in the shared map.
495
	 * We use lazy loading and therefore the CNs may not all be in sync when one
496
	 * comes back online after an extended period of being offline
497
	 * @throws Exception
498
	 */
499
	private void resynch() throws Exception {
500
		// loop through all the [shared] entries and save any missing ones locally
501
		List<String> localPids = IdentifierManager.getInstance().getAllSystemMetadataGUIDs();
502
		logMetacat.warn("local pid count: " + localPids.size() + ", shared pid count: " + identifiers.size());
503
		Iterator<Identifier> sharedPids = identifiers.iterator();
504
		while (sharedPids.hasNext()) {
505
			Identifier pid = sharedPids.next();
506
			logMetacat.debug("checking for shared pid locally: " + pid.getValue());
507
			if (!localPids.contains(pid.getValue())) {
508
				logMetacat.warn("shared pid does not exist locally: " + pid.getValue());
509
				SystemMetadata sm = systemMetadata.get(pid);
510
				if (sm != null) {
511
					// this may throw an exception if loading it to the shared map triggered entry updated events
512
					saveLocally(sm);
513
				} else {
514
					logMetacat.error("SystemMetadata for pid is null: " + pid.getValue());
515
				}
516
			} else {
517
				logMetacat.debug("shared pid already exisits locally: " + pid.getValue());
518
			}
519
		}
520
	}
521
	
522
	private void resynchInThread() {
523
		logMetacat.debug("launching system metadata resynch in a thread");
524
		ExecutorService executor = Executors.newSingleThreadExecutor();
525
		executor.execute(new Runnable() {
526
			@Override
527
			public void run() {
528
				try {
529
					resynch();
530
					// now we can take ownership of keys
531
					hzInstance.getLifecycleService().restart();
532
				} catch (Exception e) {
533
					logMetacat.error("Error in resynchInThread: " + e.getMessage(), e);
534
				}
535
			}
536
		});
537
		executor.shutdown();
538
	}
539

    
540
	/**
541
	 * When there is missing SystemMetadata on the local member,
542
	 * we retrieve it from the shared map and add it to the local
543
	 * backing store for safe keeping.
544
	 */
545
	@Override
546
	public void memberAdded(MembershipEvent event) {
547
		Member member = event.getMember();
548
		logMetacat.debug("Member added to cluster: " + member.getInetSocketAddress());
549
		boolean isLocal = member.localMember();
550
		if (isLocal) {
551
			logMetacat.debug("Member islocal: " + member.getInetSocketAddress());
552
			synchronizeLocalStore();
553
		}
554
	}
555

    
556
	@Override
557
	public void memberRemoved(MembershipEvent event) {
558
		// TODO Auto-generated method stub
559
		
560
	}
561

    
562
	/**
563
	 * In cases where this cluster is paused, we want to 
564
	 * check that the local store accurately reflects the shared 
565
	 * SystemMetadata map
566
	 * @param event
567
	 */
568
	@Override
569
	public void stateChanged(LifecycleEvent event) {
570
		logMetacat.debug("HZ LifecycleEvent.state: " + event.getState());
571
		if (event.getState().equals(LifecycleEvent.LifecycleState.RESUMED)) {
572
			logMetacat.debug("HZ LifecycleEvent.state is RESUMED, calling synchronizeLocalStore()");
573
			synchronizeLocalStore();
574
		}
575
	}
576

    
577
	/**
578
	 * Load all System Metadata keys from the backing store
579
	 * @return set of pids
580
	 */
581
	private Set<Identifier> loadAllKeys() {
582

    
583
		Set<Identifier> pids = new HashSet<Identifier>();
584
		
585
		try {
586
			
587
			// ALTERNATIVE 1: this has more overhead than just looking at the GUIDs
588
//			ObjectList ol = IdentifierManager.getInstance().querySystemMetadata(
589
//					null, //startTime, 
590
//					null, //endTime, 
591
//					null, //objectFormatId, 
592
//					false, //replicaStatus, 
593
//					0, //start, 
594
//					-1 //count
595
//					);
596
//			for (ObjectInfo o: ol.getObjectInfoList()) {
597
//				Identifier pid = o.getIdentifier();
598
//				if ( !pids.contains(pid) ) {
599
//					pids.add(pid);
600
//				}				
601
//			}
602
			
603
			// ALTERNATIVE method: look up all the Identifiers from the table
604
			List<String> guids = IdentifierManager.getInstance().getAllSystemMetadataGUIDs();
605
			for (String guid: guids){
606
				Identifier pid = new Identifier();
607
				pid.setValue(guid);
608
				pids.add(pid);
609
			}
610
			
611
		} catch (Exception e) {
612
			throw new RuntimeException(e.getMessage(), e);
613
			
614
		}
615
		
616
		return pids;
617
	}
618

    
619
}
(1-1/3)