Project

General

Profile

1
/**
2
 *  '$RCSfile$'
3
 *    Purpose: Implements a service for managing a Hazelcast cluster member
4
 *  Copyright: 2011 Regents of the University of California and the
5
 *             National Center for Ecological Analysis and Synthesis
6
 *    Authors: Christopher Jones
7
 * 
8
 *   '$Author: leinfelder $'
9
 *     '$Date: 2012-05-30 20:07:44 -0700 (Wed, 30 May 2012) $'
10
 * '$Revision: 7208 $'
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25
 */
26

    
27
package edu.ucsb.nceas.metacat.dataone.hazelcast;
28

    
29
import java.io.FileNotFoundException;
30
import java.sql.SQLException;
31
import java.util.HashSet;
32
import java.util.Iterator;
33
import java.util.List;
34
import java.util.Set;
35
import java.util.concurrent.ExecutorService;
36
import java.util.concurrent.Executors;
37
import java.util.concurrent.locks.Lock;
38

    
39
import org.apache.log4j.Logger;
40
import org.dataone.service.exceptions.InvalidSystemMetadata;
41
import org.dataone.service.types.v1.Identifier;
42
import org.dataone.service.types.v1.Node;
43
import org.dataone.service.types.v1.NodeReference;
44
import org.dataone.service.types.v1.SystemMetadata;
45

    
46
import com.hazelcast.config.Config;
47
import com.hazelcast.config.FileSystemXmlConfig;
48
import com.hazelcast.core.EntryEvent;
49
import com.hazelcast.core.EntryListener;
50
import com.hazelcast.core.Hazelcast;
51
import com.hazelcast.core.HazelcastInstance;
52
import com.hazelcast.core.IMap;
53
import com.hazelcast.core.ISet;
54
import com.hazelcast.core.LifecycleEvent;
55
import com.hazelcast.core.LifecycleListener;
56
import com.hazelcast.core.Member;
57
import com.hazelcast.core.MembershipEvent;
58
import com.hazelcast.core.MembershipListener;
59
import com.hazelcast.partition.Partition;
60
import com.hazelcast.partition.PartitionService;
61

    
62
import edu.ucsb.nceas.metacat.IdentifierManager;
63
import edu.ucsb.nceas.metacat.McdbDocNotFoundException;
64
import edu.ucsb.nceas.metacat.properties.PropertyService;
65
import edu.ucsb.nceas.metacat.shared.BaseService;
66
import edu.ucsb.nceas.metacat.shared.ServiceException;
67
import edu.ucsb.nceas.metacat.util.DocumentUtil;
68
import edu.ucsb.nceas.utilities.FileUtil;
69
import edu.ucsb.nceas.utilities.PropertyNotFoundException;
70
/**
71
 * The Hazelcast service enables Metacat as a Hazelcast cluster member
72
 */
73
public class HazelcastService extends BaseService
74
  implements EntryListener<Identifier, SystemMetadata>, MembershipListener, LifecycleListener {
75
  
76
  private static final String SINCE_PROPERTY = "dateSysMetadataModified";
77

    
78
/* The instance of the logging class */
79
  private static Logger logMetacat = Logger.getLogger(HazelcastService.class);
80
  
81
  /* The singleton instance of the hazelcast service */
82
  private static HazelcastService hzService = null;
83
  
84
  /* The Hazelcast configuration */
85
  private Config hzConfig;
86
  
87
  /* The instance of the Hazelcast client */
88
//  private HazelcastClient hzClient;
89

    
90
  /* The name of the DataONE Hazelcast cluster group */
91
  private String groupName;
92

    
93
  /* The name of the DataONE Hazelcast cluster password */
94
  private String groupPassword;
95
  
96
  /* The name of the DataONE Hazelcast cluster IP addresses */
97
  private String addressList;
98
  
99
  /* The name of the node map */
100
  private String nodeMap;
101

    
102
  /* The name of the system metadata map */
103
  private String systemMetadataMap;
104
  
105
  /* The Hazelcast distributed task id generator namespace */
106
  private String taskIds;
107
  
108
  /* The Hazelcast distributed node map */
109
  private IMap<NodeReference, Node> nodes;
110

    
111
  /* The Hazelcast distributed system metadata map */
112
  private IMap<Identifier, SystemMetadata> systemMetadata;
113
  
114
  /* The name of the identifiers set */
115
  private String identifiersSet;
116
  
117
  /* The Hazelcast distributed identifiers set */
118
  private ISet<Identifier> identifiers;
119

    
120
  private HazelcastInstance hzInstance;
121
      
122
  /**
   * Constructor: Creates an instance of the hazelcast service and runs
   * {@link #init()} on it. Since this uses a singleton pattern, use
   * getInstance() to gain the instance.
   *
   * A ServiceException raised by init() is logged (with its stack trace) and
   * not rethrown, so construction always completes; the resulting instance may
   * then be only partially initialized.
   */
  private HazelcastService() {

    super();
    _serviceName = "HazelcastService";

    try {
      init();

    } catch (ServiceException se) {
      // pass the exception itself to the logger so the cause is not lost
      logMetacat.error("There was a problem creating the HazelcastService. " +
                       "The error message was: " + se.getMessage(), se);

    }

  }
141
  
142
  /**
143
   *  Get the instance of the HazelcastService that has been instantiated,
144
   *  or instantiate one if it has not been already.
145
   *
146
   * @return hazelcastService - The instance of the hazelcast service
147
   */
148
  public static HazelcastService getInstance(){
149
    
150
    if ( hzService == null ) {
151
      
152
      hzService = new HazelcastService();
153
      
154
    }
155
    return hzService;
156
  }
157
  
158
  /**
159
   * Initializes the Hazelcast service
160
   */
161
  public void init() throws ServiceException {
162
    
163
    logMetacat.debug("HazelcastService.init() called.");
164
    
165
	String configFileName = null;
166
	Config config = null;
167
	try {
168
		configFileName = PropertyService.getProperty("dataone.hazelcast.configFilePath");
169
		config = new FileSystemXmlConfig(configFileName);
170
	} catch (Exception e) {
171
		configFileName = PropertyService.CONFIG_FILE_DIR + FileUtil.getFS() + "hazelcast.xml";
172
		logMetacat.warn("Custom Hazelcast configuration not defined, using default: " + configFileName);
173
		// make sure we have the config
174
		try {
175
			config = new FileSystemXmlConfig(configFileName);
176
		} catch (FileNotFoundException e1) {
177
			String msg = e.getMessage();
178
			logMetacat.error(msg);
179
			throw new ServiceException(msg);
180
		}
181
	}
182

    
183
	Hazelcast.init(config);
184
  this.hzInstance = Hazelcast.getDefaultInstance();
185
  
186
  	logMetacat.debug("Initialized hzInstance");
187

    
188
    // Get configuration properties on instantiation
189
    try {
190
      groupName = 
191
        PropertyService.getProperty("dataone.hazelcast.processCluster.groupName");
192
      groupPassword = 
193
        PropertyService.getProperty("dataone.hazelcast.processCluster.password");
194
      addressList = 
195
        PropertyService.getProperty("dataone.hazelcast.processCluster.instances");
196
      systemMetadataMap = 
197
        PropertyService.getProperty("dataone.hazelcast.storageCluster.systemMetadataMap");
198
      identifiersSet = PropertyService.getProperty("dataone.hazelcast.storageCluster.identifiersSet");
199
//    nodeMap = 
200
//    PropertyService.getProperty("dataone.hazelcast.processCluster.nodesMap");
201
      // Become a DataONE-process cluster client
202
//      String[] addresses = addressList.split(",");
203
//      hzClient = 
204
//        HazelcastClient.newHazelcastClient(this.groupName, this.groupPassword, addresses);
205
//      nodes = hzClient.getMap(nodeMap);
206
      
207
      // Get a reference to the shared system metadata map as a cluster member
208
      // NOTE: this loads the map from the backing store and can take a long time for large collections
209
      systemMetadata = Hazelcast.getMap(systemMetadataMap);
210
      
211
      logMetacat.debug("Initialized systemMetadata");
212

    
213
      // Get a reference to the shared identifiers set as a cluster member
214
      // NOTE: this takes a long time to complete
215
      identifiers = Hazelcast.getSet(identifiersSet);
216
      identifiers.addAll(loadAllKeys());
217
      
218
      logMetacat.debug("Initialized identifiers");
219
      
220
      // Listen for changes to the system metadata map
221
      systemMetadata.addEntryListener(this, true);
222
      
223
      // Listen for members added/removed
224
      hzInstance.getCluster().addMembershipListener(this);
225
      
226
      // Listen for lifecycle state changes
227
      hzInstance.getLifecycleService().addLifecycleListener(this);
228
      
229
    } catch (PropertyNotFoundException e) {
230

    
231
      String msg = "Couldn't find Hazelcast properties for the DataONE clusters. " +
232
        "The error message was: " + e.getMessage();
233
      logMetacat.error(msg);
234
      
235
    }
236
    
237
    // make sure we have all metadata locally
238
    try {
239
    	// synch on restart
240
        resynchInThread();
241
	} catch (Exception e) {
242
		String msg = "Problem resynchronizing system metadata. " + e.getMessage();
243
		logMetacat.error(msg, e);
244
	}
245
        
246
  }
247
  
248
  /**
249
   * Get the system metadata map
250
   * 
251
   * @return systemMetadata - the hazelcast map of system metadata
252
   * @param identifier - the identifier of the object as a string
253
   */
254
  public IMap<Identifier,SystemMetadata> getSystemMetadataMap() {
255
	  return systemMetadata;
256
  }
257
  
258
  /**
259
   * Get the identifiers set
260
   * @return identifiers - the set of unique DataONE identifiers in the cluster
261
   */
262
  public ISet<Identifier> getIdentifiers() {
263
      return identifiers;
264
      
265
  }
266

    
267
  /**
268
   * When Metacat changes the underlying store, we need to refresh the
269
   * in-memory representation of it.
270
   * @param guid
271
   */
272
  public void refreshSystemMetadataEntry(String guid) {
273
	Identifier identifier = new Identifier();
274
	identifier.setValue(guid);
275
	// force hazelcast to update system metadata in memory from the store
276
	HazelcastService.getInstance().getSystemMetadataMap().evict(identifier);
277
	
278
  }
279

    
280
  public Lock getLock(String identifier) {
281
    
282
    Lock lock = null;
283
    
284
    try {
285
        lock = getInstance().getHazelcastInstance().getLock(identifier);
286
        
287
    } catch (RuntimeException e) {
288
        logMetacat.info("Couldn't get a lock for identifier " + 
289
            identifier + " !!");
290
    }
291
    return lock;
292
      
293
  }
294
  
295
  /**
296
   * Get the DataONE hazelcast node map
297
   * @return nodes - the hazelcast map of nodes
298
   */
299
//  public IMap<NodeReference, Node> getNodesMap() {
300
//	  return nodes;
301
//  }
302
  
303
  /**
304
   * Indicate whether or not this service is refreshable.
305
   *
306
   * @return refreshable - the boolean refreshable status
307
   */
308
  public boolean refreshable() {
309
    // TODO: Determine the consequences of restarting the Hazelcast instance
310
    // Set this to true if it's okay to drop from the cluster, lose the maps,
311
    // and start back up again
312
    return false;
313
    
314
  }
315
  
316
  /**
317
   * Stop the HazelcastService. When stopped, the service will no longer
318
   * respond to requests.
319
   */
320
  public void stop() throws ServiceException {
321
    
322
    Hazelcast.getLifecycleService().shutdown();
323
    
324
  }
325

    
326
  public HazelcastInstance getHazelcastInstance() {
327
      return this.hzInstance;
328
      
329
  }
330
  
331
  /**
332
   * Refresh the Hazelcast service by restarting it
333
   */
334
  @Override
335
  protected void doRefresh() throws ServiceException {
336

    
337
    // TODO: verify that the correct config file is still used
338
    Hazelcast.getLifecycleService().restart();
339
    
340
  }
341
  
342
  /**
343
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
344
	 * added events in the hzSystemMetadata map. Evaluate the entry and create
345
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
346
	 * 
347
	 * @param event - The EntryEvent that occurred
348
	 */
349
	@Override
350
	public void entryAdded(EntryEvent<Identifier, SystemMetadata> event) {
351
	  
352
	  logMetacat.info("SystemMetadata entry added event on identifier " + 
353
	      event.getKey().getValue());
354
		// handle as update - that method will create if necessary
355
		entryUpdated(event);
356

    
357
	}
358

    
359
	/**
360
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
361
	 * evicted events in the hzSystemMetadata map.  Evaluate the entry and create
362
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
363
	 * 
364
	 * @param event - The EntryEvent that occurred
365
	 */
366
	@Override
367
	public void entryEvicted(EntryEvent<Identifier, SystemMetadata> event) {
368

    
369
      logMetacat.info("SystemMetadata entry evicted event on identifier " + 
370
          event.getKey().getValue());
371
      
372
	    // ensure identifiers are listed in the hzIdentifiers set
373
      if ( !identifiers.contains(event.getKey()) ) {
374
          identifiers.add(event.getKey());
375
      }
376
	  
377
	}
378
	
379
	/**
380
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
381
	 * removed events in the hzSystemMetadata map.  Evaluate the entry and create
382
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
383
	 * 
384
	 * @param event - The EntryEvent that occurred
385
	 */
386
	@Override
387
	public void entryRemoved(EntryEvent<Identifier, SystemMetadata> event) {
388
		
389
    logMetacat.info("SystemMetadata entry removed event on identifier " + 
390
        event.getKey().getValue());
391

    
392
	  // we typically don't remove objects in Metacat, but can remove System Metadata
393
		IdentifierManager.getInstance().deleteSystemMetadata(event.getValue().getIdentifier().getValue());
394

    
395
    // keep the hzIdentifiers set in sync with the systemmetadata table
396
    if ( identifiers.contains(event.getKey()) ) {
397
        identifiers.remove(event.getKey());
398
        
399
    }
400

    
401
	}
402
	
403
	/**
404
	 * Implement the EntryListener interface for Hazelcast, reponding to entry
405
	 * updated events in the hzSystemMetadata map.  Evaluate the entry and create
406
	 * CNReplicationTasks as appropriate (for DATA, METADATA, RESOURCE)
407
	 * 
408
	 * @param event - The EntryEvent that occurred
409
	 */
410
	@Override
411
	public void entryUpdated(EntryEvent<Identifier, SystemMetadata> event) {
412

    
413
		logMetacat.debug("Entry added/updated to System Metadata map: " + event.getKey().getValue());
414
		PartitionService partitionService = Hazelcast.getPartitionService();
415
		Partition partition = partitionService.getPartition(event.getKey());
416
		Member ownerMember = partition.getOwner();
417
		SystemMetadata sysmeta = event.getValue();
418
		if (!ownerMember.localMember()) {
419
			if (sysmeta == null) {
420
				logMetacat.warn("No SystemMetadata provided in the event, getting from shared map: " + event.getKey().getValue());
421
				sysmeta = getSystemMetadataMap().get(event.getKey());
422
				if (sysmeta == null) {
423
					// this is a problem
424
					logMetacat.error("Could not find SystemMetadata in shared map for: " + event.getKey().getValue());
425
					// TODO: should probably return at this point since the save will fail
426
				}
427
			}
428
			// need to pull the entry into the local store
429
			saveLocally(event.getValue());
430
		}
431

    
432
		// ensure identifiers are listed in the hzIdentifiers set
433
		if (!identifiers.contains(event.getKey())) {
434
			identifiers.add(event.getKey());
435
		}
436

    
437
	}
438
	
439
	/**
440
	 * Save SystemMetadata to local store if needed
441
	 * @param sm
442
	 */
443
	private void saveLocally(SystemMetadata sm) {
444
		logMetacat.debug("Saving entry locally: " + sm.getIdentifier().getValue());
445
		try {
446

    
447
			IdentifierManager.getInstance().insertOrUpdateSystemMetadata(sm);
448

    
449
		} catch (McdbDocNotFoundException e) {
450
			logMetacat.error("Could not save System Metadata to local store.", e);
451
			
452
		} catch (SQLException e) {
453
	      logMetacat.error("Could not save System Metadata to local store.", e);
454
	      
455
	    } catch (InvalidSystemMetadata e) {
456
	        logMetacat.error("Could not save System Metadata to local store.", e);
457
	        
458
	    }
459
	}
460
	
461
	/**
462
	 * Checks the local backing store for missing SystemMetadata,
463
	 * retrieves those entries from the shared map if they exist,
464
	 * and saves them locally.
465
	 */
466
	private void synchronizeLocalStore() {
467
		List<String> localIds = IdentifierManager.getInstance().getLocalIdsWithNoSystemMetadata(true, -1);
468
		if (localIds != null) {
469
			logMetacat.debug("Member missing SystemMetadata entries, count = " + localIds.size());
470
			for (String localId: localIds) {
471
				logMetacat.debug("Processing system metadata for localId: " + localId);
472
				try {
473
					String docid = DocumentUtil.getSmartDocId(localId);
474
					int rev = DocumentUtil.getRevisionFromAccessionNumber(localId);
475
					String guid = IdentifierManager.getInstance().getGUID(docid, rev);
476
					logMetacat.debug("Found mapped guid: " + guid);
477
					Identifier pid = new Identifier();
478
					pid.setValue(guid);
479
					SystemMetadata sm = systemMetadata.get(pid);
480
					logMetacat.debug("Found shared system metadata for guid: " + guid);
481
					saveLocally(sm);
482
					logMetacat.debug("Saved shared system metadata locally for guid: " + guid);
483
				} catch (Exception e) {
484
					logMetacat.error("Could not save shared SystemMetadata entry locally, localId: " + localId, e);
485
				}
486
			}
487
		}
488
	}
489
	
490
	/**
491
	 * Make sure we have a copy of every entry in the shared map.
492
	 * We use lazy loading and therefore the CNs may not all be in sync when one
493
	 * comes back online after an extended period of being offline
494
	 * @throws Exception
495
	 */
496
	private void resynch() throws Exception {
497
		// loop through all the [shared] entries and save any missing ones locally
498
		List<String> localPids = IdentifierManager.getInstance().getAllSystemMetadataGUIDs();
499
		logMetacat.warn("local pid count: " + localPids.size() + ", shared pid count: " + identifiers.size());
500
		Iterator<Identifier> sharedPids = identifiers.iterator();
501
		while (sharedPids.hasNext()) {
502
			Identifier pid = sharedPids.next();
503
			logMetacat.debug("checking for shared pid locally: " + pid.getValue());
504
			if (!localPids.contains(pid.getValue())) {
505
				logMetacat.warn("shared pid does not exist locally: " + pid.getValue());
506
				SystemMetadata sm = systemMetadata.get(pid);
507
				if (sm != null) {
508
					// this may throw an exception if loading it to the shared map triggered entry updated events
509
					saveLocally(sm);
510
				} else {
511
					logMetacat.error("SystemMetadata for pid is null: " + pid.getValue());
512
					Partition partition = hzInstance.getPartitionService().getPartition(pid);
513
					Member owner = partition.getOwner();
514
					owner.localMember();
515
					logMetacat.warn("owner of pid: " + pid.getValue() + " isLocal: " + owner.localMember() + " at " + owner.getInetSocketAddress().getAddress());
516

    
517
				}
518
			} else {
519
				logMetacat.debug("shared pid already exisits locally: " + pid.getValue());
520
			}
521
		}
522
	}
523
	
524
	private void resynchInThread() {
525
		logMetacat.debug("launching system metadata resynch in a thread");
526
		ExecutorService executor = Executors.newSingleThreadExecutor();
527
		executor.execute(new Runnable() {
528
			@Override
529
			public void run() {
530
				try {
531
					resynch();
532
				} catch (Exception e) {
533
					logMetacat.error("Error in resynchInThread: " + e.getMessage(), e);
534
				}
535
			}
536
		});
537
		executor.shutdown();
538
	}
539

    
540
	/**
541
	 * When there is missing SystemMetadata on the local member,
542
	 * we retrieve it from the shared map and add it to the local
543
	 * backing store for safe keeping.
544
	 */
545
	@Override
546
	public void memberAdded(MembershipEvent event) {
547
		Member member = event.getMember();
548
		logMetacat.debug("Member added to cluster: " + member.getInetSocketAddress());
549
		boolean isLocal = member.localMember();
550
		if (isLocal) {
551
			logMetacat.debug("Member islocal: " + member.getInetSocketAddress());
552
			synchronizeLocalStore();
553
		}
554
	}
555

    
556
	@Override
557
	public void memberRemoved(MembershipEvent event) {
558
		// TODO Auto-generated method stub
559
		
560
	}
561

    
562
	/**
563
	 * In cases where this cluster is paused, we want to 
564
	 * check that the local store accurately reflects the shared 
565
	 * SystemMetadata map
566
	 * @param event
567
	 */
568
	@Override
569
	public void stateChanged(LifecycleEvent event) {
570
		logMetacat.debug("HZ LifecycleEvent.state: " + event.getState());
571
		if (event.getState().equals(LifecycleEvent.LifecycleState.RESUMED)) {
572
			logMetacat.debug("HZ LifecycleEvent.state is RESUMED, calling synchronizeLocalStore()");
573
			synchronizeLocalStore();
574
		}
575
	}
576

    
577
	/**
578
	 * Load all System Metadata keys from the backing store
579
	 * @return set of pids
580
	 */
581
	private Set<Identifier> loadAllKeys() {
582

    
583
		Set<Identifier> pids = new HashSet<Identifier>();
584
		
585
		try {
586
			
587
			// ALTERNATIVE 1: this has more overhead than just looking at the GUIDs
588
//			ObjectList ol = IdentifierManager.getInstance().querySystemMetadata(
589
//					null, //startTime, 
590
//					null, //endTime, 
591
//					null, //objectFormatId, 
592
//					false, //replicaStatus, 
593
//					0, //start, 
594
//					-1 //count
595
//					);
596
//			for (ObjectInfo o: ol.getObjectInfoList()) {
597
//				Identifier pid = o.getIdentifier();
598
//				if ( !pids.contains(pid) ) {
599
//					pids.add(pid);
600
//				}				
601
//			}
602
			
603
			// ALTERNATIVE method: look up all the Identifiers from the table
604
			List<String> guids = IdentifierManager.getInstance().getAllSystemMetadataGUIDs();
605
			for (String guid: guids){
606
				Identifier pid = new Identifier();
607
				pid.setValue(guid);
608
				pids.add(pid);
609
			}
610
			
611
		} catch (Exception e) {
612
			throw new RuntimeException(e.getMessage(), e);
613
			
614
		}
615
		
616
		return pids;
617
	}
618

    
619
}
(1-1/3)