序
本文主要研究一下elasticsearch的LagDetector
LagDetector
elasticsearch-7.0.1/server/src/main/java/org/elasticsearch/cluster/coordination/LagDetector.java
/**
* A publication can succeed and complete before all nodes have applied the published state and acknowledged it; however we need every node
* eventually either to apply the published state (or a later state) or be removed from the cluster. This component achieves this by
* removing any lagging nodes from the cluster after a timeout.
*/
public class LagDetector {
private static final Logger logger = LogManager.getLogger(LagDetector.class);
// the timeout for each node to apply a cluster state update after the leader has applied it, before being removed from the cluster
public static final Setting<TimeValue> CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING =
Setting.timeSetting("cluster.follower_lag.timeout",
TimeValue.timeValueMillis(90000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
private final TimeValue clusterStateApplicationTimeout;
private final Consumer<DiscoveryNode> onLagDetected;
private final Supplier<DiscoveryNode> localNodeSupplier;
private final ThreadPool threadPool;
private final Map<DiscoveryNode, NodeAppliedStateTracker> appliedStateTrackersByNode = newConcurrentMap();
public LagDetector(final Settings settings, final ThreadPool threadPool, final Consumer<DiscoveryNode> onLagDetected,
final Supplier<DiscoveryNode> localNodeSupplier) {
this.threadPool = threadPool;
this.clusterStateApplicationTimeout = CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING.get(settings);
this.onLagDetected = onLagDetected;
this.localNodeSupplier = localNodeSupplier;
}
public void setTrackedNodes(final Iterable<DiscoveryNode> discoveryNodes) {
final Set<DiscoveryNode> discoveryNodeSet = new HashSet<>();
discoveryNodes.forEach(discoveryNodeSet::add);
discoveryNodeSet.remove(localNodeSupplier.get());
appliedStateTrackersByNode.keySet().retainAll(discoveryNodeSet);
discoveryNodeSet.forEach(node -> appliedStateTrackersByNode.putIfAbsent(node, new NodeAppliedStateTracker(node)));
}
public void clearTrackedNodes() {
appliedStateTrackersByNode.clear();
}
public void setAppliedVersion(final DiscoveryNode discoveryNode, final long appliedVersion) {
final NodeAppliedStateTracker nodeAppliedStateTracker = appliedStateTrackersByNode.get(discoveryNode);
if (nodeAppliedStateTracker == null) {
// Received an ack from a node that a later publication has removed (or we are no longer master). No big deal.
logger.trace("node {} applied version {} but this node's version is not being tracked", discoveryNode, appliedVersion);
} else {
nodeAppliedStateTracker.increaseAppliedVersion(appliedVersion);
}
}
public void startLagDetector(final long version) {
final List<NodeAppliedStateTracker> laggingTrackers
= appliedStateTrackersByNode.values().stream().filter(t -> t.appliedVersionLessThan(version)).collect(Collectors.toList());
if (laggingTrackers.isEmpty()) {
logger.trace("lag detection for version {} is unnecessary: {}", version, appliedStateTrackersByNode.values());
} else {
logger.debug("starting lag detector for version {}: {}", version, laggingTrackers);
threadPool.scheduleUnlessShuttingDown(clusterStateApplicationTimeout, Names.GENERIC, new Runnable() {
@Override
public void run() {
laggingTrackers.forEach(t -> t.checkForLag(version));
}
@Override
public String toString() {
return "lag detector for version " + version + " on " + laggingTrackers;
}
});
}
}
@Override
public String toString() {
return "LagDetector{" +
"clusterStateApplicationTimeout=" + clusterStateApplicationTimeout +
", appliedStateTrackersByNode=" + appliedStateTrackersByNode.values() +
'}';
}
// for assertions
Set<DiscoveryNode> getTrackedNodes() {
return Collections.unmodifiableSet(appliedStateTrackersByNode.keySet());
}
private class NodeAppliedStateTracker {
private final DiscoveryNode discoveryNode;
private final AtomicLong appliedVersion = new AtomicLong();
NodeAppliedStateTracker(final DiscoveryNode discoveryNode) {
this.discoveryNode = discoveryNode;
}
void increaseAppliedVersion(long appliedVersion) {
long maxAppliedVersion = this.appliedVersion.updateAndGet(v -> Math.max(v, appliedVersion));
logger.trace("{} applied version {}, max now {}", this, appliedVersion, maxAppliedVersion);
}
boolean appliedVersionLessThan(final long version) {
return appliedVersion.get() < version;
}
@Override
public String toString() {
return "NodeAppliedStateTracker{" +
"discoveryNode=" + discoveryNode +
", appliedVersion=" + appliedVersion +
'}';
}
void checkForLag(final long version) {
if (appliedStateTrackersByNode.get(discoveryNode) != this) {
logger.trace("{} no longer active when checking version {}", this, version);
return;
}
long appliedVersion = this.appliedVersion.get();
if (version <= appliedVersion) {
logger.trace("{} satisfied when checking version {}, node applied version {}", this, version, appliedVersion);
return;
}
logger.debug("{}, detected lag at version {}, node has only applied version {}", this, version, appliedVersion);
onLagDetected.accept(discoveryNode);
}
}
}
複製程式碼
- LagDetector用於檢測並移除lagging nodes,其構造器讀取cluster.follower_lag.timeout配置,預設為90000ms,最小值為1ms
- startLagDetector首先獲取從appliedStateTrackersByNode中過濾出appliedVersion小於指定version的laggingTrackers,之後延時clusterStateApplicationTimeout執行檢測,其run方法會遍歷laggingTrackers,挨個執行器NodeAppliedStateTracker.checkForLag方法
- NodeAppliedStateTracker的checkForLag方法首先進行version判斷,最後呼叫onLagDetected.accept(discoveryNode)
Coordinator
elasticsearch-7.0.1/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
public class Coordinator extends AbstractLifecycleComponent implements Discovery {
//......
public Coordinator(String nodeName, Settings settings, ClusterSettings clusterSettings, TransportService transportService,
NamedWriteableRegistry namedWriteableRegistry, AllocationService allocationService, MasterService masterService,
Supplier<CoordinationState.PersistedState> persistedStateSupplier, SeedHostsProvider seedHostsProvider,
ClusterApplier clusterApplier, Collection<BiConsumer<DiscoveryNode, ClusterState>> onJoinValidators, Random random) {
this.settings = settings;
this.transportService = transportService;
this.masterService = masterService;
this.allocationService = allocationService;
this.onJoinValidators = JoinTaskExecutor.addBuiltInJoinValidators(onJoinValidators);
this.singleNodeDiscovery = DiscoveryModule.SINGLE_NODE_DISCOVERY_TYPE.equals(DiscoveryModule.DISCOVERY_TYPE_SETTING.get(settings));
this.joinHelper = new JoinHelper(settings, allocationService, masterService, transportService,
this::getCurrentTerm, this::getStateForMasterService, this::handleJoinRequest, this::joinLeaderInTerm, this.onJoinValidators);
this.persistedStateSupplier = persistedStateSupplier;
this.noMasterBlockService = new NoMasterBlockService(settings, clusterSettings);
this.lastKnownLeader = Optional.empty();
this.lastJoin = Optional.empty();
this.joinAccumulator = new InitialJoinAccumulator();
this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings);
this.random = random;
this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool());
this.preVoteCollector = new PreVoteCollector(transportService, this::startElection, this::updateMaxTermSeen);
configuredHostsResolver = new SeedHostsResolver(nodeName, settings, transportService, seedHostsProvider);
this.peerFinder = new CoordinatorPeerFinder(settings, transportService,
new HandshakingTransportAddressConnector(settings, transportService), configuredHostsResolver);
this.publicationHandler = new PublicationTransportHandler(transportService, namedWriteableRegistry,
this::handlePublishRequest, this::handleApplyCommit);
this.leaderChecker = new LeaderChecker(settings, transportService, getOnLeaderFailure());
this.followersChecker = new FollowersChecker(settings, transportService, this::onFollowerCheckRequest, this::removeNode);
this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger);
this.clusterApplier = clusterApplier;
masterService.setClusterStateSupplier(this::getStateForMasterService);
this.reconfigurator = new Reconfigurator(settings, clusterSettings);
this.clusterBootstrapService = new ClusterBootstrapService(settings, transportService, this::getFoundPeers,
this::isInitialConfigurationSet, this::setInitialConfiguration);
this.discoveryUpgradeService = new DiscoveryUpgradeService(settings, transportService,
this::isInitialConfigurationSet, joinHelper, peerFinder::getFoundPeers, this::setInitialConfiguration);
this.lagDetector = new LagDetector(settings, transportService.getThreadPool(), n -> removeNode(n, "lagging"),
transportService::getLocalNode);
this.clusterFormationFailureHelper = new ClusterFormationFailureHelper(settings, this::getClusterFormationState,
transportService.getThreadPool(), joinHelper::logLastFailedJoinAttempt);
}
private void removeNode(DiscoveryNode discoveryNode, String reason) {
synchronized (mutex) {
if (mode == Mode.LEADER) {
masterService.submitStateUpdateTask("node-left",
new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason),
ClusterStateTaskConfig.build(Priority.IMMEDIATE),
nodeRemovalExecutor,
nodeRemovalExecutor);
}
}
}
//......
}
複製程式碼
- Coordinator的構造器建立了LagDetector,其Consumer<DiscoveryNode>執行的是removeNode方法,該方法在當前mode為LEADER的時候會執行NodeRemovalClusterStateTaskExecutor.Task
小結
- LagDetector用於檢測並移除lagging nodes,其構造器讀取cluster.follower_lag.timeout配置,預設為90000ms,最小值為1ms
- startLagDetector首先獲取從appliedStateTrackersByNode中過濾出appliedVersion小於指定version的laggingTrackers,之後延時clusterStateApplicationTimeout執行檢測,其run方法會遍歷laggingTrackers,挨個執行器NodeAppliedStateTracker.checkForLag方法;NodeAppliedStateTracker的checkForLag方法首先進行version判斷,最後呼叫onLagDetected.accept(discoveryNode)
- Coordinator的構造器建立了LagDetector,其Consumer<DiscoveryNode>執行的是removeNode方法,該方法在當前mode為LEADER的時候會執行NodeRemovalClusterStateTaskExecutor.Task