ServiceStack.Redis的原始碼分析(連線與連線池)

志存高遠,腳踏實地發表於2022-02-21

前幾天生產環境上出現了redis建立連線方面的故障,分析過程中對ServiceStack.Redis的連線建立和連線池機制有了進一步瞭解。問題分析結束後,通過此文系統地將學習到的知識點整理出來。

從連線池獲取RedisClient的流程

業務程式中通過PooledRedisClientManager物件的GetClient()方法獲取客戶端物件,就以此處的原始碼作為入口:
檢視程式碼
/// <summary>
/// Gets a write client from the pool: reuses an idle client when one is available,
/// otherwise creates a new one while the pool is below maxRedisClient, otherwise waits.
/// </summary>
/// <returns>An idle client re-activated via InitClient, or a newly created client.</returns>
/// <exception cref="TimeoutException">
/// The pool stayed exhausted for at least PoolTimeout milliseconds. When PoolTimeout is null
/// the method keeps waiting and never throws this exception.
/// </exception>
public IRedisClient GetClient()
        {
            RedisClient redisClient = null;
            // UTC is used so a local clock shift (e.g. DST) cannot shorten or extend the wait.
            DateTime waitStartedAt = DateTime.UtcNow;
            for (; ; )
            {
                if (!this.deactiveClientQueue.TryPop(out redisClient))
                {
                    if (this.redisClientSize >= this.maxRedisClient)
                    {
                        // Pool exhausted: back off briefly, then check whether the caller has waited long enough.
                        Thread.Sleep(3);
                        if (this.PoolTimeout != null && (DateTime.UtcNow - waitStartedAt).TotalMilliseconds >= (double)this.PoolTimeout.Value)
                        {
                            break;
                        }
                    }
                    else
                    {
                        // CreateRedisClient returns null when another thread filled the pool first; loop again in that case.
                        redisClient = this.CreateRedisClient();
                        if (redisClient != null)
                        {
                            goto Block_5;
                        }
                    }
                }
                else
                {
                    if (!redisClient.HadExceptions)
                    {
                        goto Block_6;
                    }
                    // A pooled client that recorded an exception is evicted instead of being handed out.
                    List<RedisClient> obj = this.writeClients;
                    lock (obj)
                    {
                        this.writeClients.Remove(redisClient);
                        this.redisClientSize--;
                    }
                    RedisState.DisposeDeactivatedClient(redisClient);
                }
            }
            // Only reached through the break above, i.e. the pool timeout elapsed.
            throw new TimeoutException("Redis Timeout expired. The timeout period elapsed prior to obtaining a connection from the pool. This may have occurred because all pooled connections were in use.");
        Block_5:
            // Newly created client: register it under the same lock that guards every removal from writeClients.
            List<RedisClient> registry = this.writeClients;
            lock (registry)
            {
                this.writeClients.Add(redisClient);
            }
            return redisClient;
        Block_6:
            // Healthy idle client: mark it active and re-initialise it before handing it out.
            redisClient.Active = true;
            this.InitClient(redisClient);
            return redisClient;
        }

此方法的主體是死迴圈,主要實現了這幾項功能:

  • this.deactiveClientQueue代表空閒的Client集合,是ConcurrentStack<RedisClient>型別的。
  • 當this.deactiveClientQueue能夠Pop出redisClient時,則跳轉到Block_6分支:標記redisClient.Active屬性,並執行this.InitClient(redisClient),然後將redisClient例項返回。
  • 當this.deactiveClientQueue沒有可以Pop的元素時,首先執行Client數量上限的判斷this.redisClientSize >= this.maxRedisClient;
    • 如果未到達上限,則執行redisClient = this.CreateRedisClient();
    • 如果達到上限,則先休眠3毫秒,然後判斷是否超過連線池超時時間this.PoolTimeout,單位毫秒。超時的話直接break中斷迴圈,不超時的話繼續下一次for迴圈。

上述流程就是從連線池獲取Client的主要流程,其中this.deactiveClientQueue相當於“Client池”。需要注意this.PoolTimeout的含義是當連線池耗盡時呼叫方等待的時間。

上述過程通過流程圖表示為:

ServiceStack.Redis的原始碼分析(連線與連線池)

建立新Client的過程:CreateRedisClient()

原始碼如下:

檢視程式碼
  private RedisClient CreateRedisClient()
		{
			if (this.redisClientSize >= this.maxRedisClient)
			{
				return null;
			}
			object obj = this.lckObj;
			RedisClient result;
			lock (obj)
			{
				if (this.redisClientSize >= this.maxRedisClient)
				{
					result = null;
				}
				else
				{
					Random random = new Random((int)DateTime.Now.Ticks);
					RedisClient newClient = this.InitNewClient(this.RedisResolver.CreateMasterClient(random.Next(100)));
					newClient.OnDispose += delegate()
					{
						if (!newClient.HadExceptions)
						{
							List<RedisClient> obj2 = this.writeClients;
							lock (obj2)
							{
								if (!newClient.HadExceptions)
								{
									try
									{
										this.deactiveClientQueue.Push(newClient);
										return;
									}
									catch
									{
										this.writeClients.Remove(newClient);
										this.redisClientSize--;
										RedisState.DisposeDeactivatedClient(newClient);
									}
								}
							}
						}
						this.writeClients.Remove(newClient);
						this.redisClientSize--;
						RedisState.DisposeDeactivatedClient(newClient);
					};
					this.redisClientSize++;
					result = newClient;
				}
			}
			return result;
		}

基於併發的考慮,建立新Client的流程需要增加併發鎖限制,即lock (obj)處。此時如果多個執行緒都進入CreateRedisClient()方法,則只有一個執行緒實際執行,其它執行緒阻塞等待鎖釋放。這個現象可以通過windbg的syncblk、clrstack命令分析檢視。其餘的部分就是繼續呼叫this.InitNewClient(this.RedisResolver.CreateMasterClient(random.Next(100)))建立物件,並對newClient的OnDispose事件增加了處理邏輯。需要說明的是此處OnDispose事件並不是傳統意義的析構,而是呼叫方用完此RedisClient物件後,用於將其回收到連線池的操作,即:newClient物件沒有異常的前提下, 將其Push到this.deactiveClientQueue棧裡,連線池就是此處回收擴充的。

this.InitNewClient()方法解讀

此處是對新建立的RedisClient物件初始化,包括Id、Active等,並繼續呼叫this.InitClient()進一步初始化。

this.RedisResolver.CreateMasterClient()解讀

this.redisResolver是IRedisResolver介面型別,原始碼中有三種實現,如下截圖。此處以生產常見的哨兵模式為例進行分析。

ServiceStack.Redis的原始碼分析(連線與連線池)

RedisSentinelResolver類對應的就是哨兵模式,其相關操作原始碼如下:

檢視程式碼
/// <summary>
/// Resolves the read/write endpoint for <paramref name="desiredIndex"/> and creates a
/// master client for it.
/// </summary>
public RedisClient CreateMasterClient(int desiredIndex)
		{
			RedisEndpoint masterEndpoint = this.GetReadWriteHost(desiredIndex);
			return this.CreateRedisClient(masterEndpoint, true);
		}
		/// <summary>
		/// Returns the master endpoint reported by the sentinel, or, when that is null,
		/// the entry of masters at <paramref name="desiredIndex"/> modulo masters.Length.
		/// </summary>
		public RedisEndpoint GetReadWriteHost(int desiredIndex)
		{
			RedisEndpoint sentinelMaster = this.sentinel.GetMaster();
			return sentinelMaster ?? this.masters[desiredIndex % this.masters.Length];
		}

		/// <summary>
		/// Creates a client for <paramref name="config"/>. When <paramref name="master"/> is true the
		/// endpoint's role is probed first with a separate short-lived client; if it is not confirmed
		/// as a master and RedisConfig.VerifyMasterConnections is set, the sentinel is polled for the
		/// current master and, failing that, every host in allHosts is scanned.
		/// </summary>
		/// <param name="config">Endpoint to create the client for.</param>
		/// <param name="master">Whether the caller expects a master; false skips all verification.</param>
		/// <returns>
		/// The client created for <paramref name="config"/>, or a client for the master endpoint
		/// found through the sentinel polling or the allHosts scan.
		/// </returns>
		/// <exception cref="Exception">No host in allHosts reported the Master role during the scan.</exception>
		public virtual RedisClient CreateRedisClient(RedisEndpoint config, bool master)
		{
			// Created before any verification; this is the instance handed back on every "return result" path.
			RedisClient result = this.ClientFactory(config);
			if (master)
			{
				RedisServerRole redisServerRole = RedisServerRole.Unknown;
				try
				{
					// The role probe uses its own client, which the using block disposes.
					using (RedisClient redisClient = this.ClientFactory(config))
					{
						redisClient.ConnectTimeout = 5000;
						redisClient.ReceiveTimeout = 5000;
						redisServerRole = redisClient.GetServerRole();
						if (redisServerRole == RedisServerRole.Master)
						{
							this.lastValidMasterFromSentinelAt = DateTime.UtcNow;
							return result;
						}
					}
				}
				catch (Exception exception)
				{
					// Any failure while probing is counted as an invalid master.
					Interlocked.Increment(ref RedisState.TotalInvalidMasters);
					using (RedisClient redisClient2 = this.ClientFactory(config))
					{
						redisClient2.ConnectTimeout = 5000;
						redisClient2.ReceiveTimeout = 5000;
						// Same host as the one recorded by the previous failure: consider forcing a failover.
						if (redisClient2.GetHostString() == this.lastInvalidMasterHost)
						{
							object obj = this.oLock;
							lock (obj)
							{
								// Force a failover only once no valid master has been seen for longer than WaitBeforeForcingMasterFailover.
								if (DateTime.UtcNow - this.lastValidMasterFromSentinelAt > this.sentinel.WaitBeforeForcingMasterFailover)
								{
									this.lastInvalidMasterHost = null;
									this.lastValidMasterFromSentinelAt = DateTime.UtcNow;
									RedisSentinelResolver.log.Error("Valid master was not found at '{0}' within '{1}'. Sending SENTINEL failover...".Fmt(redisClient2.GetHostString(), this.sentinel.WaitBeforeForcingMasterFailover), exception);
									Interlocked.Increment(ref RedisState.TotalForcedMasterFailovers);
									this.sentinel.ForceMasterFailover();
									Thread.Sleep(this.sentinel.WaitBetweenFailedHosts);
									// Re-read the role after the failover; if it is Master now, the sentinel polling below is skipped.
									redisServerRole = redisClient2.GetServerRole();
								}
								// Jump past the lastInvalidMasterHost assignment below.
								goto IL_16E;
							}
						}
						// First failure for this host: remember it so a repeated failure can take the failover path.
						this.lastInvalidMasterHost = redisClient2.GetHostString();
						IL_16E:;
					}
				}
				// Reached when the probe did not return early: either the role is not Master or the probe threw.
				if (redisServerRole != RedisServerRole.Master && RedisConfig.VerifyMasterConnections)
				{
					try
					{
						Stopwatch stopwatch = Stopwatch.StartNew();
						// Poll the sentinel for its current master until one verifies as Master
						// or MaxWaitBetweenFailedHosts has elapsed.
						for (;;)
						{
							try
							{
								RedisEndpoint master2 = this.sentinel.GetMaster();
								using (RedisClient redisClient3 = this.ClientFactory(master2))
								{
									redisClient3.ReceiveTimeout = 5000;
									redisClient3.ConnectTimeout = this.sentinel.SentinelWorkerConnectTimeoutMs;
									if (redisClient3.GetServerRole() == RedisServerRole.Master)
									{
										this.lastValidMasterFromSentinelAt = DateTime.UtcNow;
										// NOTE(review): "result" created at the top is neither returned nor disposed on this path — confirm whether that leaks.
										return this.ClientFactory(master2);
									}
									Interlocked.Increment(ref RedisState.TotalInvalidMasters);
								}
							}
							catch
							{
								// Lookup and probe failures are swallowed; the loop tries again after the sleep below.
							}
							if (stopwatch.Elapsed > this.sentinel.MaxWaitBetweenFailedHosts)
							{
								break;
							}
							Thread.Sleep(this.sentinel.WaitBetweenFailedHosts);
						}
						// Caught by the catch directly below, which falls back to scanning allHosts.
						throw new TimeoutException("Max Wait Between Sentinel Lookups Elapsed: {0}".Fmt(this.sentinel.MaxWaitBetweenFailedHosts.ToString()));
					}
					catch (Exception exception2)
					{
						RedisSentinelResolver.log.Error("Redis Master Host '{0}' is {1}. Resetting allHosts...".Fmt(config.GetHostString(), redisServerRole), exception2);
						// list collects hosts reporting Master, list2 collects hosts reporting Slave.
						List<RedisEndpoint> list = new List<RedisEndpoint>();
						List<RedisEndpoint> list2 = new List<RedisEndpoint>();
						RedisClient redisClient4 = null;
						foreach (RedisEndpoint redisEndpoint in this.allHosts)
						{
							try
							{
								using (RedisClient redisClient5 = this.ClientFactory(redisEndpoint))
								{
									redisClient5.ReceiveTimeout = 5000;
									redisClient5.ConnectTimeout = RedisConfig.HostLookupTimeoutMs;
									RedisServerRole serverRole = redisClient5.GetServerRole();
									if (serverRole != RedisServerRole.Master)
									{
										if (serverRole == RedisServerRole.Slave)
										{
											list2.Add(redisEndpoint);
										}
									}
									else
									{
										list.Add(redisEndpoint);
										// Keep a client for the first master found; that client is what gets returned.
										if (redisClient4 == null)
										{
											redisClient4 = this.ClientFactory(redisEndpoint);
										}
									}
								}
							}
							catch
							{
								// Hosts whose probe throws are skipped.
							}
						}
						if (redisClient4 == null)
						{
							Interlocked.Increment(ref RedisState.TotalNoMastersFound);
							string message = "No master found in: " + string.Join(", ", this.allHosts.Map((RedisEndpoint x) => x.GetHostString()));
							RedisSentinelResolver.log.Error(message);
							throw new Exception(message);
						}
						// Hand the hosts discovered by the scan to ResetMasters/ResetSlaves.
						this.ResetMasters(list);
						this.ResetSlaves(list2);
						return redisClient4;
					}
					// NOTE(review): appears unreachable — the try above always returns or throws, and so does its catch.
					return result;
				}
				// The role became Master after a forced failover, or VerifyMasterConnections is disabled.
				return result;
			}
			// Non-master requests get the client without any role verification.
			return result;
		}

其中GetReadWriteHost()方法的邏輯是:優先使用this.sentinel.GetMaster()得到的主節點資訊。如果GetMaster()返回null,則以desiredIndex對masters.Length取餘作為下標,從現有的主節點集合masters中選擇一個進行連線(連線池呼叫時desiredIndex由random.Next(100)產生,因此相當於隨機選擇)。

然後進入CreateRedisClient()方法內:

  • 首先通過this.ClientFactory()工廠建立物件redisClient,工廠內部實現了計數和new RedisClient()操作。沒有太多內容。
  • 然後是執行redisClient.GetServerRole(),代表向伺服器核實當前連線的節點確實是Master角色。如果確認,則直接返回給呼叫方。【如果傳送查詢請求的過程出現異常,且符合一定條件,則會發起故障轉移請求,即this.sentinel.ForceMasterFailover();】
  • 如果當前連線的不是Master角色的節點,則多次呼叫this.sentinel.GetMaster()查詢Master節點資訊並重新例項化RedisClient物件;
  • 如果超時仍然未能連線到Master節點,則會進入catch異常處理流程,遍歷this.allHosts全部節點並更新對應的節點角色。

至此,通過上述的流程,最終能夠得到master節點的RedisClient物件,並返回給呼叫方。 

上述過程中,還有幾處方法的實現比較重要和複雜,下面對其一一解釋說明:

RedisSentinel類的GetMaster()實現原理解析

呼叫處很簡單,但是此方法的實現操作挺多,RedisSentinel類 原始碼如下:

檢視程式碼
/// <summary>
/// Asks the current sentinel worker for the master host of masterName and converts it to an
/// endpoint, applying HostFilter when one is set. Also refreshes the active sentinels when
/// ScanForOtherSentinels is on and RefreshSentinelHostsAfter has elapsed.
/// </summary>
/// <returns>The master endpoint, or null when the worker returned no master host.</returns>
public RedisEndpoint GetMaster()
		{
			RedisSentinelWorker validSentinelWorker = this.GetValidSentinelWorker();
			lock (validSentinelWorker)
			{
				string masterHost = validSentinelWorker.GetMasterHost(this.masterName);
				bool sentinelRefreshDue = this.ScanForOtherSentinels && DateTime.UtcNow - this.lastSentinelsRefresh > this.RefreshSentinelHostsAfter;
				if (sentinelRefreshDue)
				{
					this.RefreshActiveSentinels();
				}
				if (masterHost == null)
				{
					return null;
				}
				string effectiveHost = masterHost;
				if (this.HostFilter != null)
				{
					effectiveHost = this.HostFilter(masterHost);
				}
				return effectiveHost.ToRedisEndpoint(null);
			}
		}

		/// <summary>
		/// Returns the current sentinel worker, or connects a new one through GetNextSentinel()
		/// while ShouldRetry() allows further attempts.
		/// </summary>
		/// <returns>The worker stored in this.worker.</returns>
		/// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
		/// <exception cref="RedisException">
		/// The retry loop ended without a worker; the last RedisException caught (if any) is the inner exception.
		/// </exception>
		private RedisSentinelWorker GetValidSentinelWorker()
		{
			if (this.isDisposed)
			{
				throw new ObjectDisposedException(base.GetType().Name);
			}
			// Reuse the existing worker when there is one.
			if (this.worker != null)
			{
				return this.worker;
			}
			// Holds the most recent failure so it can be attached to the final exception.
			RedisException innerException = null;
			while (this.worker == null && this.ShouldRetry())
			{
				try
				{
					// this.worker is assigned before the two calls below; if either throws a
					// RedisException, the catch resets it to null so the loop tries again.
					this.worker = this.GetNextSentinel();
					this.GetSentinelInfo();
					this.worker.BeginListeningForConfigurationChanges();
					this.failures = 0;
					return this.worker;
				}
				// NOTE(review): only RedisException is handled; any other exception type propagates
				// with this.worker still assigned — confirm that is intended.
				catch (RedisException ex)
				{
					if (this.OnWorkerError != null)
					{
						this.OnWorkerError(ex);
					}
					innerException = ex;
					this.worker = null;
					this.failures++;
					Interlocked.Increment(ref RedisState.TotalFailedSentinelWorkers);
				}
			}
			// No worker could be obtained: reset the failure counter and pause before reporting the failure.
			this.failures = 0;
			Thread.Sleep(this.WaitBetweenFailedHosts);
			throw new RedisException("No Redis Sentinels were available", innerException);
		}
		/// <summary>
		/// Disposes the current worker (if any), advances sentinelIndex to the next entry of
		/// SentinelEndpoints (wrapping to 0 past the end) and creates a worker for that endpoint.
		/// The whole operation runs under oLock.
		/// </summary>
		/// <returns>A new worker whose OnSentinelError is wired to this instance's OnSentinelError.</returns>
		private RedisSentinelWorker GetNextSentinel()
		{
			lock (this.oLock)
			{
				if (this.worker != null)
				{
					this.worker.Dispose();
					this.worker = null;
				}
				// Advance to the next endpoint, wrapping around after the last one.
				if (++this.sentinelIndex >= this.SentinelEndpoints.Length)
				{
					this.sentinelIndex = 0;
				}
				RedisSentinelWorker nextWorker = new RedisSentinelWorker(this, this.SentinelEndpoints[this.sentinelIndex]);
				nextWorker.OnSentinelError = new Action<Exception>(this.OnSentinelError);
				return nextWorker;
			}
		}
		/// <summary>
		/// Handles an error reported by the current sentinel worker: logs it, notifies
		/// OnWorkerError when set, then replaces the worker with the next sentinel and
		/// starts listening on it. Does nothing when there is no current worker.
		/// </summary>
		private void OnSentinelError(Exception ex)
		{
			if (this.worker == null)
			{
				return;
			}
			RedisSentinel.Log.Error("Error on existing SentinelWorker, reconnecting...");
			if (this.OnWorkerError != null)
			{
				this.OnWorkerError(ex);
			}
			this.worker = this.GetNextSentinel();
			this.worker.BeginListeningForConfigurationChanges();
		}

先通過GetValidSentinelWorker()獲得RedisSentinelWorker物件。此方法的實現包含了重試機制的控制,最終是通過this.GetNextSentinel()方法給this.worker欄位,即RedisSentinelWorker物件例項。

而GetNextSentinel()方法內部包含了同步鎖、呼叫this.worker.Dispose()、按sentinelIndex依序輪詢選擇下一個哨兵節點(超出SentinelEndpoints末尾後回到0)、例項化RedisSentinelWorker物件等操作。

後面是對validSentinelWorker進行加鎖,然後繼續執行string masterHost = validSentinelWorker.GetMasterHost(this.masterName);

對應的RedisSentinelWorker類的程式碼如下:

檢視程式碼
		/// <summary>
		/// Looks up the master host for <paramref name="masterName"/>. Any exception is passed
		/// to OnSentinelError (when set) and turned into a null result instead of propagating.
		/// </summary>
		/// <returns>The master host string, or null when the lookup found nothing or threw.</returns>
		internal string GetMasterHost(string masterName)
		{
			try
			{
				return this.GetMasterHostInternal(masterName);
			}
			catch (Exception lookupError)
			{
				if (this.OnSentinelError != null)
				{
					this.OnSentinelError(lookupError);
				}
				return null;
			}
		}
		/// <summary>
		/// Queries the sentinel client for the address of <paramref name="masterName"/> and
		/// passes a non-empty reply through SanitizeMasterConfig.
		/// </summary>
		/// <returns>The sanitized master host, or null when the reply is empty.</returns>
		private string GetMasterHostInternal(string masterName)
		{
			List<string> addressParts = this.sentinelClient.SentinelGetMasterAddrByName(masterName);
			if (addressParts.Count > 0)
			{
				return this.SanitizeMasterConfig(addressParts);
			}
			return null;
		}
		/// <summary>
		/// Disposes the two connections this worker holds, sentinelClient and sentinePubSub,
		/// through the array Dispose helper, passing it the worker's log.
		/// </summary>
		public void Dispose()
		{
			IDisposable[] ownedConnections = new IDisposable[]
			{
				this.sentinelClient,
				this.sentinePubSub
			};
			ownedConnections.Dispose(RedisSentinelWorker.Log);
		}

注意GetMasterHost()方法內:當發生異常時,會觸發this物件的OnSentinelError事件,顧名思義這個事件用於哨兵異常的後續處理。通過原始碼搜尋,只有GetNextSentinel()方法內對OnSentinelError事件增加了處理程式-->即RedisSentinel內的private void OnSentinelError(Exception ex)方法。而這個方法內部在列印日誌並觸發this.OnWorkerError事件後,又呼叫GetNextSentinel()重新給this.worker欄位賦值。

需要注意:Dispose()方法實際是分別呼叫了this.sentinelClient和this.sentinePubSub的登出操作。

 

RedisNativeClient類的相關功能和實現

接著呼叫了RedisNativeClient類的SentinelGetMasterAddrByName()方法:

這個類裡的幾個方法的含義綜合起來就是:將哨兵客戶端的查詢指令通過Socket傳送到服務端,並將返回結果格式化為所需的RedisEndpoint型別。

在方法SendReceive()內還包含了Socket連線、重試、頻率控制、超時控制等機制。

檢視程式碼
        /// <summary>
        /// Sends the Sentinel / GetMasterAddrByName command for <paramref name="masterName"/>
        /// and returns the multi-data reply converted to a list of strings.
        /// </summary>
        public List<string> SentinelGetMasterAddrByName(string masterName)
		{
			byte[][] command = new byte[][]
			{
				Commands.Sentinel,
				Commands.GetMasterAddrByName,
				masterName.ToUtf8Bytes()
			};
			return this.SendExpectMultiData(command).ToStringList();
		}
		/// <summary>
		/// Sends a command through SendReceive and reads the reply with ReadMultiData. When a
		/// pipeline is active, its CompleteMultiBytesQueuedCommand is supplied as the completion callback.
		/// </summary>
		/// <returns>The reply, or TypeConstants.EmptyByteArrayArray when SendReceive returns null.</returns>
		protected byte[][] SendExpectMultiData(params byte[][] cmdWithBinaryArgs)
		{
			Action<Func<byte[][]>> completePipeline = null;
			if (this.Pipeline != null)
			{
				completePipeline = new Action<Func<byte[][]>>(this.Pipeline.CompleteMultiBytesQueuedCommand);
			}
			byte[][] reply = this.SendReceive<byte[][]>(cmdWithBinaryArgs, new Func<byte[][]>(this.ReadMultiData), completePipeline, false);
			return reply ?? TypeConstants.EmptyByteArrayArray;
		}

		/// <summary>
		/// Sends a command and reads its reply through <paramref name="fn"/>, retrying failures that
		/// are classified as retryable (with a back-off sleep between attempts) until retryTimeout
		/// has elapsed since the first attempt.
		/// </summary>
		/// <param name="cmdWithBinaryArgs">Command and arguments to write to the send buffer.</param>
		/// <param name="fn">Reads the reply; may be null, in which case default(T) is returned.</param>
		/// <param name="completePipelineFn">Receives <paramref name="fn"/> when a pipeline is active and a reply is expected.</param>
		/// <param name="sendWithoutRead">When true with an active pipeline, the pipeline callback is bypassed.</param>
		/// <returns>The value produced by <paramref name="fn"/>, or default(T) when the read was handed to the pipeline.</returns>
		/// <exception cref="NotSupportedException">A pipeline is active, a reply is expected, and <paramref name="completePipelineFn"/> is null.</exception>
		protected T SendReceive<T>(byte[][] cmdWithBinaryArgs, Func<T> fn, Action<Func<T>> completePipelineFn = null, bool sendWithoutRead = false)
		{
			// num counts retries, ex keeps the first retryable failure, utcNow marks the start of the retry window.
			int num = 0;
			Exception ex = null;
			DateTime utcNow = DateTime.UtcNow;
			T t;
			for (;;)
			{
				try
				{
					this.TryConnectIfNeeded();
					if (this.socket == null)
					{
						throw new RedisRetryableException("Socket is not connected");
					}
					// The command is written to the send buffer on the first attempt only.
					// NOTE(review): retries appear to rely on the buffer still holding it — confirm.
					if (num == 0)
					{
						this.WriteCommandToSendBuffer(cmdWithBinaryArgs);
					}
					if (this.Pipeline == null)
					{
						this.FlushSendBuffer();
					}
					else if (!sendWithoutRead)
					{
						// Pipeline active and a reply is expected: hand the reader to the pipeline and return default(T).
						if (completePipelineFn == null)
						{
							throw new NotSupportedException("Pipeline is not supported.");
						}
						completePipelineFn(fn);
						t = default(T);
						// Self-assignment has no effect.
						t = t;
						break;
					}
					// Reached without a pipeline, or with a pipeline when sendWithoutRead is true.
					T t2 = default(T);
					if (fn != null)
					{
						t2 = fn();
					}
					if (this.Pipeline == null)
					{
						this.ResetSendBuffer();
					}
					// Success after at least one retry.
					if (num > 0)
					{
						Interlocked.Increment(ref RedisState.TotalRetrySuccess);
					}
					Interlocked.Increment(ref RedisState.TotalCommandsSent);
					t = t2;
				}
				catch (Exception ex2)
				{
					RedisRetryableException ex3 = ex2 as RedisRetryableException;
					// A RedisException that is not a RedisRetryableException, or a LicenseException, is rethrown without retrying.
					if ((ex3 == null && ex2 is RedisException) || ex2 is LicenseException)
					{
						this.ResetSendBuffer();
						throw;
					}
					// Other exceptions are retried only if GetRetryableException maps them to a retryable one.
					Exception ex4 = ex3 ?? this.GetRetryableException(ex2);
					if (ex4 == null)
					{
						throw this.CreateConnectionError(ex ?? ex2);
					}
					// Keep the first retryable failure for the timeout exception below.
					if (ex == null)
					{
						ex = ex4;
					}
					// Retry window exhausted: give up with a retry-timeout exception.
					if (!(DateTime.UtcNow - utcNow < this.retryTimeout))
					{
						if (this.Pipeline == null)
						{
							this.ResetSendBuffer();
						}
						Interlocked.Increment(ref RedisState.TotalRetryTimedout);
						throw this.CreateRetryTimeoutException(this.retryTimeout, ex);
					}
					Interlocked.Increment(ref RedisState.TotalRetryCount);
					// Sleep for the back-off computed from the incremented retry count, then try again.
					Thread.Sleep(RedisNativeClient.GetBackOffMultiplier(++num));
					continue;
				}
				// The attempt succeeded: leave the retry loop.
				break;
			}
			return t;
		}

總結

本文著重以Redis連線建立、獲取為線索,對SDK內部的實現機制有了更深入的瞭解。在此基礎上,分析生產環境Redis SDK相關故障時更加得心應手。

相關文章