C# 爬蟲——Cookies 處理(Set-Cookie)

trevan發表於2018-08-16
/// <summary>
    /// Cookie helper: parses combined <c>Set-Cookie</c> header values into
    /// <see cref="Cookie"/> objects and reads/updates values inside
    /// "name=value; name=value" style cookie strings.
    /// </summary>
    public class CookieHelper
    {
        /// <summary>
        /// Extracts one cookie per match from a combined Set-Cookie header.
        /// A cookie ends at a comma that is immediately followed by a non-whitespace
        /// character; the ", " inside an Expires date is therefore not treated as a
        /// separator. Note that each cookie must contain at least one ';' to be matched.
        /// </summary>
        private static readonly Regex RegexSplitCookie2 = new Regex(@"[^,][\S\s]+?;+[\S\s]+?(?=,\S)");

        /// <summary>
        /// Parses every cookie found in a combined Set-Cookie header value.
        /// </summary>
        /// <param name="setCookie">Set-Cookie header value; several cookies may be joined with ','.</param>
        /// <returns>The parsed cookies; empty when the input is null, blank or contains no cookie.</returns>
        public static CookieCollection GetCookiesByHeader(string setCookie)
        {
            var cookieCollection = new CookieCollection();
            if (string.IsNullOrWhiteSpace(setCookie))
            {
                return cookieCollection;
            }

            // The ",T" suffix is a sentinel so that the look-ahead in RegexSplitCookie2
            // also terminates the last cookie of the header.
            foreach (Match item in RegexSplitCookie2.Matches(setCookie + ",T"))
            {
                var cookie = ParseCookie(item.Value);
                if (cookie != null)
                {
                    cookieCollection.Add(cookie);
                }
            }
            return cookieCollection;
        }

        /// <summary>
        /// Builds the request cookie string ("name=value; name=value; ") for the given URI
        /// from a Set-Cookie header, skipping expired cookies and cookies of other domains.
        /// </summary>
        /// <param name="setCookie">Set-Cookie header value.</param>
        /// <param name="uri">Target URI whose host is matched against each cookie's Domain.</param>
        /// <returns>The cookie string; empty when no cookie applies.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
        public static string GetCookies(string setCookie, Uri uri)
        {
            if (uri == null)
            {
                throw new ArgumentNullException(nameof(uri));
            }

            // Expires is parsed into local time, so it is compared against local "now".
            var now = DateTime.Now;
            var builder = new System.Text.StringBuilder();
            foreach (Cookie cookie in GetCookiesByHeader(setCookie))
            {
                // DateTime.MinValue means "no Expires attribute" (session cookie).
                var expired = cookie.Expires != DateTime.MinValue && cookie.Expires < now;
                if (expired || !DomainMatches(uri.Host, cookie.Domain))
                {
                    continue;
                }
                builder.Append(cookie.Name).Append('=').Append(cookie.Value).Append("; ");
            }
            return builder.ToString();
        }

        /// <summary>
        /// Gets the value of the cookie with the given name from a cookie string.
        /// </summary>
        /// <param name="setCookie">Cookie string.</param>
        /// <param name="name">Cookie name.</param>
        /// <returns>The value, or an empty string when the cookie is not present.</returns>
        public static string GetCookieValueByName(string setCookie, string name)
        {
            if (string.IsNullOrEmpty(setCookie) || string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }
            var match = BuildValueRegex(name).Match(setCookie);
            return match.Success ? match.Value : string.Empty;
        }

        /// <summary>
        /// Replaces the value of the cookie with the given name inside a cookie string.
        /// </summary>
        /// <param name="setCookie">Cookie string.</param>
        /// <param name="name">Cookie name.</param>
        /// <param name="value">New value (null is treated as empty).</param>
        /// <returns>The updated cookie string; unchanged when the cookie is not present.</returns>
        public static string SetCookieValueByName(string setCookie, string name, string value)
        {
            if (string.IsNullOrEmpty(setCookie) || string.IsNullOrEmpty(name))
            {
                return setCookie;
            }
            var replacement = value ?? string.Empty;
            // A MatchEvaluator is used so that '$' in the value is inserted literally
            // instead of being interpreted as a regex substitution.
            return BuildValueRegex(name).Replace(setCookie, _ => replacement);
        }

        /// <summary>
        /// Copies the value of the named cookie from <paramref name="newCookie"/> into
        /// <paramref name="oldCookie"/>. Nothing changes unless both contain the cookie.
        /// </summary>
        /// <param name="oldCookie">Cookie string to update.</param>
        /// <param name="newCookie">Cookie string providing the new value.</param>
        /// <param name="name">Cookie name.</param>
        /// <returns>The updated cookie string.</returns>
        public static string UpdateCookieValueByName(string oldCookie, string newCookie, string name)
        {
            if (string.IsNullOrEmpty(oldCookie) || string.IsNullOrEmpty(newCookie) || string.IsNullOrEmpty(name))
            {
                return oldCookie;
            }
            var regex = BuildValueRegex(name);
            var source = regex.Match(newCookie);
            if (source.Success && regex.IsMatch(oldCookie))
            {
                oldCookie = regex.Replace(oldCookie, _ => source.Value);
            }
            return oldCookie;
        }

        /// <summary>
        /// Merges the cookies of a Set-Cookie header into an existing cookie string:
        /// existing cookies get the new value, unknown cookies are prepended.
        /// </summary>
        /// <param name="oldCookie">Existing cookie string (null is treated as empty).</param>
        /// <param name="newCookie">Set-Cookie header value with the new cookies.</param>
        /// <returns>The merged cookie string.</returns>
        public static string UpdateCookieValue(string oldCookie, string newCookie)
        {
            var result = oldCookie ?? string.Empty;
            foreach (Cookie cookie in GetCookiesByHeader(newCookie))
            {
                var regex = BuildValueRegex(cookie.Name);
                var value = cookie.Value;
                result = regex.IsMatch(result)
                    ? regex.Replace(result, _ => value)
                    : $"{cookie.Name}={value}; {result}";
            }
            return result;
        }

        /// <summary>
        /// Builds a regex matching the value of the named cookie: the name must start the
        /// string or follow ';', ',' or whitespace (so "id" does not match inside "sid"),
        /// and the value runs up to the next ';' or the end of the string.
        /// </summary>
        private static Regex BuildValueRegex(string name)
        {
            return new Regex($@"(?<=(?:^|[;,\s]){Regex.Escape(name)}=)[^;]*");
        }

        /// <summary>
        /// Parses one raw cookie ("name=value; attr=value; flag"); returns null when the
        /// first segment does not carry a cookie name.
        /// </summary>
        private static Cookie ParseCookie(string rawCookie)
        {
            var segments = rawCookie.Split(';');
            var first = segments[0];
            var separator = first.IndexOf('=');
            if (separator < 0)
            {
                return null;
            }
            var name = first.Substring(0, separator).Trim();
            if (name.Length == 0)
            {
                return null;
            }

            // The first segment is always the cookie's own name/value pair.
            var cookie = new Cookie
            {
                Name = name,
                Value = first.Substring(separator + 1).Trim()
            };
            for (var index = 1; index < segments.Length; index++)
            {
                ApplyAttribute(cookie, segments[index]);
            }
            return cookie;
        }

        /// <summary>
        /// Applies a single cookie attribute segment (e.g. "Path=/" or "HttpOnly") to the cookie.
        /// Unknown or malformed attributes are ignored.
        /// </summary>
        private static void ApplyAttribute(Cookie cookie, string segment)
        {
            var separator = segment.IndexOf('=');
            if (separator < 0)
            {
                // Flag attributes carry no value.
                var flag = segment.Trim();
                if (flag.Equals("HttpOnly", StringComparison.OrdinalIgnoreCase))
                {
                    cookie.HttpOnly = true;
                }
                else if (flag.Equals("Secure", StringComparison.OrdinalIgnoreCase))
                {
                    cookie.Secure = true;
                }
                return;
            }

            var name = segment.Substring(0, separator).Trim();
            var val = segment.Substring(separator + 1).Trim();
            if (name.Equals("Domain", StringComparison.OrdinalIgnoreCase))
            {
                cookie.Domain = val;
            }
            else if (name.Equals("Expires", StringComparison.OrdinalIgnoreCase))
            {
                // Cookie dates are English/RFC 1123 style, so parse culture-independently.
                if (DateTime.TryParse(val, System.Globalization.CultureInfo.InvariantCulture,
                        System.Globalization.DateTimeStyles.None, out var expires))
                {
                    cookie.Expires = expires;
                }
            }
            else if (name.Equals("Path", StringComparison.OrdinalIgnoreCase))
            {
                cookie.Path = val;
            }
            else if (name.Equals("Version", StringComparison.OrdinalIgnoreCase))
            {
                if (int.TryParse(val, System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture, out var version) && version >= 0)
                {
                    cookie.Version = version;
                }
            }
        }

        /// <summary>
        /// Returns true when the host is the cookie domain itself or one of its sub-domains.
        /// A cookie without a Domain attribute applies to the host that sent it.
        /// </summary>
        private static bool DomainMatches(string host, string cookieDomain)
        {
            var domain = (cookieDomain ?? string.Empty).Trim().TrimStart('.');
            if (domain.Length == 0)
            {
                return true;
            }
            return host.Equals(domain, StringComparison.OrdinalIgnoreCase)
                   || host.EndsWith("." + domain, StringComparison.OrdinalIgnoreCase);
        }
    }

  更新:加入自動更新Cookie 的方法

相關文章