Redis 3.0 叢集監控指令碼
這是我自己寫的一個簡單的監控指令碼,用於監控官方 Redis 3.0 叢集(Redis Cluster)。
監控項:
叢集狀態,節點狀態,節點分佈,連線數,ops,命中率,記憶體使用率,慢查詢,key分佈,持久化相關項等。
執行展示資訊如下:
後續會新增一些集中控制項,比如引數調整等。
指令碼內容:
#!/bin/bash
###### Variable definitions ####################################

# Authentication password.
# NOTE(review): hard-coded credential; it is not referenced by any code
# visible in this script - confirm whether it is still needed.
auth='ASF6tp73yF5VPGVhHJuF'

# Directory holding the per-port configuration files (<port>.cnf).
configdir="/data/redis_cluster"

# Redis installation (binary) directory.
bindir="/usr/local/redis-3.0.3/bin/"

# redis-cli command line. The trailing space is part of the value, so the
# variable has to be expanded unquoted when it is used as a command.
client="${bindir}/redis-cli "

# Initial value of the maximum memory setting.
max_memory=0

# Marker for "no valid action argument was seen": stays 0 until a known
# action is handled.
flag=0

# Addresses (ip:port) of the cluster nodes.
list=(100.69.212.221:8001 100.69.212.221:8002 100.69.212.221:8003)

# Whether to send SMS alarms: not sent by default; "1" means send.
is_sendms="$2"
###################################################
### Alarm function definition ####################################
#######################################
# Emit an alarm message when alarming is switched on.
# Arguments:
#   $1 - alarm text
#   $2 - "1" to emit the alarm; any other value (or nothing) stays silent
# Outputs:
#   the alarm line on stdout (placeholder for a real notification channel)
#######################################
sendms() {
  [[ "$2" == "1" ]] || return 0
  # Fill in your own alarm delivery here.
  printf '%s\n' "報警:$1"
}
# Exported so that child shells can call it as well.
export -f sendms
###################################################
### Redis status statistics function ###############################
#######################################
# Print a status report for one redis instance and raise alarms.
# Globals:
#   client      (read) redis-cli command line; expanded unquoted on purpose
#               because the value may carry extra words/trailing blanks
#   max_memory  (read) memory limit in bytes; 0 or empty is treated as
#               "no limit known" and disables the memory-usage alarm
# Arguments:
#   $1 - instance address as ip:port
#   $2 - alarm switch handed on to sendms ("1" = send)
# Outputs:
#   the report on stdout; alarms go through the exported sendms function
#######################################
# shellcheck disable=SC2086
statistics_redis(){
  local addr=$1
  local notify=$2
  local ip port
  local is_slowlog=0
  local last_slowlog_time current_time

  IFS=: read -r ip port _ <<<"$addr"

  # Line 2 of the "slowlog get 1" reply is taken as the unix timestamp of
  # the newest slow query; anything non-numeric (empty log, error text) is
  # ignored instead of breaking the arithmetic below.
  last_slowlog_time=$($client -h "$ip" -p "$port" slowlog get 1 | awk 'NR == 2 { print $1 }')
  current_time=$(date +%s)
  if [[ "$last_slowlog_time" =~ ^[0-9]+$ ]] \
    && (( last_slowlog_time + 60 * 10 > current_time )); then
    # A slow query was logged within the last 10 minutes.
    is_slowlog=1
  fi

  $client -h "$ip" -p "$port" info all \
    | awk -F ':' \
        -v max_memory="${max_memory:-0}" \
        -v addr="$addr" \
        -v is_sendms="$notify" \
        -v is_slowlog="$is_slowlog" '
    # Forward one alarm through the exported sendms shell function. Both
    # arguments are single-quoted so the shell sees exactly two words.
    function alarm(msg) {
      fflush()
      system("sendms \047" addr "_" msg "\047 \047" is_sendms "\047")
    }
    {
      # INFO lines may end in CR; drop it so values stay numeric and clean.
      sub(/\r$/, "")
      if ($1 == "uptime_in_seconds") uptime = $2
      else if ($1 == "connected_clients") cnt_clients = $2
      else if ($1 == "used_memory") used_memory = $2
      else if ($1 == "used_memory_rss") used_memory_rss = $2
      else if ($1 == "used_memory_peak") used_memory_peak = $2
      else if ($1 == "mem_fragmentation_ratio") mem_ratio = $2
      else if ($1 == "rdb_last_bgsave_status") bgsave_status = $2
      else if ($1 == "aof_last_write_status") aof_write_status = $2
      else if ($1 == "aof_last_bgrewrite_status") aof_bgrewrite_status = $2
      else if ($1 == "instantaneous_ops_per_sec") ops_per_sec = $2
      else if ($1 == "keyspace_hits") keyspace_hits = $2
      else if ($1 == "keyspace_misses") keyspace_misses = $2
      else if ($2 ~ /^keys=/) keys = keys "\n" $2
    }
    END {
      # +0.1 keeps the ratio defined when there were no lookups at all.
      hit_rate = keyspace_hits / (keyspace_hits + keyspace_misses + 0.1) * 100

      printf("\033[1;33;1m####概況:\033[0m\n")
      printf(" 啟動時間:%d\n", uptime)
      printf(" 當前連線數:%d\n", cnt_clients)
      printf(" 當前OPS:%d\n", ops_per_sec)
      printf(" 當前key分佈情況:%s\n", keys)

      printf("\033[1;33;1m####命中情況:\033[0m\n")
      printf(" 命中次數: %d\n", keyspace_hits)
      printf(" miss次數: %d\n", keyspace_misses)
      printf(" 命中率:%d%%\n", hit_rate)

      printf("\033[1;33;1m####記憶體使用情況:\033[0m\n")
      printf(" 1)分配總記憶體:%dMb\n", used_memory_rss / 1024 / 1024)
      printf(" 2)使用記憶體:%dMb\n", used_memory / 1024 / 1024)
      printf(" 3)峰值:%dMb\n", used_memory_peak / 1024 / 1024)
      printf(" 4)最大記憶體:%dMb\n", max_memory / 1024 / 1024)
      printf(" 5)記憶體碎片率:%s\n", mem_ratio)

      printf("\033[1;33;1m####持久化: \033[0m\n")
      printf(" 上次bgsave狀態:%s\n", bgsave_status)
      printf(" 上次aof狀態:%s\n", aof_write_status)
      printf(" 上次rewrite狀態:%s\n", aof_bgrewrite_status)

      printf("\033[1;33;1m####報警資訊: \033[0m\n")
      if (cnt_clients + 0 >= 1000) {
        printf("\033[1;31;1mwarning: 當前連線數:%d,超標\033[0m\n", cnt_clients)
        alarm("當前連線數:" (cnt_clients + 0))
      }
      if (ops_per_sec + 0 >= 50000) {
        printf("\033[1;31;1mwarning: 當前OPS:%d,超標\033[0m\n", ops_per_sec)
        alarm("當前OPS:" (ops_per_sec + 0))
      }
      # The low hit-rate condition is only printed, never sent as an alarm.
      if (hit_rate <= 50) {
        printf("\033[1;31;1mwarning:當前命中率:%d%%,過低\033[0m\n", hit_rate)
      }
      # Without a positive limit the usage ratio is undefined, so the check
      # is skipped instead of dividing by zero.
      if (max_memory + 0 > 0) {
        mem_pct = used_memory / max_memory * 100
        if (mem_pct >= 80) {
          printf("\033[1;31;1mwarning:當前記憶體使用:%d%%,過高\033[0m\n", mem_pct)
          alarm("記憶體使用率:" int(mem_pct) "%")
        }
      }
      if (is_slowlog > 0) {
        printf("\033[1;31;1mwarning:存在慢查詢,請確認!\033[0m\n")
        alarm("存在慢查詢")
      }
      # Checks on the bgsave / aof write / aof rewrite status fields were
      # disabled in the original script and remain disabled here.
    }'
}
###############################################
#### BEGIN: iterate over list ##################

#######################################
# Print the cluster report using one live node, then per-instance statistics.
# Globals:
#   client      (read)    redis-cli command line, expanded unquoted on purpose
#   list        (read)    configured node addresses; its size is the expected
#                         number of live nodes
#   is_sendms   (read)    alarm switch handed on to sendms/statistics_redis
#   max_memory  (written) maxmemory of the queried node (0 when unknown),
#                         read later by statistics_redis
# Arguments:
#   $1 - ip of the live node, $2 - its port, $3 - the same address as ip:port
# Outputs:
#   the report on stdout
#######################################
# shellcheck disable=SC2086
_cluster_status_report() {
  local ip=$1 port=$2 config=$3
  local banner="\e[1;32;1m#----------------------------------#\e[0m"
  local cluster_state live_nodes nodes_alive
  local mid maddr mip slave shard member
  local same_host slave_no
  local cnt=1 shard_no=0
  local -a slave_list=() shards=() members=()

  echo -e "$banner"
  echo -e "\e[1;32;1m#叢集基本資訊: #\e[0m"
  echo -e "$banner"

  # Take the whole value after "cluster_state:" (CR stripped) so that a
  # non-ok state such as "fail" is reported in full, not cut to two bytes.
  cluster_state=$($client -h "$ip" -p "$port" cluster info \
    | awk -F ':' '$1 == "cluster_state" { sub(/\r$/, "", $2); print $2 }')
  if [[ "$cluster_state" == "ok" ]]; then
    echo -e "cluster_state:\e[1;32;1mok\e[0m"
  else
    echo -e "\033[1;31;1mcluster_state:${cluster_state}\e[0m"
    sendms "${config}_cluster_state:${cluster_state}" "$is_sendms"
  fi

  # Query the node table once and reuse it. Blank lines are dropped so they
  # are never counted as nodes; failed/disconnected entries are excluded.
  live_nodes=$($client -h "$ip" -p "$port" cluster nodes \
    | awk 'NF && !/fail|disconnected/')
  nodes_alive=$(awk 'NF { n++ } END { print n + 0 }' <<<"$live_nodes")
  if (( ${#list[@]} != nodes_alive )); then
    echo -e "total nodes:${#list[*]}, \033[1;31;1malive nodes:${nodes_alive}!!\033[0m"
    echo -e "\033[1;31;1mWarning: some nodes have down!!\033[0m"
    sendms "${config}_cluster_state:some_nodes_is_down" "$is_sendms"
  else
    echo "total nodes:${#list[*]}, alive nodes:${nodes_alive}"
  fi

  # "config5588" is used as-is from the original script - presumably the
  # CONFIG command renamed on the servers; confirm against the redis config.
  max_memory=$($client -h "$ip" -p "$port" config5588 get maxmemory \
    | awk 'NR > 1 { print $1 }')
  max_memory=${max_memory:-0}

  ### Build the master -> slaves tree of the cluster.
  echo -e "\e[1;32;4m#####主從結構樹:\e[0m"
  while IFS=, read -r mid maddr; do
    [[ -n "$mid" ]] || continue
    mip=${maddr%%:*}
    echo -e "\033[1;36;1mmaster${cnt}:${maddr}\033[0m"

    # Slaves are the live entries whose master-id column equals this master.
    mapfile -t slave_list < <(
      awk -v mid="$mid" '$3 ~ /slave/ && $4 == mid { print $2 }' <<<"$live_nodes"
    )
    same_host=0
    slave_no=0
    for slave in "${slave_list[@]}"; do
      slave_no=$((slave_no + 1))
      echo " |-->slave${slave_no}:${slave}"
      # Exact ip comparison instead of an unanchored regex match.
      if [[ "${slave%%:*}" == "$mip" ]]; then
        same_host=1
      fi
    done
    if (( same_host )); then
      echo -e "\033[1;31;1mWarning: master's slave node is on the master's server!!\033[0m"
      sendms "${maddr}_cluster_state:M-S_is_on_same_server" "$is_sendms"
    fi

    # One shard = "master slave1 slave2 ..." (space separated).
    shards+=("${maddr} ${slave_list[*]}")
    cnt=$((cnt + 1))
  done < <(
    awk '$3 ~ /master/ { print $1 "," $2 }' <<<"$live_nodes" | sort -k 2,2 -t ','
  )

  ### Collect the status of every live instance, shard by shard.
  for shard in "${shards[@]}"; do
    shard_no=$((shard_no + 1))
    read -r -a members <<<"$shard"
    echo ""
    echo -e "$banner"
    echo -e "\e[1;32;1m#分片${shard_no}: #\e[0m"
    echo -e "$banner"
    for member in "${members[@]}"; do
      echo -e "\e[1;35;1m+++++${member}+++++\e[0m"
      statistics_redis "$member" "$is_sendms"
    done
  done
}

#######################################
# Entry point: run the requested action against every configured node.
# Globals:
#   list, client, bindir, configdir (read); flag (written: set to 1 once a
#   known action was handled)
# Arguments:
#   $1 - action: start | stop | status
# Outputs:
#   progress/report on stdout; usage text when the action is unknown
# Returns:
#   exits the script with 0 right after a successful status report
#######################################
# shellcheck disable=SC2086
cluster_control_main() {
  local action=${1:-}
  local config ip port
  local isalive=""

  for config in "${list[@]}"; do
    IFS=: read -r ip port _ <<<"$config"
    case "$action" in
      stop)
        flag=1
        echo -n "$config "
        # "shutdown5588" is kept from the original script - presumably the
        # renamed SHUTDOWN command; confirm against the redis config.
        $client -h "$ip" -p "$port" shutdown5588 2>/dev/null
        # The original script deliberately does not check the exit status
        # here (its check is commented out), so success is always reported.
        echo "shutdown success!"
        ;;
      start)
        flag=1
        echo -n "$config "
        if ssh "$ip" "${bindir}/redis-server ${configdir}/${port}.cnf"; then
          echo "started"
        else
          echo "starting meet error"
        fi
        ;;
      status)
        flag=1
        ### Find one live node and inspect the cluster through it.
        isalive=$($client -h "$ip" -p "$port" ping 2>/dev/null)
        if [[ "$isalive" == "PONG" ]]; then
          _cluster_status_report "$ip" "$port" "$config"
          exit 0
        fi
        ;;
    esac
  done

  if [[ "${flag:-0}" == "0" ]]; then
    echo -e "\e[1;31;1musage: sh cluster_control [start|stop|status] [1]\e[0m"
  fi
  # Reached with action=status only when no node answered the ping.
  if [[ "$isalive" != "PONG" && "$action" == "status" ]]; then
    echo -e "\e[1;31;1mAll nodes is stopped.\e[0m"
  fi
}

cluster_control_main "$@"
監控項:
叢集狀態,節點狀態,節點分佈,連線數,ops,命中率,記憶體使用率,慢查詢,key分佈,持久化相關項等。
執行展示資訊如下:
後續會新增一些集中控制項,比如引數調整等。
指令碼內容:
#!/bin/bash
###### Variable definitions ####################################

# Authentication password.
# NOTE(review): hard-coded credential; it is not referenced by any code
# visible in this script - confirm whether it is still needed.
auth='ASF6tp73yF5VPGVhHJuF'

# Directory holding the per-port configuration files (<port>.cnf).
configdir="/data/redis_cluster"

# Redis installation (binary) directory.
bindir="/usr/local/redis-3.0.3/bin/"

# redis-cli command line. The trailing space is part of the value, so the
# variable has to be expanded unquoted when it is used as a command.
client="${bindir}/redis-cli "

# Initial value of the maximum memory setting.
max_memory=0

# Marker for "no valid action argument was seen": stays 0 until a known
# action is handled.
flag=0

# Addresses (ip:port) of the cluster nodes.
list=(100.69.212.221:8001 100.69.212.221:8002 100.69.212.221:8003)

# Whether to send SMS alarms: not sent by default; "1" means send.
is_sendms="$2"
###################################################
### Alarm function definition ####################################
#######################################
# Emit an alarm message when alarming is switched on.
# Arguments:
#   $1 - alarm text
#   $2 - "1" to emit the alarm; any other value (or nothing) stays silent
# Outputs:
#   the alarm line on stdout (placeholder for a real notification channel)
#######################################
sendms() {
  [[ "$2" == "1" ]] || return 0
  # Fill in your own alarm delivery here.
  printf '%s\n' "報警:$1"
}
# Exported so that child shells can call it as well.
export -f sendms
###################################################
### Redis status statistics function ###############################
#######################################
# Print a status report for one redis instance and raise alarms.
# Globals:
#   client      (read) redis-cli command line; expanded unquoted on purpose
#               because the value may carry extra words/trailing blanks
#   max_memory  (read) memory limit in bytes; 0 or empty is treated as
#               "no limit known" and disables the memory-usage alarm
# Arguments:
#   $1 - instance address as ip:port
#   $2 - alarm switch handed on to sendms ("1" = send)
# Outputs:
#   the report on stdout; alarms go through the exported sendms function
#######################################
# shellcheck disable=SC2086
statistics_redis(){
  local addr=$1
  local notify=$2
  local ip port
  local is_slowlog=0
  local last_slowlog_time current_time

  IFS=: read -r ip port _ <<<"$addr"

  # Line 2 of the "slowlog get 1" reply is taken as the unix timestamp of
  # the newest slow query; anything non-numeric (empty log, error text) is
  # ignored instead of breaking the arithmetic below.
  last_slowlog_time=$($client -h "$ip" -p "$port" slowlog get 1 | awk 'NR == 2 { print $1 }')
  current_time=$(date +%s)
  if [[ "$last_slowlog_time" =~ ^[0-9]+$ ]] \
    && (( last_slowlog_time + 60 * 10 > current_time )); then
    # A slow query was logged within the last 10 minutes.
    is_slowlog=1
  fi

  $client -h "$ip" -p "$port" info all \
    | awk -F ':' \
        -v max_memory="${max_memory:-0}" \
        -v addr="$addr" \
        -v is_sendms="$notify" \
        -v is_slowlog="$is_slowlog" '
    # Forward one alarm through the exported sendms shell function. Both
    # arguments are single-quoted so the shell sees exactly two words.
    function alarm(msg) {
      fflush()
      system("sendms \047" addr "_" msg "\047 \047" is_sendms "\047")
    }
    {
      # INFO lines may end in CR; drop it so values stay numeric and clean.
      sub(/\r$/, "")
      if ($1 == "uptime_in_seconds") uptime = $2
      else if ($1 == "connected_clients") cnt_clients = $2
      else if ($1 == "used_memory") used_memory = $2
      else if ($1 == "used_memory_rss") used_memory_rss = $2
      else if ($1 == "used_memory_peak") used_memory_peak = $2
      else if ($1 == "mem_fragmentation_ratio") mem_ratio = $2
      else if ($1 == "rdb_last_bgsave_status") bgsave_status = $2
      else if ($1 == "aof_last_write_status") aof_write_status = $2
      else if ($1 == "aof_last_bgrewrite_status") aof_bgrewrite_status = $2
      else if ($1 == "instantaneous_ops_per_sec") ops_per_sec = $2
      else if ($1 == "keyspace_hits") keyspace_hits = $2
      else if ($1 == "keyspace_misses") keyspace_misses = $2
      else if ($2 ~ /^keys=/) keys = keys "\n" $2
    }
    END {
      # +0.1 keeps the ratio defined when there were no lookups at all.
      hit_rate = keyspace_hits / (keyspace_hits + keyspace_misses + 0.1) * 100

      printf("\033[1;33;1m####概況:\033[0m\n")
      printf(" 啟動時間:%d\n", uptime)
      printf(" 當前連線數:%d\n", cnt_clients)
      printf(" 當前OPS:%d\n", ops_per_sec)
      printf(" 當前key分佈情況:%s\n", keys)

      printf("\033[1;33;1m####命中情況:\033[0m\n")
      printf(" 命中次數: %d\n", keyspace_hits)
      printf(" miss次數: %d\n", keyspace_misses)
      printf(" 命中率:%d%%\n", hit_rate)

      printf("\033[1;33;1m####記憶體使用情況:\033[0m\n")
      printf(" 1)分配總記憶體:%dMb\n", used_memory_rss / 1024 / 1024)
      printf(" 2)使用記憶體:%dMb\n", used_memory / 1024 / 1024)
      printf(" 3)峰值:%dMb\n", used_memory_peak / 1024 / 1024)
      printf(" 4)最大記憶體:%dMb\n", max_memory / 1024 / 1024)
      printf(" 5)記憶體碎片率:%s\n", mem_ratio)

      printf("\033[1;33;1m####持久化: \033[0m\n")
      printf(" 上次bgsave狀態:%s\n", bgsave_status)
      printf(" 上次aof狀態:%s\n", aof_write_status)
      printf(" 上次rewrite狀態:%s\n", aof_bgrewrite_status)

      printf("\033[1;33;1m####報警資訊: \033[0m\n")
      if (cnt_clients + 0 >= 1000) {
        printf("\033[1;31;1mwarning: 當前連線數:%d,超標\033[0m\n", cnt_clients)
        alarm("當前連線數:" (cnt_clients + 0))
      }
      if (ops_per_sec + 0 >= 50000) {
        printf("\033[1;31;1mwarning: 當前OPS:%d,超標\033[0m\n", ops_per_sec)
        alarm("當前OPS:" (ops_per_sec + 0))
      }
      # The low hit-rate condition is only printed, never sent as an alarm.
      if (hit_rate <= 50) {
        printf("\033[1;31;1mwarning:當前命中率:%d%%,過低\033[0m\n", hit_rate)
      }
      # Without a positive limit the usage ratio is undefined, so the check
      # is skipped instead of dividing by zero.
      if (max_memory + 0 > 0) {
        mem_pct = used_memory / max_memory * 100
        if (mem_pct >= 80) {
          printf("\033[1;31;1mwarning:當前記憶體使用:%d%%,過高\033[0m\n", mem_pct)
          alarm("記憶體使用率:" int(mem_pct) "%")
        }
      }
      if (is_slowlog > 0) {
        printf("\033[1;31;1mwarning:存在慢查詢,請確認!\033[0m\n")
        alarm("存在慢查詢")
      }
      # Checks on the bgsave / aof write / aof rewrite status fields were
      # disabled in the original script and remain disabled here.
    }'
}
###############################################
#### BEGIN: iterate over list ##################

#######################################
# Print the cluster report using one live node, then per-instance statistics.
# Globals:
#   client      (read)    redis-cli command line, expanded unquoted on purpose
#   list        (read)    configured node addresses; its size is the expected
#                         number of live nodes
#   is_sendms   (read)    alarm switch handed on to sendms/statistics_redis
#   max_memory  (written) maxmemory of the queried node (0 when unknown),
#                         read later by statistics_redis
# Arguments:
#   $1 - ip of the live node, $2 - its port, $3 - the same address as ip:port
# Outputs:
#   the report on stdout
#######################################
# shellcheck disable=SC2086
_cluster_status_report() {
  local ip=$1 port=$2 config=$3
  local banner="\e[1;32;1m#----------------------------------#\e[0m"
  local cluster_state live_nodes nodes_alive
  local mid maddr mip slave shard member
  local same_host slave_no
  local cnt=1 shard_no=0
  local -a slave_list=() shards=() members=()

  echo -e "$banner"
  echo -e "\e[1;32;1m#叢集基本資訊: #\e[0m"
  echo -e "$banner"

  # Take the whole value after "cluster_state:" (CR stripped) so that a
  # non-ok state such as "fail" is reported in full, not cut to two bytes.
  cluster_state=$($client -h "$ip" -p "$port" cluster info \
    | awk -F ':' '$1 == "cluster_state" { sub(/\r$/, "", $2); print $2 }')
  if [[ "$cluster_state" == "ok" ]]; then
    echo -e "cluster_state:\e[1;32;1mok\e[0m"
  else
    echo -e "\033[1;31;1mcluster_state:${cluster_state}\e[0m"
    sendms "${config}_cluster_state:${cluster_state}" "$is_sendms"
  fi

  # Query the node table once and reuse it. Blank lines are dropped so they
  # are never counted as nodes; failed/disconnected entries are excluded.
  live_nodes=$($client -h "$ip" -p "$port" cluster nodes \
    | awk 'NF && !/fail|disconnected/')
  nodes_alive=$(awk 'NF { n++ } END { print n + 0 }' <<<"$live_nodes")
  if (( ${#list[@]} != nodes_alive )); then
    echo -e "total nodes:${#list[*]}, \033[1;31;1malive nodes:${nodes_alive}!!\033[0m"
    echo -e "\033[1;31;1mWarning: some nodes have down!!\033[0m"
    sendms "${config}_cluster_state:some_nodes_is_down" "$is_sendms"
  else
    echo "total nodes:${#list[*]}, alive nodes:${nodes_alive}"
  fi

  # "config5588" is used as-is from the original script - presumably the
  # CONFIG command renamed on the servers; confirm against the redis config.
  max_memory=$($client -h "$ip" -p "$port" config5588 get maxmemory \
    | awk 'NR > 1 { print $1 }')
  max_memory=${max_memory:-0}

  ### Build the master -> slaves tree of the cluster.
  echo -e "\e[1;32;4m#####主從結構樹:\e[0m"
  while IFS=, read -r mid maddr; do
    [[ -n "$mid" ]] || continue
    mip=${maddr%%:*}
    echo -e "\033[1;36;1mmaster${cnt}:${maddr}\033[0m"

    # Slaves are the live entries whose master-id column equals this master.
    mapfile -t slave_list < <(
      awk -v mid="$mid" '$3 ~ /slave/ && $4 == mid { print $2 }' <<<"$live_nodes"
    )
    same_host=0
    slave_no=0
    for slave in "${slave_list[@]}"; do
      slave_no=$((slave_no + 1))
      echo " |-->slave${slave_no}:${slave}"
      # Exact ip comparison instead of an unanchored regex match.
      if [[ "${slave%%:*}" == "$mip" ]]; then
        same_host=1
      fi
    done
    if (( same_host )); then
      echo -e "\033[1;31;1mWarning: master's slave node is on the master's server!!\033[0m"
      sendms "${maddr}_cluster_state:M-S_is_on_same_server" "$is_sendms"
    fi

    # One shard = "master slave1 slave2 ..." (space separated).
    shards+=("${maddr} ${slave_list[*]}")
    cnt=$((cnt + 1))
  done < <(
    awk '$3 ~ /master/ { print $1 "," $2 }' <<<"$live_nodes" | sort -k 2,2 -t ','
  )

  ### Collect the status of every live instance, shard by shard.
  for shard in "${shards[@]}"; do
    shard_no=$((shard_no + 1))
    read -r -a members <<<"$shard"
    echo ""
    echo -e "$banner"
    echo -e "\e[1;32;1m#分片${shard_no}: #\e[0m"
    echo -e "$banner"
    for member in "${members[@]}"; do
      echo -e "\e[1;35;1m+++++${member}+++++\e[0m"
      statistics_redis "$member" "$is_sendms"
    done
  done
}

#######################################
# Entry point: run the requested action against every configured node.
# Globals:
#   list, client, bindir, configdir (read); flag (written: set to 1 once a
#   known action was handled)
# Arguments:
#   $1 - action: start | stop | status
# Outputs:
#   progress/report on stdout; usage text when the action is unknown
# Returns:
#   exits the script with 0 right after a successful status report
#######################################
# shellcheck disable=SC2086
cluster_control_main() {
  local action=${1:-}
  local config ip port
  local isalive=""

  for config in "${list[@]}"; do
    IFS=: read -r ip port _ <<<"$config"
    case "$action" in
      stop)
        flag=1
        echo -n "$config "
        # "shutdown5588" is kept from the original script - presumably the
        # renamed SHUTDOWN command; confirm against the redis config.
        $client -h "$ip" -p "$port" shutdown5588 2>/dev/null
        # The original script deliberately does not check the exit status
        # here (its check is commented out), so success is always reported.
        echo "shutdown success!"
        ;;
      start)
        flag=1
        echo -n "$config "
        if ssh "$ip" "${bindir}/redis-server ${configdir}/${port}.cnf"; then
          echo "started"
        else
          echo "starting meet error"
        fi
        ;;
      status)
        flag=1
        ### Find one live node and inspect the cluster through it.
        isalive=$($client -h "$ip" -p "$port" ping 2>/dev/null)
        if [[ "$isalive" == "PONG" ]]; then
          _cluster_status_report "$ip" "$port" "$config"
          exit 0
        fi
        ;;
    esac
  done

  if [[ "${flag:-0}" == "0" ]]; then
    echo -e "\e[1;31;1musage: sh cluster_control [start|stop|status] [1]\e[0m"
  fi
  # Reached with action=status only when no node answered the ping.
  if [[ "$isalive" != "PONG" && "$action" == "status" ]]; then
    echo -e "\e[1;31;1mAll nodes is stopped.\e[0m"
  fi
}

cluster_control_main "$@"
來自「ITPUB部落格」,連結:http://blog.itpub.net/20625855/viewspace-1847453/ ,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- Redis安裝+叢集+效能監控Redis
- 【shell】磁碟監控指令碼指令碼
- prometheus監控k8s叢集PrometheusK8S
- consul叢集docker版本指令碼Docker指令碼
- PostgreSQL之鎖監控指令碼SQL指令碼
- 一文讀懂clickhouse叢集監控
- xcall叢集執行命令指令碼指令碼
- storm叢集啟動停止指令碼ORM指令碼
- 監控系統告警指令碼集合指令碼
- 使用夜鶯+categraf監控redis和redis叢集Redis
- 如何用Prometheus監控十萬container的Kubernetes叢集PrometheusAI
- Kubernetes叢集部署史上最詳細(二)Prometheus監控Kubernetes叢集Prometheus
- Shell 系統資訊監控指令碼指令碼
- 關於Oracle 12c的叢集監控(CHM)Oracle
- 基於 ZooKeeper 實現爬蟲叢集的監控爬蟲
- 如何優雅地使用雲原生 Prometheus 監控叢集Prometheus
- 監控磁碟使用率的shell指令碼指令碼
- 網路卡流量監控指令碼,python實現指令碼Python
- centos 監控web站點是否500 指令碼CentOSWeb指令碼
- shell指令碼:監控MySQL服務是否正常指令碼MySql
- Shell指令碼監控MySQL主從狀態指令碼MySql
- Ceph Reef(18.2.X)的內建Prometheus監控叢集Prometheus
- vivo 容器叢集監控系統架構與實踐架構
- WGCLOUD 叢集監控平臺 v2.1,正式釋出原始碼,支援開源GCCloud原始碼
- shell指令碼監控啟動停止weblogic服務指令碼Web
- 關於前端指令碼異常監控的思考前端指令碼
- Prometheus多叢集監控的3種方案,你選哪種?Prometheus
- 容器叢集監控系統架構如何對症下藥?架構
- 打造雲原生大型分散式監控系統(四): Kvass+Thanos 監控超大規模容器叢集分散式
- 在 Linux 上用 Bash 指令碼監控 messages 日誌Linux指令碼
- 使用Shell指令碼程式監控網站URL是否正常指令碼網站
- 寫了個監控 ElasticSearch 程式異常的指令碼!Elasticsearch指令碼
- Kubernetes 叢集和應用監控方案的設計與實踐
- 監控Kubernetes叢集證書過期時間的三種方案
- 使用kubeadm安裝kubernetes叢集指令碼(更新日期:2024.7.19)指令碼
- 阿里雲 ACK One 多叢集管理全面升級:多叢集服務、多叢集監控、兩地三中心應用容災阿里
- 透過shell指令碼監控日誌切換頻率指令碼
- 基於Ping和Telnet/NC的監控指令碼案例分析指令碼
- zabbix-mongodb監控指令碼(高效能、低佔用)MongoDB指令碼