如何快速批量檢查所有虛擬機器上的一些配置?
答:使用ansible統計所有虛擬機器上需要的配置資訊,結果示例見下,統計方法見check.sh指令碼
swapTotal[swap總大小] swapUsed[已用swap大小] swapUsedPer[swap佔用百分比] swappiness[控制何時使用swap的引數] totalMem[總記憶體大小] availableMem[可用記憶體大小] availablePer[可用記憶體佔比] cpuCores[cpu核數] centos[centos版本號] diskSize[硬碟大小] diskUsed[硬碟已使用大小] diskUsedPer[已用磁碟佔比] gc_cnt[gc耗時超標個數]
10.0.5.105 SUCCESS | recon-app-preprocess b8367764797b5f392ee82af741f289c7107762dc | swapTotal[8191996] swapUsed[97768] swapUsedPer[1.19] | swappiness[30] | totalMem[7806.48M] availableMem[1395.55M] availablePer[17.88%] | cpuCores[4] | centos[7.0.1406] | diskSize[134G] diskUsed[3.0G] diskUsedPer[3%] | gc_cnt[0] |
10.0.5.106 SUCCESS | recon-app-preprocess b8367764797b5f392ee82af741f289c7107762dc | swapTotal[8191996] swapUsed[38860] swapUsedPer[0.47] | swappiness[30] | totalMem[7806.48M] availableMem[1327.68M] availablePer[17.01%] | cpuCores[4] | centos[7.0.1406] | diskSize[134G] diskUsed[3.0G] diskUsedPer[3%] | gc_cnt[0] |
10.0.5.119 SUCCESS | recon-basedata-app-service b6313ad774ac64cc8da84a36eaee9f98c6258342 | swapTotal[8191996] swapUsed[0] swapUsedPer[0.00] | swappiness[30] | totalMem[7806.48M] availableMem[2461.65M] availablePer[31.53%] | cpuCores[4] | centos[7.0.1406] | diskSize[134G] diskUsed[2.7G] diskUsedPer[2%] | gc_cnt[0] |
10.0.5.120 SUCCESS | recon-basedata-app-service b6313ad774ac64cc8da84a36eaee9f98c6258342 | swapTotal[8191996] swapUsed[0] swapUsedPer[0.00] | swappiness[30] | totalMem[7806.48M] availableMem[2489.03M] availablePer[31.88%] | cpuCores[4] | centos[7.0.1406] | diskSize[134G] diskUsed[2.7G] diskUsedPer[2%] | gc_cnt[0] |
10.0.5.129 SUCCESS | recon-front-app 061a05510d90c8b89ddf00e3445052235291d3a2 | swapTotal[8388604] swapUsed[0] swapUsedPer[0.00] | swappiness[1] | totalMem[7804.19M] availableMem[2550.10M] availablePer[32.68%] | cpuCores[4] | centos[7.2.1511] | diskSize[133G] diskUsed[2.1G] diskUsedPer[2%] | gc_cnt[0] |
10.0.5.131 SUCCESS | recon-front-app 061a05510d90c8b89ddf00e3445052235291d3a2 | swapTotal[8388604] swapUsed[0] swapUsedPer[0.00] | swappiness[1] | totalMem[7804.19M] availableMem[2580.34M] availablePer[33.06%] | cpuCores[4] | centos[7.2.1511] | diskSize[133G] diskUsed[2.1G] diskUsedPer[2%] | gc_cnt[0] |
10.0.5.132 SUCCESS | recon-front-app 061a05510d90c8b89ddf00e3445052235291d3a2 | swapTotal[8388604] swapUsed[0] swapUsedPer[0.00] | swappiness[1] | totalMem[7804.19M] availableMem[2549.73M] availablePer[32.67%] | cpuCores[4] | centos[7.2.1511] | diskSize[133G] diskUsed[2.1G] diskUsedPer[2%] | gc_cnt[0] |
複製程式碼
check.sh
#!/bin/bash
#----------------------------
# Virtual machine information check tool
# author: xiaoxiaomai
# date: 2018/10/19
#
# Usage: check.sh [gc_time_threshold]
#   gc_time_threshold  optional; today's ParNew entries in the GC log whose
#                      "real=" value exceeds it are counted (default: 1)
#
# Each variable below holds a shell snippet that ansible runs on every host.
# "\$" and "\"" are escaped so that they reach the remote command literally;
# $today_date and $gc_time_threshold are left unescaped on purpose and are
# expanded here, before the command is sent.
#---------------------------
today_date=$(date +%Y-%m-%d)
gc_time_threshold=1
# Quote the argument so an empty or multi-word value cannot break the test.
if [ -n "$1" ]; then
    gc_time_threshold=$1
fi
# Fetch the local healthcheck page, keep the lines matching Name or commit.id,
# and print fields 2 and 4 after splitting on '=' or 'git'
# (presumably application name and commit id -- confirm against the page format).
healthcheck="curl -s http://localhost:8080/healthcheck.html|grep -a -E \"Name|commit.id\"|awk -F '<' '{str=str\$1;}END{print str}'|awk -F '=|git' '{print \$2,\$4}'"
# Swap total / used / used-percentage from the "Swap:" line of free.
# The percentage is reported as 0.00 when the host has no swap, instead of
# letting awk abort on a division by zero.
swaped="free|awk '/Swap:/{print \"swapTotal[\"\$2\"] swapUsed[\"\$3\"] swapUsedPer[\"sprintf(\"%.2f\",(\$2 > 0 ? \$3 * 100/\$2 : 0))\"]\"}'"
# Current vm.swappiness setting.
swappiness="cat /proc/sys/vm/swappiness|awk '{print \"swappiness[\"\$0\"]\"}'"
# MemTotal and MemAvailable from /proc/meminfo, divided by 1024, plus the
# available percentage; the first matching line is taken as the total.
memTotal="cat /proc/meminfo|grep -E \"MemAvailable|MemTotal\"|awk '{if (NR==1) {totalMem = \$2} else { availableMem = \$2}} END{ print \"totalMem[\"sprintf(\"%.2f\",totalMem/1024)\"M] availableMem[\"sprintf(\"%.2f\", availableMem/1024)\"M] availablePer[\"sprintf(\"%.2f\",availableMem*100/totalMem)\"%]\"}'"
# Value of the "cpu cores" entry in /proc/cpuinfo.
cpuCores="cat /proc/cpuinfo|grep 'cpu cores'|uniq|awk '{print \"cpuCores[\"\$4\"]\"}'"
# Fourth word of /etc/centos-release (the version number in the sample output).
centosRelease="awk '{print \"centos[\"\$4\"]\"}' /etc/centos-release"
# Size, used space and used percentage of the root filesystem as shown by df -h.
disk="df -h /| grep -v Filesystem|awk '{print \"diskSize[\"\$2\"] diskUsed[\"\$3\"] diskUsedPer[\"\$5\"]\"}'"
# Count today's ParNew lines in the GC log whose real= time exceeds the
# threshold; prints gc_cnt[0] when the log file does not exist.
jvm="for i in \$(if [ -f \"/data/dataLogs/gc/gc.log\" ]; then grep ParNew: /data/dataLogs/gc/gc.log |grep $today_date| awk -F 'real=' '{print \$2}'|awk '{print \$1}' ; fi); do echo \$i|awk 'BEGIN{r=\"False\"} {if (\$0>$gc_time_threshold) {r=\"True\"}} END{print r}' ; done | grep True|wc -l |awk '{print \"gc_cnt[\"\$0\"]\"}'"
# Legend line describing every field of the report.
echo "swapTotal[swap總大小] swapUsed[已用swap大小] swapUsedPer[swap佔用百分比] swappiness[控制何時使用swap的引數] totalMem[總記憶體大小] availableMem[可用記憶體大小] availablePer[可用記憶體佔比] cpuCores[cpu核數] centos[centos版本號] diskSize[硬碟大小] diskUsed[硬碟已使用大小] diskUsedPer[已用磁碟佔比] gc_cnt[gc耗時超標個數]"
# Run all snippets on every host, then reshape the ansible output:
#   1st awk: a 6-field line (presumably the ansible per-host header) becomes
#            a blank line followed by "<host> <status>";
#   2nd awk: joins the lines of each host into one record separated by " |";
#   3rd awk: trims leading/trailing spaces; finally the records are sorted.
ansible -i checkhosts allhosts -m shell -a "$healthcheck && $swaped && $swappiness && $memTotal && $cpuCores && $centosRelease && $disk && $jvm"|awk '{if(NF == 6) {print "\n"$1,$3} else{print $0}}'|awk 'BEGIN{line=1} {if (length($0)!=0) {strs[line]=strs[line]" "$0" |"} else {line+=1}} END{for(i in strs) {print strs[i]}}'|awk 'gsub(/^ *| *$/, "")'|sort
複製程式碼
如何對上述的結果根據某些閾值進行快速檢查?
答:使用python處理(最好使用virtualenv),結果示例見下,統計方法見check.py指令碼
複製程式碼
check.py
import sys
import subprocess
import operator
import xlsxwriter
# Python 2-only idiom: reloading sys makes setdefaultencoding reachable again
# so that implicit str/unicode conversions use UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# A host is reported when its used-memory percentage exceeds this value.
mem_threshold = 90
# A host is reported when its used-swap percentage exceeds this value.
swap_threshold = 10
# A host is reported when its used-disk percentage exceeds this value.
disk_threshold = 50
# GC time threshold handed to check.sh as its first argument (shown as seconds in the report).
gc_threshold = 1
def main():
    """Run check.sh, aggregate its report per IDC and write check_result.xlsx.

    Every output line is kept for the detail sheet; only lines containing
    "SUCCESS" are parsed. Such a line is split on "|" into: host/status,
    application info, swap, swappiness, memory, cpu, centos release, disk
    and gc count, and each part is passed to its checker.

    :return: None
    """
    command = 'sh check.sh ' + str(gc_threshold)
    raw_report = subprocess.check_output(command, shell=True)
    report_lines = raw_report.split('\n')

    # Worksheets appear in the workbook in creation order.
    book = xlsxwriter.Workbook("check_result.xlsx")
    sheet = book.add_worksheet("彙總")
    sheet_detail = book.add_worksheet("明細")
    sheet_mem = book.add_worksheet("記憶體超標明細")
    sheet_disk = book.add_worksheet("硬碟超標明細")
    sheet_swap = book.add_worksheet("swap超標明細")
    sheet_gc = book.add_worksheet("gc耗時超標明細")

    # Hosts exceeding a threshold, as (ip, value) tuples.
    gt_mem_list, gt_disk_list, gt_swap_list, gt_gc_list = [], [], [], []
    all_lines = []

    # Per-IDC accumulators, all keyed by IDC number.
    health_check_result = {}
    disk_check_result = {}
    disk_avg_check_result = {}
    disk_gt_check_result = {}
    swap_per_check_result = {}
    swappiness_check_result = {}
    mem_check_result = {}
    mem_available_check_result = {}
    cpu_cores_check_result = {}
    centos_release_check_result = {}
    gc_check_result = {}

    for report_line in report_lines:
        all_lines.append(report_line)
        if "SUCCESS" not in report_line:
            continue
        parts = report_line.split("|")
        ip = parts[0].strip().split(" ")[0]
        idc_no = get_idc_no(replace_space(parts[0]).split(" ")[0])

        health_check(idc_no, replace_space(parts[1]), health_check_result)
        disk_check(
            ip, idc_no, replace_space(parts[7]),
            gt_disk_list, disk_check_result, disk_avg_check_result, disk_gt_check_result,
        )
        swap_check(
            ip, idc_no, replace_space(parts[2]), replace_space(parts[3]),
            gt_swap_list, swap_per_check_result, swappiness_check_result,
        )
        mem_check(
            ip, idc_no, replace_space(parts[4]),
            gt_mem_list, mem_check_result, mem_available_check_result,
        )
        cpu_check(idc_no, replace_space(parts[5]), cpu_cores_check_result)
        centos_release_check(idc_no, replace_space(parts[6]), centos_release_check_result)
        gc_check(ip, idc_no, parts[8], gt_gc_list, gc_check_result)

    output(
        sheet, sheet_detail, sheet_mem, sheet_disk, sheet_swap, sheet_gc,
        centos_release_check_result, cpu_cores_check_result,
        disk_avg_check_result, disk_check_result, disk_gt_check_result,
        health_check_result, mem_available_check_result, mem_check_result,
        swap_per_check_result, swappiness_check_result, gc_check_result,
        gt_mem_list, gt_disk_list, gt_swap_list, gt_gc_list, all_lines,
    )
    book.close()
def output(sheet, sheet_detail, sheet_mem, sheet_disk, sheet_swap, sheet_gc, centos_release_check_result,
           cpu_cores_check_result, disk_avg_check_result, disk_check_result, disk_gt_check_result,
           health_check_result, mem_available_check_result, mem_check_result, swap_per_check_result,
           swappiness_check_result, gc_check_result, gt_mem_list, gt_disk_list, gt_swap_list, gt_gc_list,
           all_lines):
    """Write the summary, detail and threshold sheets and echo the raw results.

    :param sheet: summary worksheet, one row per IDC
    :param sheet_detail: worksheet receiving every report line unchanged
    :param sheet_mem: worksheet listing hosts over the memory threshold
    :param sheet_disk: worksheet listing hosts over the disk threshold
    :param sheet_swap: worksheet listing hosts over the swap threshold
    :param sheet_gc: worksheet listing hosts with slow GCs
    :param centos_release_check_result: per-IDC release counts
    :param cpu_cores_check_result: per-IDC core counts; also decides which IDCs get a summary row
    :param disk_avg_check_result: per-IDC {"sum", "used"} disk totals
    :param disk_check_result: per-IDC disk size counts
    :param disk_gt_check_result: per-IDC {"gt", "lt"} disk counters
    :param health_check_result: per-IDC application/commit counts
    :param mem_available_check_result: per-IDC {"gt", "lt"} memory counters
    :param mem_check_result: per-IDC total-memory counts
    :param swap_per_check_result: per-IDC {"gt", "lt"} swap counters
    :param swappiness_check_result: per-IDC swappiness counts
    :param gc_check_result: per-IDC {"gt", "lt"} gc counters
    :param gt_mem_list: (ip, value) tuples for the memory sheet
    :param gt_disk_list: (ip, value) tuples for the disk sheet
    :param gt_swap_list: (ip, value) tuples for the swap sheet
    :param gt_gc_list: (ip, count) tuples for the gc sheet
    :param all_lines: every line of the report
    :return: None
    """
    # One entry per summary column after the IDC column: (title, per-IDC data, renderer).
    summary_columns = [
        ("存活虛擬機器", health_check_result, health2str),
        ("磁碟總容量", disk_check_result, disk2str),
        ("磁碟平均使用量", disk_avg_check_result, disk_avg2str),
        ("磁碟使用超過" + str(disk_threshold) + "%", disk_gt_check_result, disk_per2str),
        ("swap使用超過" + str(swap_threshold) + "%", swap_per_check_result, swap_per2str),
        ("swappiness檢查", swappiness_check_result, swappiness2str),
        ("總記憶體檢查", mem_check_result, mem2str),
        ("記憶體使用率超過" + str(mem_threshold) + "%", mem_available_check_result, mem_available2str),
        ("cpu核數", cpu_cores_check_result, cpu2str),
        ("centos版本號", centos_release_check_result, centos2str),
        ("gc耗時", gc_check_result, gc2str),
    ]

    sheet.write(0, 0, "IDC編號")
    for column, (title, _data, _render) in enumerate(summary_columns, 1):
        sheet.write(0, column, title)

    # Echo the raw aggregates; a single pre-formatted string prints the same
    # "<title> <dict>" text as a two-item print statement.
    for title, data, _render in summary_columns:
        print("%s %s" % (title, data))

    # Only these IDC numbers are reported, in this order.
    row = 1
    for idc in ["10", "11", "20", "21", "30", "31"]:
        if idc not in cpu_cores_check_result:
            continue
        sheet.write(row, 0, idc)
        for column, (_title, data, render) in enumerate(summary_columns, 1):
            sheet.write(row, column, render(data[idc]))
        row += 1

    for detail_row, text in enumerate(all_lines):
        sheet_detail.write(detail_row, 0, text)

    # (worksheet, second-column title, rows sorted by value descending, value suffix)
    by_value = operator.itemgetter(1)
    threshold_sheets = [
        (sheet_mem, "當前可用記憶體佔比", sorted(gt_mem_list, key=by_value, reverse=True), "%"),
        (sheet_disk, "當前已用磁碟空間佔比", sorted(gt_disk_list, key=by_value, reverse=True), "%"),
        (sheet_swap, "當前已用swap佔比", sorted(gt_swap_list, key=by_value, reverse=True), "%"),
        (sheet_gc, "gc耗時大於" + str(gc_threshold) + "秒次數",
         sorted(gt_gc_list, key=by_value, reverse=True), "次"),
    ]
    for target, value_title, _rows, _suffix in threshold_sheets:
        target.write(0, 0, "虛擬機器ip")
        target.write(0, 1, value_title)
    for target, _value_title, rows, suffix in threshold_sheets:
        for row_no, entry in enumerate(rows, 1):
            target.write(row_no, 0, entry[0])
            target.write(row_no, 1, str(entry[1]) + suffix)
def centos2str(item):
    """Render the CentOS release counts of one IDC as report text.

    :param item: release string mapped to host count, e.g. {'7.0.1406': 3}
    :return: one newline-terminated line per release; "" for an empty mapping
    """
    rows = [
        "centos版本號為" + release + " 的共" + str(count) + "臺\n"
        for release, count in item.items()
    ]
    return "".join(rows)
def cpu2str(item):
    """Render the CPU core counts of one IDC as report text.

    :param item: core count (as a string) mapped to host count, e.g. {'1': 2, '2': 1}
    :return: one newline-terminated line per core count; "" for an empty mapping
    """
    rows = [
        "總核數為" + cores + "核 的共" + str(count) + "臺\n"
        for cores, count in item.items()
    ]
    return "".join(rows)
def gc2str(item):
    """Render the GC counters of one IDC as report text.

    :param item: {'gt': hosts with slow GCs, 'lt': remaining hosts}, e.g. {'lt': 0, 'gt': 2}
    :return: two lines, the first naming the module-level gc_threshold
    """
    over = str(item["gt"])
    others = str(item["lt"])
    pieces = ["gc耗時高於", str(gc_threshold), "秒的共", over, "臺\n", "其他共", others, "臺"]
    return "".join(pieces)
def mem_available2str(item):
    """Render the memory-usage counters of one IDC as report text.

    :param item: {'gt': hosts over mem_threshold, 'lt': remaining hosts}, e.g. {'lt': 0, 'gt': 2}
    :return: two lines, the first naming the module-level mem_threshold
    """
    over = str(item["gt"])
    others = str(item["lt"])
    pieces = ["高於", str(mem_threshold), "%共", over, "臺\n", "其他共", others, "臺"]
    return "".join(pieces)
def mem2str(item):
    """Render the total-memory counts of one IDC as report text.

    :param item: memory size text mapped to host count, e.g. {'3772.20': 1, '3774.49': 1}
    :return: one newline-terminated line per size; "" for an empty mapping
    """
    rows = [
        "總記憶體大小為" + size + "M 的共" + str(count) + "臺\n"
        for size, count in item.items()
    ]
    return "".join(rows)
def swappiness2str(item):
    """Render the swappiness settings of one IDC as report text.

    :param item: swappiness value (as a string) mapped to host count, e.g. {'30': 2}
    :return: one newline-terminated line per setting; "" for an empty mapping
    """
    rows = [
        "配置為" + setting + " 的共" + str(count) + "臺\n"
        for setting, count in item.items()
    ]
    return "".join(rows)
def disk_per2str(item):
    """Render the disk-usage counters of one IDC as report text.

    :param item: {'gt': hosts over disk_threshold, 'lt': remaining hosts}, e.g. {'gt': 0, 'lt': 2}
    :return: two lines, the first naming the module-level disk_threshold
    """
    over = str(item["gt"])
    others = str(item["lt"])
    pieces = ["高於", str(disk_threshold), "%共", over, "臺\n", "其他共", others, "臺"]
    return "".join(pieces)
def swap_per2str(item):
    """Render the swap-usage counters of one IDC as report text.

    :param item: {'gt': hosts over swap_threshold, 'lt': remaining hosts}, e.g. {'gt': 0, 'lt': 2}
    :return: two lines, the first naming the module-level swap_threshold
    """
    over = str(item["gt"])
    others = str(item["lt"])
    pieces = ["高於", str(swap_threshold), "%共", over, "臺\n", "其他共", others, "臺"]
    return "".join(pieces)
def disk_avg2str(item):
    """Render the overall disk usage of one IDC as a percentage string.

    :param item: {'sum': total disk size, 'used': total used space}, e.g. {'sum': 190, 'used': 30.3}
    :return: used / sum as a percentage rounded to two decimals, e.g. "15.95%";
             "0.0%" when the total size is 0
    """
    total = item["sum"]
    used = item["used"]
    if not total:
        # Nothing to divide by: report zero usage instead of raising ZeroDivisionError.
        return "0.0%"
    # float() keeps the division exact on Python 2 when both values are ints.
    return str(round(float(used) * 100 / total, 2)) + "%"
def disk2str(item):
    """Render the disk size counts of one IDC as report text.

    :param item: disk size (as a string) mapped to host count, e.g. {'63': 1, '127': 1}
    :return: one newline-terminated line per size; "" for an empty mapping
    """
    rows = [
        size + "G 共" + str(count) + "臺\n"
        for size, count in item.items()
    ]
    return "".join(rows)
def health2str(item):
    """Render the application/commit counts of one IDC as report text.

    :param item: "<app name>_<commit id>" mapped to host count,
                 e.g. {'recon-front-app_061a05': 3}
    :return: one newline-terminated line per key; "" for an empty mapping
    :raises IndexError: if a key contains no "_" separator
    """
    rows = []
    for key, count in item.items():
        # Split on the LAST underscore so application names that themselves
        # contain "_" are kept whole.
        name_and_commit = key.rsplit("_", 1)
        app_name = name_and_commit[0]
        commit_id = name_and_commit[1]
        rows.append(app_name + "[commitid:" + commit_id + "]共" + str(count) + "臺\n")
    return "".join(rows)
def gc_check(ip, idc_no, gc_info, gt_gc_list, gc_check_result):
    """Record one host's slow-GC count in the per-IDC GC statistics.

    :param ip: host address, stored in gt_gc_list when the count is positive
    :param idc_no: IDC number the host belongs to
    :param gc_info: text of the form gc_cnt[0]
    :param gt_gc_list: list extended in place with (ip, count) for hosts with slow GCs
    :param gc_check_result: per-IDC {"gt": n, "lt": n} counters, updated in place
    :return: None
    """
    counters = gc_check_result.setdefault(idc_no, {"gt": 0, "lt": 0})
    slow_gc_count = int(get_value(gc_info))
    if slow_gc_count <= 0:
        counters["lt"] += 1
        return
    counters["gt"] += 1
    gt_gc_list.append((ip, slow_gc_count))
def centos_release_check(idc_no, centos_release_info, centos_release_check_result):
    """Count one host's CentOS release in the per-IDC release statistics.

    :param idc_no: IDC number the host belongs to
    :param centos_release_info: text of the form centos[7.0.1406]
    :param centos_release_check_result: per-IDC {release: host count} mapping, updated in place
    :return: None
    """
    release = get_value(centos_release_info)
    per_release = centos_release_check_result.setdefault(idc_no, {})
    per_release[release] = per_release.get(release, 0) + 1
def cpu_check(idc_no, cpu_info, cpu_cores_check_result):
    """Count one host's CPU core number in the per-IDC core statistics.

    :param idc_no: IDC number the host belongs to
    :param cpu_info: text of the form cpuCores[1]
    :param cpu_cores_check_result: per-IDC {core count text: host count} mapping, updated in place
    :return: None
    """
    cores = get_value(cpu_info)
    per_core_count = cpu_cores_check_result.setdefault(idc_no, {})
    per_core_count[cores] = per_core_count.get(cores, 0) + 1
def mem_check(ip, idc_no, mem_info, gt_mem_list, mem_check_result, mem_available_check_result):
    """Fold one host's memory figures into the per-IDC memory statistics.

    :param ip: host address, stored in gt_mem_list when the host is over the threshold
    :param idc_no: IDC number the host belongs to
    :param mem_info: totalMem[3787.16M] availableMem[3253.27M] availablePer[85.90%]
    :param gt_mem_list: list extended in place with (ip, available percentage)
    :param mem_check_result: per-IDC {total memory text: host count} mapping, updated in place
    :param mem_available_check_result: per-IDC {"gt": n, "lt": n} counters, updated in place
    :return: None
    """
    fields = mem_info.split(" ")

    # Total memory, with the trailing unit letter dropped, is used as a counting key.
    total_mem = get_value(fields[0])[:-1]
    totals = mem_check_result.setdefault(idc_no, {})
    totals[total_mem] = totals.get(total_mem, 0) + 1

    counters = mem_available_check_result.setdefault(idc_no, {"gt": 0, "lt": 0})
    # The report carries the AVAILABLE percentage; usage is its complement.
    available_per = float(get_value(fields[2])[:-1])
    if (100 - available_per) > mem_threshold:
        counters["gt"] += 1
        gt_mem_list.append((ip, available_per))
        return
    counters["lt"] += 1
def swap_check(ip, idc_no, swap_info, swapiness, gt_swap_list, swap_per_check_result, swappiness_check_result):
    """Fold one host's swap figures into the per-IDC swap statistics.

    :param ip: host address, stored in gt_swap_list when the host is over the threshold
    :param idc_no: IDC number the host belongs to
    :param swap_info: swapTotal[16383996] swapUsed[10780] swapUsedPer[0.07]
    :param swapiness: text of the form swappiness[30]
    :param gt_swap_list: list extended in place with (ip, used percentage)
    :param swap_per_check_result: per-IDC {"gt": n, "lt": n} counters, updated in place
    :param swappiness_check_result: per-IDC {swappiness text: host count} mapping, updated in place
    :return: None
    """
    fields = swap_info.split(" ")

    counters = swap_per_check_result.setdefault(idc_no, {"gt": 0, "lt": 0})
    used_per = float(get_value(fields[2]))
    if used_per > swap_threshold:
        counters["gt"] += 1
        gt_swap_list.append((ip, used_per))
    else:
        counters["lt"] += 1

    setting = get_value(swapiness)
    per_setting = swappiness_check_result.setdefault(idc_no, {})
    per_setting[setting] = per_setting.get(setting, 0) + 1
def disk_check(ip, idc_no, disk_info, gt_disk_list, disk_check_result, disk_avg_check_result, disk_gt_check_result):
    """Fold one host's root-disk figures into the per-IDC disk statistics.

    :param ip: host address, stored in gt_disk_list when the host is over the threshold
    :param idc_no: IDC number the host belongs to
    :param disk_info: diskSize[63G] diskUsed[2.9G] diskUsedPer[5%]
    :param gt_disk_list: list extended in place with (ip, used percentage)
    :param disk_check_result: per-IDC {disk size text: host count} mapping, updated in place
    :param disk_avg_check_result: per-IDC {"sum": total size, "used": total used} totals, updated in place
    :param disk_gt_check_result: per-IDC {"gt": n, "lt": n} counters, updated in place
    :return: None
    :raises ValueError: if the size is not a whole number followed by one unit character
    """
    fields = disk_info.split(" ")

    size_counts = disk_check_result.setdefault(idc_no, {})
    # NOTE: int() is applied to the text minus its last character, so the size
    # must be a whole number with a single-letter unit suffix (e.g. "63G");
    # the unit letter itself is ignored.
    disk_size = int(get_value(fields[0])[:-1])
    size_key = str(disk_size)
    # dict.get replaces the Python 2-only has_key() lookup.
    size_counts[size_key] = size_counts.get(size_key, 0) + 1

    disk_used = float(get_value(fields[1])[:-1])
    totals = disk_avg_check_result.setdefault(idc_no, {"sum": 0, "used": 0})
    totals["sum"] = totals["sum"] + disk_size
    totals["used"] = totals["used"] + disk_used

    counters = disk_gt_check_result.setdefault(idc_no, {"gt": 0, "lt": 0})
    used_per = float(get_value(fields[2])[:-1])
    if used_per > disk_threshold:
        counters["gt"] += 1
        gt_disk_list.append((ip, used_per))
    else:
        counters["lt"] += 1
def health_check(idc_no, commit_info, healthcheck_result):
    """Count one host's application/commit pair in the per-IDC health statistics.

    The IDC entry is created even when commit_info cannot be parsed, so the
    IDC is always present in healthcheck_result afterwards.

    :param idc_no: IDC number the host belongs to
    :param commit_info: "<app name> <commit id>",
                        e.g. dispute-dal 4692cf542b003c11ed7da3b75112cb46ae767952
    :param healthcheck_result: per-IDC {"<app name>_<commit id>": host count} mapping, updated in place
    :return: None
    """
    idc_result = healthcheck_result.setdefault(idc_no, {})
    # split() without an argument tolerates repeated whitespace between the
    # two fields, which split(" ") would turn into empty strings.
    fields = commit_info.split()
    if len(fields) < 2:
        # No application/commit pair reported for this host: nothing to count.
        return
    key = fields[0] + "_" + fields[1]
    idc_result[key] = idc_result.get(key, 0) + 1
def get_value(value):
    """Return the text between the first "[" and the first "]" of value.

    :param value: text of the form name[content], e.g. cpuCores[4]
    :return: the content between the brackets
    :raises ValueError: if value contains no "[" or no "]"
    """
    start = value.index("[") + 1
    end = value.index("]")
    return value[start:end]
def get_idc_no_test(ip):
    """Map an address of the 172.16.x networks to an IDC number.

    :param ip: host address text
    :return: "10", "11" or "20" for the first fragment found in ip, otherwise "30"
    """
    fragments = (
        ("172.16.1.", "10"),
        ("172.16.11.", "11"),
        ("172.16.13.", "20"),
    )
    for fragment, idc_no in fragments:
        if fragment in ip:
            return idc_no
    return "30"
def get_idc_no(ip):
    """Map a host address to its IDC number by network prefix.

    The address must START with the prefix; a substring test would also match
    unrelated addresses such as 110.0.5.1.

    :param ip: host address text, e.g. 10.0.5.105
    :return: the IDC number as a string, or None when no known prefix matches
    """
    prefixes = (
        ("10.0.", "10"),
        ("10.16.", "11"),
        ("10.32.", "20"),
        ("10.48.", "21"),
        ("10.64.", "30"),
        ("10.80.", "31"),
    )
    for prefix, idc_no in prefixes:
        if ip.startswith(prefix):
            return idc_no
    return None
def replace_space(str):
    """Strip surrounding whitespace from str and normalise its spaces.

    :param str: text to clean (the parameter name shadows the builtin)
    :return: the cleaned text
    """
    trimmed = str.strip()
    # NOTE(review): both replace() arguments appear identical here, which would
    # make this call a no-op -- presumably it once collapsed double spaces; confirm.
    return trimmed.replace(" ", " ")
# Run the check only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
複製程式碼