對系統日誌和資料庫日誌的檢查很重要,出現問題時及時地通知系統管理員更為重要。本指令碼是用 Python 寫的監控指令碼,主要是為 zabbix 監控自定義的 key 而準備的。當然,大家也可以在返回值方面做修改,可以再寫個發郵件的模組,做個定時任務,有問題時自動發郵件(之前寫過一個發郵件的類,大家可以做參考:http://wangwei007.blog.51cto.com/68019/978743)。在 zabbix 中自定義 key 來檢測系統日誌和資料庫日誌:
UnsafeUserParameters=1
UserParameter=check.sys_error,/usr/local/zabbix/bin/chk_err_log.py syslog
UserParameter=check.mysql_error,/usr/local/zabbix/bin/chk_err_log.py mysqllog
本指令碼適合一臺伺服器多例項的 mysql 錯誤日誌檢測,也適用於單個例項的檢測,可根據自己的需求做修改。
- #!/usr/bin/env python
- #encoding=utf-8
- import os, sys
def chk_err(log_file, tmp_file, type, print_list, port):
    """Append ``port`` to ``print_list`` when ``log_file`` has new error lines.

    The line number of the last error line seen is persisted in ``tmp_file``
    so that each run only reacts to error lines beyond that position.  When
    the log currently has fewer lines than the saved number, the saved number
    is reset to 0 (the log was presumably rotated or truncated).

    Args:
        log_file: Path of the log to scan.  It is interpolated unquoted into a
            shell command, so shell expansions inside it (for example
            ``$(hostname)``) are evaluated by the shell.
        tmp_file: Path of the state file holding the saved line number.
        type: Unused; kept so that existing callers keep working.
        print_list: List that ``port`` is appended to when new errors exist.
        port: Value recorded in ``print_list`` for this log.

    Returns:
        None.  The outcome is communicated by mutating ``print_list``.
    """

    def save_position(line_no):
        # tmp_file is already used as a literal path by os.path.exists, so it
        # is written directly instead of spawning an `echo N > file` subshell.
        with open(tmp_file, "w") as state:
            state.write("%s\n" % line_no)

    # grep '' matches every line, so the pipeline yields the log's line count.
    # The pattern must be in single quotes: backticks would be run by the
    # shell as a command substitution.
    with os.popen("sudo grep '' %s | wc -l" % log_file) as pipe:
        counted = pipe.read().strip()
    cur_num = int(counted or 0)

    old_num = 0
    if os.path.exists(tmp_file):
        with open(tmp_file) as state:
            saved = state.read().strip()
        # An empty or corrupt state file counts as "nothing seen yet" rather
        # than aborting the whole check with a ValueError.
        old_num = int(saved) if saved.isdigit() else 0
        if cur_num < old_num:
            save_position(0)
            old_num = 0
    else:
        save_position(0)

    # grep -n prefixes every matching line with "<line number>:".
    with os.popen("sudo grep -ni 'error' %s" % log_file) as pipe:
        err_log = pipe.readlines()
    if not err_log:
        return

    line_numbers = [int(line.split(":", 1)[0]) for line in err_log]
    if any(number > old_num for number in line_numbers):
        save_position(line_numbers[-1])
        print_list.append(port)
def chk_err_log(type):
    """Check the system log or the MySQL error logs for new error lines.

    Args:
        type: ``"syslog"`` scans ``/var/log/messages``; ``"mysqllog"`` scans
            the error log of every mysqld process listed by ``ps``.

    Returns:
        For ``"syslog"``: ``"1"`` when new error lines exist, otherwise
        ``"0"``.  For ``"mysqllog"``: the list of ports whose log has new
        errors, ``"0"`` when there are none, or a message string when no
        mysqld process is found.  Any other ``type`` yields ``None``.  An
        exception raised along the way is returned instead of propagated.
    """
    try:
        print_list = []
        homedir = "/home/zabbix"
        if not os.path.exists(homedir):
            os.mkdir(homedir)

        if type == "syslog":
            log_file = "/var/log/messages"
            tmp_file = "%s/.syslog_num" % homedir

            def save_position(line_no):
                # Written directly rather than via an `echo N > file` subshell.
                with open(tmp_file, "w") as state:
                    state.write("%s\n" % line_no)

            # grep '' matches every line, so this yields the line count.  The
            # pattern must be single-quoted: backticks would be executed by
            # the shell as a command substitution.
            with os.popen("sudo grep '' %s | wc -l" % log_file) as pipe:
                counted = pipe.read().strip()
            cur_num = int(counted or 0)

            old_num = 0
            if os.path.exists(tmp_file):
                with open(tmp_file) as state:
                    saved = state.read().strip()
                # Empty or corrupt state counts as "nothing seen yet".
                old_num = int(saved) if saved.isdigit() else 0
                if cur_num < old_num:
                    # Fewer lines than last time: start over from the top.
                    save_position(0)
                    old_num = 0
            else:
                save_position(0)

            # snmpd and sftp lines are filtered out of the error matches.
            with os.popen(
                "sudo grep -ni 'error' %s|grep -v snmpd|grep -v sftp" % log_file
            ) as pipe:
                err_log = pipe.readlines()
            if not err_log:
                return "0"

            # grep -n prefixes every matching line with "<line number>:".
            line_numbers = [int(line.split(":", 1)[0]) for line in err_log]
            if not any(number > old_num for number in line_numbers):
                return "0"
            save_position(line_numbers[-1])
            return "1"

        elif type == "mysqllog":
            with os.popen(
                "ps auxww|grep mysqld|grep -v root|grep -v grep"
            ) as pipe:
                psinfo = pipe.readlines()
            if not psinfo:
                return "No mysqld running in this server now"

            for process in psinfo:
                # Reset both values per process so one instance can never
                # inherit the datadir of the previous one.
                port = "0"
                datadir = None
                for option in process.split("--"):
                    # Compare the exact option name; a substring test would
                    # also match options such as --report-port.
                    name, sep, value = option.partition("=")
                    if not sep:
                        continue
                    name = name.strip()
                    if name == "datadir":
                        datadir = value.strip()
                    elif name == "port":
                        port = value.strip()
                if port == "0" or datadir is None:
                    continue
                if port == "3306":
                    # $(hostname) is expanded by the shell inside chk_err.
                    log_file = "%s/$(hostname).err" % datadir
                else:
                    log_file = "%s/mysql.err" % datadir
                tmp_file = "%s/.mysqllog_%s" % (homedir, port)
                chk_err(log_file, tmp_file, type, print_list, port)

            if not print_list:
                return "0"
            return print_list
    except Exception as e:
        # Returned rather than raised so the caller prints the error text.
        return e
if __name__ == "__main__":
    # The check type ("syslog" or "mysqllog") is the first command-line
    # argument; fail with a usage message instead of an IndexError traceback
    # when it is missing.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s syslog|mysqllog\n" % sys.argv[0])
        sys.exit(2)
    # The single-argument call form prints identically on Python 2 and 3.
    print(chk_err_log(sys.argv[1]))