nagios批量新增監控

wang_0720發表於2014-03-11
工作中往往需要對新上線的機器進行監控,如果對100臺新機器手動新增監控的話真的是件體力活。提高工作效率的辦法就是寫指令碼自動批量新增。本文將詳細闡述nagios批量新增監控。
首先對要新增監控的機器分組,以5臺機器為例,建立一個host_service列表(nag_host_serv.txt),各列以空格隔開。第一列是功能分組,說明機器的主要功能;第二列是ip;第三列及後續各列是要監控的服務。具體如下所示:
cat nag_host_serv.txt
db 192.168.151.141 load cpu_idle disk disk_io ssh mysql
web 192.168.151.40 load cpu_idle disk disk_io ssh http
ad 192.168.151.2 load cpu_idle disk disk_io ssh
ad 192.168.151.23 load cpu_idle disk disk_io ssh
mcache 192.168.151.138 cpu_idle disk_io memcache_hits
接下來是編寫監控指令碼,在寫指令碼之前修改下templates.cfg這個檔案,加上一些自己需要的東西。
more /usr/local/nagios/etc/objects/templates.cfg
# Host template "etnet-host" for all etnet Linux hosts.
# It extends the stock generic-host template through "use"; the host entries
# generated by nag_host_serv.sh pull it in with "use etnet-host".
# "register 0" is what keeps this a template instead of a live host object
# (see the Nagios object-inheritance documentation).
# NOTE(review): the "workhours" time period and the "admins" contact group are
# not defined in this snippet - confirm they exist elsewhere in the config.
define host{
        name                            etnet-host
        use                               generic-host
        check_period                   24x7
        check_interval                  5
        retry_interval                   1
        max_check_attempts        10
        check_command              check-host-alive
        notification_period             workhours
        notification_interval            120
        notification_options            d,u,r
        contact_groups                 admins
        register                           0
        }
# Service template "etnet-service" for all etnet services.
# It extends the stock generic-service template through "use"; the service
# entries generated by nag_host_serv.sh pull it in with "use etnet-service".
# "register 0" is what keeps this a template instead of a live service object
# (see the Nagios object-inheritance documentation).
# NOTE(review): the "workhours" time period and the "admins" contact group are
# not defined in this snippet - confirm they exist elsewhere in the config.
define service{
        name                            etnet-service
        use                               generic-service
        check_period                   24x7
        check_interval                  5
        retry_interval                   1
        max_check_attempts        10
        notification_period             workhours
        notification_interval           120
        notification_options            w,u,c,r,f
        contact_groups                admins
        register                           0
        }
以上是新增的一些主機模板和services模板,將模板的名字定義為自己喜歡的名字如etnet-host,etnet-service。不過在模板中要引用預設的generic-host,generic-service。
接下來要修改下commands.cfg檔案,新增一個check_nrpe命令,因為要通過nrpe監控遠端機器。
tail -4 commands.cfg
define command{
    command_name check_nrpe
    command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
注意這裡的巨集變數名前後都要各加一個$(例如$USER1$、$HOSTADDRESS$、$ARG1$),否則會監控不到服務的。
現在來監控服務吧,nrpe是C/S架構,所以先要配置下nrpe的服務端也就是被監控的機器。
安裝nagios-plugin和nrpe在http://blog.itpub.net/27181165/viewspace-775807/中已經介紹過了,在此不介紹了,下面來說下nrpe的配置
cat etc/nrpe.cfg |grep -v \#|sed '/^$/d'
log_facility=daemon
pid_file=/var/run/nrpe.pid
server_port=5666
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=127.0.0.1,192.168.151.133
dont_blame_nrpe=0
debug=0
command_timeout=60
connection_timeout=300
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/mapper/VolGroup02-LogVol00
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_ssh]=/usr/local/nagios/libexec/check_ssh -H 192.168.151.23 -p 22 -t 10
command[check_cpu_idle]=/usr/local/nagios/libexec/check_cpu_idle
command[check_disk_io]=/usr/local/nagios/libexec/check_disk_io
allowed_hosts指定允許哪些監控端(即nagios伺服器)連線到本機的nrpe,command[...]定義監控端可以呼叫的檢查命令,也就是要監控哪些服務。在nagios libexec下有很多監控外掛,如果不夠用可以自定義外掛,這個以後介紹。
現在啟動nrpe程式就可以利用nrpe來收集服務資訊了。nrpe可以交由xinetd啟動,也可以寫指令碼啟動。下面是一個網友提供的nrpe啟動指令碼,直接拿來用了。
[root@localhost nagios]# cat /etc/init.d/nrpe
#!/bin/sh
#
# nrpe - SysV init script for the NRPE daemon.
#
# Usage: nrpe {start|stop|restart|status}
#
# Relies on the daemon/killproc/status helpers provided by the init
# "functions" library sourced below. The script exits with the status of
# the requested action so callers can tell whether it succeeded.

# Source function library (the first location that exists is used).
if [ -f /etc/rc.d/init.d/functions ]; then
. /etc/rc.d/init.d/functions
elif [ -f /etc/init.d/functions ]; then
. /etc/init.d/functions
elif [ -f /etc/rc.d/functions ]; then
. /etc/rc.d/functions
fi

# Source networking configuration when the file exists; sourcing a missing
# file would abort a POSIX sh before any action is taken.
if [ -f /etc/sysconfig/network ]; then
. /etc/sysconfig/network
fi

# Check that networking is up. The expansion is quoted with an empty default
# so an unset NETWORKING does not make "[" fail with a syntax error.
[ "${NETWORKING:-}" = "no" ] && exit 0

NrpeBin=/usr/local/nagios/bin/nrpe
NrpeCfg=/usr/local/nagios/etc/nrpe.cfg
LockFile=/var/lock/subsys/nrpe

# Exit status of the requested action.
RETVAL=0

# See how we were called.
case "$1" in
  start)
 # Start daemons.
 echo -n "Starting nrpe: "
 daemon "$NrpeBin" -c "$NrpeCfg" -d
 RETVAL=$?
 echo
 # Only create the lock file when the daemon really started.
 [ "$RETVAL" -eq 0 ] && touch "$LockFile"
 ;;
  stop)
 # Stop daemons.
 echo -n "Shutting down nrpe: "
 killproc nrpe
 RETVAL=$?
 echo
 # The lock is removed unconditionally so a stale lock never survives a stop.
 rm -f "$LockFile"
 ;;
  restart)
 "$0" stop
 "$0" start
 RETVAL=$?
 ;;
  status)
 status nrpe
 RETVAL=$?
 ;;
  *)
 echo "Usage: nrpe {start|stop|restart|status}"
 exit 1
 ;;
esac
exit "$RETVAL"
好不容易進入正題,上批量新增指令碼吧
[root@localhost scripts]# cat nag_host_serv.sh
#!/bin/bash
# nag_host_serv.sh - bulk-register hosts and their NRPE service checks in
# Nagios from the list in ./nag_host_serv.txt.
# Author: Andy
# Time: 20140307

# Nagios main configuration directory (where nagios.cfg lives).
NAGIOS_CONF="/usr/local/nagios/etc"
# Object directory; the per-group <group>.cfg files are written here.
NAGIOS_OBJ="${NAGIOS_CONF}/objects"
#######################################
# Append Nagios host and service definitions for every entry listed in
# ./nag_host_serv.txt.
#
# Input line format (whitespace separated):
#   <group> <ip> [<service> ...]
# Globals:
#   NAGIOS_OBJ (read) - directory that receives the <group>.cfg files
# Outputs:
#   Appends to ${NAGIOS_OBJ}/<group>.cfg one host block named <group>-<ip>
#   (template etnet-host) and, per listed service, one service block
#   (template etnet-service) that runs check_<service> through check_nrpe.
#   Both templates must be defined in templates.cfg.
# Notes:
#   Output is appended unconditionally, so running this twice on the same
#   list writes every definition twice.
#######################################
function add_host_serv()
{
local groups ip services ser
# "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
while read -r groups ip services || [[ -n "${groups}" ]]
do
    # A blank line or a line without an address has no host to define.
    if [[ -z "${groups}" || -z "${ip}" ]]; then
        continue
    fi

    # Host definition. The here-doc terminator must sit at column 0.
    cat >>"${NAGIOS_OBJ}/${groups}.cfg" <<EOF
    #monitor host ${ip}
    define host{
      use  etnet-host
      host_name ${groups}-${ip}
      alias     ${ip}
      address   ${ip}
    }
EOF

    # One service definition per remaining field; ${services} is left
    # unquoted on purpose so it splits into the individual service names.
    for ser in ${services}; do
        cat >>"${NAGIOS_OBJ}/${groups}.cfg" <<EOF
        #monitor service ${ser}
        define service{
          use    etnet-service
          host_name  ${groups}-${ip}
          service_description check_${ser}
          check_command  check_nrpe!check_${ser}
        }
EOF
    done
done < nag_host_serv.txt
}
#######################################
# Register the per-group object files in nagios.cfg.
#
# For every distinct group (first field) in ./nag_host_serv.txt, make sure
# nagios.cfg contains the line
#   cfg_file=${NAGIOS_OBJ}/<group>.cfg
# inserting it right after the line that mentions templates.cfg when absent.
# Globals:
#   NAGIOS_CONF (read) - directory containing nagios.cfg
#   NAGIOS_OBJ  (read) - directory containing the <group>.cfg files
# Returns:
#   1 (with a message on stderr) if a line has to be added but nagios.cfg
#   has no templates.cfg line to anchor it to.
#######################################
function config_edit()
{
local group cfg_line
local main_cfg="${NAGIOS_CONF}/nagios.cfg"
# awk splits on any whitespace, like the "read" used when the object files
# are generated, skips blank lines and keeps the first occurrence of each
# group in file order.
for group in $(awk 'NF && !seen[$1]++ {print $1}' nag_host_serv.txt); do
    cfg_line="cfg_file=${NAGIOS_OBJ}/${group}.cfg"
    # Exact fixed-string, whole-line match: a commented-out entry or a
    # same-named file in another directory does not count as registered.
    if ! grep -qxF -- "${cfg_line}" "${main_cfg}"; then
        if ! grep -q 'templates\.cfg' "${main_cfg}"; then
            echo "config_edit: no templates.cfg line in ${main_cfg}; cannot register ${group}.cfg" >&2
            return 1
        fi
        sed -i "/templates\.cfg/a ${cfg_line}" "${main_cfg}"
    fi
done
}
#==============================
# Entry point: write the per-group object files first, then make sure
# nagios.cfg references them.
main()
{
add_host_serv
config_edit
}
main

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/27181165/viewspace-1107510/,如需轉載,請註明出處,否則將追究法律責任。

相關文章