系統巡檢指令碼

rongshiyuan發表於2012-08-07
1)巡檢
2)日誌收集
巡檢方面,我寫了一個非常簡單的指令碼,想仿照dell的dset來做(dell的dset還是不錯的,特別是可閱讀性方面)
日誌收集方面,以前,一直使用我們自己寫的sysdump指令碼,後來我覺得系統自帶的sysreport會更好。
先給出我還沒有寫完的那個巡檢的指令碼:
 
CODE:
#!/bin/sh
#
# maintain_report.sh - Generate a system maintain check up report about a Linux machine
#
# The report is written to stdout, one titled section per check.

MY_VERSION="1.0Beta"

# Set path: use a fixed search path so the report does not depend on the
# caller's environment.
PATH=/usr/local/sbin:/usr/local/bin:/usr/bin:/sbin:/usr/sbin:/bin

# Specify custom (source) script, if used
CUSTOM_SCRIPT="/usr/local/sbin/sr_custom_script"

# report_host
#   Print the name of this machine. HOSTNAME is a convenience variable that
#   bash sets but plain POSIX shells (e.g. dash as /bin/sh) do not, so fall
#   back to `uname -n` when it is unset or empty.
report_host() {
  printf '%s\n' "${HOSTNAME:-$(uname -n)}"
}

echo "$(date +'%Y-%m-%d %k:%M:%S') Status report v$MY_VERSION for $(report_host)"
echo "Written by Arno van Amersfoort"
echo "--------------------------------------------------"

# Basic system information: kernel identification, memory usage and uptime.
uname -a
free
uptime

# CPU usage summary taken from a single batch-mode iteration of top.
# Older procps prints the summary as "Cpu(s): ...", procps-ng prints
# "%Cpu(s): ...". The optional leading '%' lets both forms match; the original
# pattern '^CPU' silently printed nothing with the newer format.
CPU_LINE_RE='^%\{0,1\}cpu'

# TERM is exported (not just set for this one command) to keep the original
# behaviour for everything that runs afterwards.
export TERM=vt100
/usr/bin/top -b -n 1 | grep -i "$CPU_LINE_RE"


# report_raid_status [MDSTAT_FILE]
#   Print the "RAID status" section from an mdstat-format file (default:
#   /proc/mdstat). Every input line is echoed (leading/trailing whitespace
#   trimmed) and annotated:
#     - "active raid" lines get the array's mismatch count plus a failed-disk
#       warning and a hotspare note;
#     - "blocks" lines that contain '_' are flagged as degraded.
#   Sets the globals FAIL, DEGRADED and MISMATCH (array counters) and prints a
#   summary warning for each counter that is non-zero.
report_raid_status() {
  raid_mdstat=${1:-/proc/mdstat}

  echo ""
  echo "---------------"
  echo "| RAID status |"
  echo "---------------"
  # if [ -x /sbin/mdadm ]; then
  # /sbin/mdadm --detail --scan 2>&1
  # echo ""
  # fi
  FAIL=0
  DEGRADED=0
  MISMATCH=0
  while read -r LINE; do
    # Never use data as the printf format string: resync/recovery progress
    # lines contain '%' (e.g. "recovery = 12.6% (...)").
    printf '%s' "$LINE"

    case $LINE in
      *'active raid'*)
        # First field of the line is the md device name (e.g. "md0").
        DEV=${LINE%% *}

        # The mismatch counter lives in sysfs; report "unknown" when it cannot
        # be read instead of printing a cat error into the report.
        mm_cnt=
        if [ -r "/sys/block/$DEV/md/mismatch_cnt" ]; then
          mm_cnt=$(cat "/sys/block/$DEV/md/mismatch_cnt")
        fi
        printf ' (mm_cnt=%s)' "${mm_cnt:-unknown}"
        # Mismatch fixing (repair) doesn't work yet with current 2.6.18 kernel, so no use in reporting errors :-S
        # if [ "$(cat /sys/block/$DEV/md/mismatch_cnt)" != "0" ]; then
        # MISMATCH=$(($MISMATCH +1))
        # printf " (WARNING: Unsynchronised (mismatch) blocks!)"
        # fi

        # Match the literal "(F)" marker. The original grep '\(F\)' was a BRE
        # group and therefore matched any capital F on the line.
        case $LINE in
          *'(F)'*)
            FAIL=$((FAIL + 1))
            printf '%s' " (WARNING: FAILED DISK(S)!)"
            ;;
        esac

        # Same for the literal "(S)" spare marker.
        case $LINE in
          *'(S)'*) printf '%s' " (Hotspare(s) available)" ;;
          *) printf '%s' " (NOTE: No hotspare?!)" ;;
        esac
        ;;
    esac

    # A '_' on a "blocks" line (e.g. "[U_]") marks a missing member.
    case $LINE in
      *blocks*)
        case $LINE in
          *_*)
            DEGRADED=$((DEGRADED + 1))
            printf '%s' " (DEGRADED!!!)"
            ;;
        esac
        ;;
    esac

    echo ""
  done < "$raid_mdstat"

  if [ "$FAIL" -gt 0 ]; then
    echo ""
    echo "** WARNING: $FAIL MD(RAID) array(s) have FAILED disk(s)! **"
  fi

  if [ "$DEGRADED" -gt 0 ]; then
    echo ""
    echo "** WARNING: $DEGRADED MD(RAID) array(s) are running in degraded mode! **"
  fi

  if [ "$MISMATCH" -gt 0 ]; then
    echo ""
    echo "** WARNING: $MISMATCH MD(RAID) array(s) have unsynchronized (mismatch) blocks! **"
  fi
}

# The section is only printed on machines that expose /proc/mdstat.
if [ -e /proc/mdstat ]; then
  report_raid_status /proc/mdstat
fi


# list_whole_disks [PARTITIONS_FILE]
#   Print the names of whole SCSI/SATA ("sdX") and IDE ("hdX") disks listed in
#   a /proc/partitions-format file (default: /proc/partitions), one per line.
#   Partitions ("sda1") and other device types are skipped. Multi-letter
#   names such as "sdaa" are accepted, which the original "sd.$" pattern missed.
list_whole_disks() {
  awk '$4 ~ /^[hs]d[a-z]+$/ { print $4 }' "${1:-/proc/partitions}"
}

echo ""
echo "---------------------"
echo "| S.M.A.R.T. status |"
echo "---------------------"
# `command -v` is the portable availability test. The original
# `which ... 2>&1 >/dev/null` had the redirections in the wrong order, so
# diagnostics from `which` were not silenced.
if command -v smartctl >/dev/null 2>&1; then
  list_whole_disks | while read -r HDD; do
    printf '/dev/%s: ' "$HDD"
    # Explicity turn on SMART on device:
    smartctl -q silent -s on "/dev/$HDD"
    # smartctl -H -l error /dev/$HDD |tail -n5 |grep -v -e "^$" -e "Version" -e "Device does not support" -e "Enable Save with" 2>&1
    # Only the last non-empty line of the health report is shown.
    smartctl -H "/dev/$HDD" | grep -v -e '^$' | tail -n1
  done
else
  echo "smartctl binary not found (smartmontools not installed?)"
fi


# report_disk_usage
#   Read `df -h -P -T` formatted lines from stdin and echo each one. When the
#   sixth column (Use%) is a number greater than 90, the line is annotated
#   and the global DF_WARNING is set to 1. Lines whose sixth column is not a
#   plain number (the header, or "-" entries) are echoed without a check.
#   Must be fed via redirection (not a pipe) so DF_WARNING is set in the
#   calling shell.
report_disk_usage() {
  while IFS= read -r LINE; do
    # Skip empty input lines (e.g. when df produced no output at all).
    [ -n "$LINE" ] || continue

    printf '%s' "$LINE"
    used_perc=$(printf '%s\n' "$LINE" | awk '{ sub(/%/, "", $6); print $6 }')
    case $used_perc in
      '' | *[!0-9]*)
        # Not a number: nothing to compare.
        ;;
      *)
        if [ "$used_perc" -gt 90 ]; then
          # No newline here; the line is terminated once, below.
          printf '%s' ' (At or near max. capacity!)'
          DF_WARNING=1
        fi
        ;;
    esac
    echo ""
  done
}

echo ""
echo "------------------"
echo "| DiskSpace info |"
echo "------------------"
DF_WARNING=0
# Capture df once and feed it through a here-document. This avoids changing
# IFS for the rest of the script (the original set IFS to newline and never
# restored it) and keeps the loop in the current shell.
df_output=$(df -h -P -l -T --sync)
report_disk_usage <<EOF
$df_output
EOF
if [ "$DF_WARNING" = "1" ]; then
  echo ""
  echo "** WARNING: One or more filesystems are at or near maximum capacity! **"
fi


# Fsck boot logs: dump every file under /var/log/fsck, but only when that
# directory exists (otherwise the whole section is omitted).
if test -d /var/log/fsck; then
  printf '%s\n' \
    "" \
    "------------------" \
    "| Fsck boot logs |" \
    "------------------"
  cat /var/log/fsck/*
fi


# report_hardware_sensors
#   Print the "Hardware sensors" section. When the `sensors` command is
#   available it is first run with -s; any output on stdout from that call is
#   treated as a failure to read the sensors. Otherwise the full `sensors`
#   output is printed and an extra warning is added if it contains "ALARM".
report_hardware_sensors() {
  echo ""
  echo "--------------------"
  echo "| Hardware sensors |"
  echo "--------------------"
  # `command -v` is the portable availability test. The original
  # `which ... 2>&1 >/dev/null` had the redirections in the wrong order, so
  # diagnostics from `which` were not silenced.
  if command -v sensors >/dev/null 2>&1; then
    # Re-read configuration:
    if [ -n "$(sensors -s)" ]; then
      echo "** WARNING: Unable to read hardware sensors (/etc/sensors.conf incorrect/missing?) **"
    else
      result=$(sensors 2>&1)
      printf '%s\n' "$result"
      if printf '%s\n' "$result" | grep -q -e "ALARM"; then
        echo ""
        echo "** WARNING: One or more sensors show ALARM! **"
      fi
    fi
  else
    echo "sensors binary not found (lm-sensors not installed?)"
  fi
}

report_hardware_sensors


KERNEL_LOG="kwarnings.log"

# report_kernel_warnings [LOG_DIR]
#   Print the "Kernel warnings log" section for LOG_DIR/$KERNEL_LOG
#   (LOG_DIR defaults to /var/log).
#   - If the log does not exist, print a hint on how to enable it.
#   - Otherwise print the previous log ($KERNEL_LOG.0, if any) followed by the
#     current one, or "(Empty)" when both are empty, and then rotate:
#     .0 -> .1, current -> .0, and a new empty current log is created.
#   The logs are read in place. The original staged them in the fixed,
#   predictable path /tmp/$KERNEL_LOG and parsed `ls -l` for the size.
report_kernel_warnings() {
  klog_dir=${1:-/var/log}
  klog="$klog_dir/$KERNEL_LOG"

  echo ""
  echo "-----------------------"
  echo "| Kernel warnings log |"
  echo "-----------------------"

  if [ ! -f "$klog" ]; then
    echo "$klog does NOT exist. Consider adding a line like"
    echo "'kern.warn $klog' to your syslog.conf"
    return 0
  fi

  if [ -s "$klog.0" ] || [ -s "$klog" ]; then
    # The previous log is optional; a missing one is not an error.
    cat "$klog.0" 2>/dev/null
    cat "$klog"
  else
    echo "(Empty)"
  fi

  # Remove log files (rotate, keeping the two previous generations)
  rm -f "$klog.1" 2>/dev/null
  mv "$klog.0" "$klog.1" 2>/dev/null
  mv "$klog" "$klog.0" 2>/dev/null
  # Recreate an empty current log; '>|' overrides a possible noclobber setting.
  : >|"$klog"
}

report_kernel_warnings


# report_os_updates
#   Print the "Available OS Updates" section. Both apt-get invocations are
#   currently commented out, so when apt-get is available only the heading
#   line is printed; otherwise a "not found" note is printed.
report_os_updates() {
  echo ""
  echo "------------------------"
  echo "| Available OS Updates |"
  echo "------------------------"
  # `command -v` is the portable availability test. The original
  # `which ... 2>&1 >/dev/null` had the redirections in the wrong order, so
  # diagnostics from `which` were not silenced.
  if command -v apt-get >/dev/null 2>&1; then
    # apt-get update -q=2 2>&1
    echo "** Testing what packages could be upgraded: **"
    # apt-get upgrade -u --download-only -y -q -V 2>&1
  else
    echo "apt-get binary not found (apt-get not installed?)"
  fi
}

report_os_updates


# report_chkrootkit
#   Print the "Chkrootkit info" section: the combined stdout/stderr of
#   `chkrootkit -q`, or "Nothing found" when it produced no output, or a
#   "not found" note when the command is unavailable.
#   The output is held in a variable. The original staged it in the fixed,
#   predictable path /tmp/chkrootkit_output.txt.
report_chkrootkit() {
  echo ""
  echo "-------------------"
  echo "| Chkrootkit info |"
  echo "-------------------"
  # `command -v` is the portable availability test. The original
  # `which ... 2>&1 >/dev/null` had the redirections in the wrong order, so
  # diagnostics from `which` were not silenced.
  if command -v chkrootkit >/dev/null 2>&1; then
    chkrootkit_output=$(chkrootkit -q 2>&1)
    if [ -n "$chkrootkit_output" ]; then
      printf '%s\n' "$chkrootkit_output"
    else
      echo "Nothing found"
    fi
  else
    echo "chkrootkit binary not found (chkrootkit not installed?)"
  fi
}

report_chkrootkit



# report_clamav_status
#   Print the "ClamAV status" section: the clamscan version followed by the
#   output of scanning a single empty line from stdin. The whole section is
#   omitted when clamscan is not available.
report_clamav_status() {
  # `command -v` is the portable availability test. The original
  # `which ... 2>&1 >/dev/null` had the redirections in the wrong order, so
  # diagnostics from `which` were not silenced.
  if command -v clamscan >/dev/null 2>&1; then
    echo ""
    echo "-----------------"
    echo "| ClamAV status |"
    echo "-----------------"

    # Show clam version:
    clamscan --version 2>&1

    # Dummy run to see whether we're not out-of-date:
    echo "" | clamscan --quiet - 2>&1
  fi
}

report_clamav_status


# Backup info: print /var/log/backup.log verbatim under its own heading.
# The section is omitted when the file does not exist.
if test -e /var/log/backup.log; then
  printf '%s\n' \
    "" \
    "---------------" \
    "| Backup info |" \
    "---------------"
  cat /var/log/backup.log
fi


# Source the script named by CUSTOM_SCRIPT when it exists; do nothing
# otherwise. It runs in this shell, so it sees the variables set here.
[ ! -e "$CUSTOM_SCRIPT" ] || . "$CUSTOM_SCRIPT"
 
 
然後再給出那個sysdump指令碼

#!/bin/bash
# 2007-5-25
#
# Dump system information (hardware, logs, configuration, process state) into
# a directory and package it as a tarball for support.

prog=$0

# print_banner
#   Print a short description of what this script collects.
#   The here-document operator had been mangled to "cat <----...", which made
#   the shell try to read a file named after the dashes and then execute the
#   banner text as commands. The delimiter is unquoted so $prog is expanded.
print_banner() {
  cat <<EOF
--------------------------------------------------------
$prog can be used to dump system information, including
- hardware (/etc/sysconfig/hwconf)
- syslog (/var/log/messages)
- CPU (/proc/cpuinfo)
- memory (/proc/meminfo)
all info is dumped into /SYSINFO directory.
--------------------------------------------------------

EOF
}

print_banner

# Directory that receives every dump file; it is recreated on each run.
dumpdir="/SYSINFO"

# Start from an empty dump directory: an old one is removed, not reused.
if [ -d "$dumpdir" ]; then
  #echo "ERR: Old $dumpdir exists, please remove $dumpdir, and run $prog again."
  # exit -1
  # ${dumpdir:?} aborts instead of running "rm -rf" on an empty path.
  /bin/rm -rf -- "${dumpdir:?}"
fi

# Every later step writes below $dumpdir, so there is no point in continuing
# when it (or its proc/ subdirectory) cannot be created.
if ! mkdir -p -- "$dumpdir/proc"; then
  echo "ERR: cannot create $dumpdir/proc" >&2
  exit 1
fi

# please speak English.
# Unsetting LANG alone is not enough when LC_ALL or LC_* are set, because
# those take precedence; force the C locale for every tool run below.
unset LANG
export LC_ALL=C

# set -x
hostname >"$dumpdir/hostname.out" 2>&1

# Group both commands so the system time AND the hardware clock end up in
# date.out. The original "date;hwclock >& file" only captured hwclock and
# printed the date to the terminal.
{
  date
  hwclock
} >"$dumpdir/date.out" 2>&1

echo "dumping hardware info..."

echo "dumping CPU info..."
# Snapshot of the CPU, boot command line, load and slab allocator files.
for proc_file in cpuinfo cmdline loadavg slabinfo; do
  cp "/proc/$proc_file" "$dumpdir/proc/"
done

echo "dumping memory info..."
cp /proc/meminfo "$dumpdir/proc/"

echo "dumping PCI info..."
# /proc/pci is absent on newer kernels; copy it only when present so the run
# does not print a spurious cp error. lspci provides the PCI details either way.
if [ -e /proc/pci ]; then
  cp /proc/pci "$dumpdir/proc/"
fi
lspci -vv >"$dumpdir/lspci.out" 2>&1

# Each step below announces itself and then stores either a command's
# combined stdout/stderr or a copy of a /proc file under $dumpdir.

echo "dumping ifconfig info..."
ifconfig >"$dumpdir/ifconfig.out" 2>&1
ifconfig -s >"$dumpdir/ifconfig-s.out" 2>&1
mii-tool >"$dumpdir/mii-tool.out" 2>&1

echo "dumping module info..."
lsmod >"$dumpdir/lsmod.out" 2>&1

echo "dumping device info..."
cp /proc/devices "$dumpdir/proc/"

echo "dumping interrupt info..."
cp /proc/interrupts "$dumpdir/proc/"

echo "dumping I/O info..."
for io_file in iomem ioports; do
  cp "/proc/$io_file" "$dumpdir/proc/"
done

echo "dumping partition info..."
cp /proc/partitions "$dumpdir/proc/"
df >"$dumpdir/df.out" 2>&1
df -i >"$dumpdir/df-i.out" 2>&1
fdisk -l >"$dumpdir/fdisk.out" 2>&1
sfdisk -s >"$dumpdir/sfdisk.out" 2>&1

echo "dumping net info..."
# -L dereferences symlinks: where /proc/net is a symlink (to self/net), a
# plain "cp -r" would only copy the link itself, leaving a dangling link in
# the dump instead of the network files.
cp -rfL /proc/net "$dumpdir/proc/net"

echo "dumping uname..."
uname -a >"$dumpdir/uname.out" 2>&1
# Distribution banner, static host table and boot loader configuration.
cp /etc/issue "$dumpdir"
cp /etc/hosts "$dumpdir"
cp /boot/grub/grub.conf "$dumpdir"

echo "dumping syslog..."
# Process and open-file snapshots.
top -b -n 1 >"$dumpdir/top.txt" 2>&1
lsof >"$dumpdir/lsof.txt" 2>&1

# The crontab archive and the lastlog report are written into /var/log
# before /var/log itself is archived -- presumably so that log.tgz contains
# them. Keep this order.
tar -c -v -z -f /var/log/cron.tgz /var/spool/cron/
lastlog >/var/log/lastlog.redflag 2>&1
tar -z -c -f "$dumpdir/log.tgz" --exclude=/var/log/lastlog /var/log


echo "dumping /etc..."
tar -z -c -f "$dumpdir/etc.tgz" /etc

# HA 4.0 data is archived only when its directory /opt/RSIrsf exists.
echo "dumping HA4.0..."
if test -d /opt/RSIrsf; then
  tar -z -c -f "$dumpdir/RSIrsf.tgz" /opt/RSIrsf
fi

# For HA 4.1, clplogcc is run with the dump directory as its -o argument
# when /opt/redflag/hacluster exists.
echo "dumping HA4.1..."
if test -d /opt/redflag/hacluster; then
  /usr/sbin/clplogcc -o "$dumpdir"
  #tar zcf $dumpdir/hacluster.tgz /opt/redflag/hacluster
fi

echo "dumping normal user crontab file..."
tar -z -c -f "$dumpdir/cron.tgz" /var/spool/cron

echo "other stuff..."
# Memory usage, root's shell history, login history, process listings,
# IPC objects and network sockets -- stdout only, errors go to the terminal.
free -m >"$dumpdir/free.out"
cp /root/.bash_history "$dumpdir/bash_history"
last >"$dumpdir/last.out"
ps auxw >"$dumpdir/ps-auxw.out"
ps auxw --forest >"$dumpdir/ps-auxw-forest.out"
ps -wef >"$dumpdir/ps-wef.out"
ipcs >"$dumpdir/ipcs.out"
netstat -ap >"$dumpdir/netstat-ap.out"

# X server configuration; the glob is left unquoted on purpose so it expands.
cp -f /etc/X11/XF86Config* "$dumpdir"
cp -r /etc/X11/xorg.conf "$dumpdir"

echo "Packaging..."

# Bundle the dump directory into ./sysinfo.tgz (relative to the current
# directory). Do not ask the user to send an archive that was never created.
if ! tar zcf sysinfo.tgz "$dumpdir"; then
  echo "ERR: failed to create ./sysinfo.tgz from $dumpdir" >&2
  exit 1
fi

# The address used to be wrapped in forum markup ("[email]...[/email]"),
# which was printed literally; show the plain address instead.
echo "--------------------------------------------------------------------------------------"
echo "Please send ./sysinfo.tgz to support@company.com for further investigation, thanx."
echo "--------------------------------------------------------------------------------------"

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/17252115/viewspace-740237/,如需轉載,請註明出處,否則將追究法律責任。

相關文章