フリーのサーバ監視ツールMONIT。 - 牧歌組合〜耳コピとエロジャケとIT〜

サービスの死活確認やサーバリソース監視のために、最近はNagiosを使うことが多かったが、Nagiosより軽そうなのでMonitを試してみた。
スタンドアロンな自宅サーバだし、いちいちCGIも要らないなってことで。

http://mmonit.com/monit/download/
からダウンロード。展開するとすぐ動かせる。

# tar vxzpf monit-5.3-linux-x86.tar.gz
# cd monit-5.3
# ln -s /home/hpsuser/monit-5.3 /usr/local/monit
# vi /usr/local/monit/conf/monitrc
----------------------------------------
# Global settings: log file location, mail server used for notifications,
# and the default alert recipient.
set logfile /usr/local/monit/monit.log
set mailserver localhost
set alert root@localhost

#- Server (system-wide) resource monitoring
#  Every rule below only raises an alert; none of them takes corrective action.
  check system localhost
    # Load average thresholds: 1-minute above 4, 5-minute above 2.
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 then alert
    # Memory and swap utilisation thresholds.
    if memory usage > 75% then alert
    if swap usage > 25% then alert
    # CPU time thresholds, split into user, system and I/O-wait shares.
    if cpu usage (user) > 70% then alert
    if cpu usage (system) > 30% then alert
    if cpu usage (wait) > 20% then alert
#- MySQL process monitoring
#  The process is located through the pidfile below and controlled through
#  its init script.
check process mysqld with pidfile /var/lib/mysql/kadai.pid
    start program = "/etc/init.d/mysql start" with timeout 60 seconds
    stop program  = "/etc/init.d/mysql stop"
    # CPU: alert after 2 cycles above 60%, restart after 5 cycles above 80%.
    if cpu > 60% for 2 cycles then alert
    if cpu > 80% for 5 cycles then restart
    # Restart on sustained memory use above 200 MB or more than 250 children.
    if totalmem > 200.0 MB for 5 cycles then restart
    if children > 250 then restart
    # Stop the service when the 5-minute load average stays above 10 for 8 cycles.
    if loadavg(5min) greater than 10 for 8 cycles then stop
    # Connectivity probe using the mysql protocol on localhost:10501.
    # NOTE(review): 10501 is not the usual MySQL port (3306) - confirm it
    # matches the port this server actually listens on.
    if failed host localhost port 10501 protocol mysql
       then restart
    # After 5 restarts within 5 cycles, take the "timeout" action.
    if 5 restarts within 5 cycles then timeout
    group server
#- Application "counter" process monitoring
#  The process is located through the pidfile below and controlled through
#  its init script.
check process counterd pidfile /var/run/counterd.pid
    start program = "/etc/init.d/counterd start" with timeout 60 seconds
    stop program  = "/etc/init.d/counterd stop"
    # CPU: alert after 2 cycles above 60%, restart after 5 cycles above 80%.
    if cpu > 60% for 2 cycles then alert
    if cpu > 80% for 5 cycles then restart
    # Restart on sustained memory use above 200 MB or more than 250 children.
    if totalmem > 200.0 MB for 5 cycles then restart
    if children > 250 then restart
    # Stop the service when the 5-minute load average stays above 10 for 8 cycles.
    if loadavg(5min) greater than 10 for 8 cycles then stop
    # After 5 restarts within 5 cycles, take the "timeout" action.
    if 5 restarts within 5 cycles then timeout
    group server
#- MySQL Proxy process monitoring
#  The process is located through the pidfile below and controlled through
#  its init script.
check process mysql-proxy with pidfile /home/mysql-proxy/mysql-proxy.pid
    start program = "/etc/init.d/mysql-proxy start" with timeout 60 seconds
    stop program  = "/etc/init.d/mysql-proxy stop"
    # CPU: alert after 2 cycles above 60%, restart after 5 cycles above 80%.
    if cpu > 60% for 2 cycles then alert
    if cpu > 80% for 5 cycles then restart
    # Restart on sustained memory use above 200 MB or more than 250 children.
    if totalmem > 200.0 MB for 5 cycles then restart
    if children > 250 then restart
    # Stop the service when the 5-minute load average stays above 10 for 8 cycles.
    if loadavg(5min) greater than 10 for 8 cycles then stop
    # After 5 restarts within 5 cycles, take the "timeout" action.
    if 5 restarts within 5 cycles then timeout
    group server
#- Disk (filesystem) monitoring
#  Watches the filesystem on the LVM device below.
  check filesystem datafs with path /dev/mapper/VolGroup00-LogVol00
    # NOTE(review): the start program mounts "/" while the stop program
    # unmounts "/data" - these look inconsistent; confirm the intended
    # mount point and make both commands agree.
    start program  = "/bin/mount /"
    stop program  = "/bin/umount /data"
    # Stop monitoring if the device node's mode, owner or group differ from
    # 660 / root / disk.
    if failed permission 660 then unmonitor
    if failed uid root then unmonitor
    if failed gid disk then unmonitor
    # Space: alert when above 80% in 5 of 15 cycles, stop when above 99%.
    if space usage > 80% for 5 times within 15 cycles then alert
    if space usage > 99% then stop
    # Inodes: the alert threshold has no "%" sign (presumably an absolute
    # inode count), whereas the stop threshold is a percentage.
    if inode usage > 80000 then alert	#30000->80000
    if inode usage > 99% then stop
    group server
----------------------------------------

#- 起動スクリプトの作成
# cd /etc/rc.d/init.d/
# vi monit
----------------------------------------
#!/bin/sh
# chkconfig: 345 99 01
# description: Moniter Daemon
# processname: monit
#
# SysV init script for monit: handles start, stop and restart.
# The "chkconfig:" header above lists runlevels 345 with start priority 99
# and stop priority 01.

# Service name used in the status banners and in the lock file name.
progname=monit
# Marker file created by start() and removed by stop().
lockfile=/var/lock/subsys/${progname}
# monit binary and the control file passed to it with -c.
prog=/usr/local/monit/bin/monit
conf=/usr/local/monit/conf/monitrc
# Distribution init helpers; presumably the source of the `daemon` function
# that start() calls.
. /etc/init.d/functions
# Exit status reported at the end of the script; updated by start()/stop().
RETVAL=0
# Start monit through the `daemon` helper and record the result.
# Globals:  progname, prog, conf, lockfile (read); RETVAL (written)
# Outputs:  "Starting <progname>: " banner, helper output, then a newline
# Returns:  the exit status of `daemon`; the lock file is created only
#           when that status is 0
start() {
        echo -n $"Starting $progname: "
        # The helper receives the complete command line as one string.
        daemon "${prog} -c ${conf}"
        RETVAL=$?
        echo
        # Expansions are quoted so a lock path containing whitespace is
        # passed to touch as a single argument.
        if [ "$RETVAL" -eq 0 ]; then
                touch "$lockfile"
        fi
        return "$RETVAL"
}
# Stop monit by signalling every process whose command line matches $prog.
# Globals:  progname, prog, lockfile (read); RETVAL (written)
# Outputs:  "Stopping <progname>: " banner followed by a newline
# Returns:  0 when nothing was running or every match was signalled,
#           otherwise the exit status of kill; the lock file is removed
#           only when the result is 0
stop() {
        local pids
        echo -n $"Stopping $progname: "
        # Collect the PIDs first: piping an empty pgrep result into xargs
        # would still run kill once with no arguments, which fails and
        # leaves a stale lock file behind.
        pids=$(pgrep -f "$prog")
        if [ -n "$pids" ]; then
                # Unquoted on purpose: one PID per line must split into
                # separate arguments.
                kill $pids
                RETVAL=$?
        else
                # Already stopped counts as success so the lock is cleared.
                RETVAL=0
        fi
        echo
        if [ "$RETVAL" -eq 0 ]; then
                rm -f "$lockfile"
        fi
        return "$RETVAL"
}
# Run the action named by the first argument. "restart" is a stop followed
# by a start; any other word prints the usage line and exits with status 1.
case "${1}" in
  start)   start ;;
  stop)    stop ;;
  restart) stop; start ;;
  *)
        echo $"Usage: $0 {start|stop|restart}"
        exit 1
        ;;
esac
# Report the status left behind by the last action.
exit ${RETVAL}
----------------------------------------
# chkconfig --add monit
# chkconfig --list | grep monit
lvm2-monitor    0:off   1:on    2:on    3:on    4:on    5:on    6:off
mdmonitor       0:off   1:off   2:on    3:on    4:on    5:on    6:off
monit           0:off   1:off   2:off   3:on    4:on    5:on    6:off

動作は非常に軽く、アラートメール送信だけなら使えそうな感じでした。
導入もお手軽だし、機会があれば仕事でも使ってみたい。