1、编写python脚本监控nginx
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Nagios plugin that checks how many nginx processes are running.

The number of processes reported by ``ps -C nginx`` is compared against the
optional ``-w`` / ``-c`` thresholds and the script exits with the matching
Nagios plugin status: 0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).

The code sticks to constructs that behave the same on Python 2 and Python 3
(single-argument ``print(...)``, no f-strings).
"""
import getopt
import subprocess
import sys

# Exit codes understood by Nagios.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

# The thresholds apply to the number of nginx processes, which is what the
# script actually measures.
USAGE = """check_nginx is a Nagios to monitor nginx status
Usage:
check_nginx [-h|--help][-w|--warning][-c|--critical]
Options:
--help|-h)
print check_nginx help.
--warning|-w)
Sets a warning level for nginx process count. Default is: off
--critical|-c)
Sets a critical level for nginx process count. Default is: off
Example:
./check_nginx -w 10 -c 5"""


def usage():
    """Print the help text and exit with the UNKNOWN status (3)."""
    print(USAGE)
    sys.exit(STATE_UNKNOWN)


def parse_thresholds(argv):
    """Parse command-line arguments into ``(warning, critical)``.

    Args:
        argv: argument list without the program name (``sys.argv[1:]``).

    Returns:
        A ``(warning, critical)`` tuple of ints. A threshold that was not
        given on the command line is ``None``, meaning that check is off.

    Exits with status 3 (after printing the usage text) on ``-h``/``--help``,
    on an unknown option, or when a threshold is not an integer.
    """
    try:
        # Short format "hw:c:": "h" takes no argument; the colon after "w"
        # and "c" means each of them requires one.
        # Long format: "help" has no "=", so it takes no argument; the other
        # two end in "=" and therefore require one.
        # getopt returns a list of (option, value) tuples, e.g.
        # [('-w', '4'), ('-c', '2')], plus a list of the remaining positional
        # arguments (unused here).
        options, _positional = getopt.getopt(
            argv, "hw:c:", ["help", "warning=", "critical="]
        )
    except getopt.GetoptError:
        usage()

    warning = None
    critical = None
    try:
        for name, value in options:
            if name in ("-h", "--help"):
                usage()
            elif name in ("-w", "--warning"):
                warning = int(value)
            elif name in ("-c", "--critical"):
                critical = int(value)
    except ValueError:
        # A non-numeric threshold is a usage error; report UNKNOWN rather
        # than dying with a traceback (exit status 1 would read as WARNING).
        usage()
    return warning, critical


def count_nginx_processes():
    """Return the number of running processes whose command name is nginx.

    Raises:
        OSError: if the ``ps`` executable cannot be started.
    """
    # "pid=" / "cmd=" give the columns empty headers, which makes ps omit the
    # header row, so every output line is one process.
    proc = subprocess.Popen(
        ["ps", "-C", "nginx", "-o", "pid=", "-o", "cmd="],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    output, _errors = proc.communicate()
    # ps exits non-zero when nothing matches; that simply means zero
    # processes, so the return code is deliberately not checked.
    return len([line for line in output.splitlines() if line.strip()])


def evaluate(count, warning, critical):
    """Map a process count to a Nagios ``(exit_code, message)`` pair.

    Args:
        count: number of nginx processes found.
        warning: WARNING when ``count`` is below this value; ``None`` = off.
        critical: CRITICAL when ``count`` is below this value; ``None`` = off.
    """
    if critical is not None and count < critical:
        return STATE_CRITICAL, "Critical-nginx process is killed."
    if warning is not None and count < warning:
        return STATE_WARNING, "Warning-nginx process is too low."
    return STATE_OK, "OK-nginx is running"


def main(argv=None):
    """Run the check and exit with the resulting Nagios status code.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    warning, critical = parse_thresholds(argv)

    try:
        count = count_nginx_processes()
    except OSError:
        print("NGINX STATUS unknown: Error while getting Connection")
        sys.exit(STATE_UNKNOWN)

    code, message = evaluate(count, warning, critical)
    print(message)
    sys.exit(code)


if __name__ == "__main__":
    main()
2、将脚本check_nginx拷贝到/usr/local/nagios/libexec/下,并chmod +x ./check_nginx
如果你要监控nagios安装本机的nginx进程,请参考如下步骤:
3、在/usr/local/nagios/etc/objects/commands.cfg(命令定义文件)中增加如下内容
define command{
command_name check_nginx
command_line $USER1$/check_nginx -w $ARG1$ -c $ARG2$
}
4、在自己定义的service.cfg中增加
define service{
use generic-service
host_name localhost
service_description nginx
check_command check_nginx!4!2
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups admins
}
如果需要监控远程主机的nginx进程,请先安装nrpe,然后参考如下
3、用vim编辑/etc/nagios/nrpe.cfg
加入一行
command[check_nginx]=/usr/local/nagios/libexec/check_nginx -w $ARG1$ -c $ARG2$
4、修改/etc/nagios/nrpe.cfg
dont_blame_nrpe=1 #打开参数传递
5、重启nrpe
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
测试执行
/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1 -c check_nginx -a 4 1
如果能正常返回检查结果(例如“OK-nginx is running”),说明NRPE配置已经生效
6、在/usr/local/nagios/etc/objects/commands.cfg(命令定义文件)中增加
define command{
command_name check_nrpe
command_line /usr/local/nagios/libexec/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -a $ARG2$ $ARG3$
}
7、在自己定义的service.cfg中增加
define service{
use generic-service
host_name localhost
service_description nginx
check_command check_nrpe!check_nginx!4!2
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups admins
}