最近我们服务的使用方总是反映我们的接口超时,于是做了一个监控脚本,统计最近五分钟的响应情况,并对异常情况发送邮件报警。
#!/bin/bash
#######################################
# Set up the paths and time markers shared by the other functions.
# Globals (written):
#   ori_log_path - nginx access log that gets scanned
#   tmp_log_path - scratch file for the filtered log lines
#   date_stamp   - time five minutes ago, formatted as %Y:%H:%M:%S
#   day_stamp    - current day of month (%d)
# NOTE(review): day_stamp comes from "now" while date_stamp comes from five
# minutes ago, so shortly after midnight the two refer to different days.
#######################################
define() {
  day_stamp=$(date +%d)
  date_stamp=$(date -d "-5min" +%Y:%H:%M:%S)
  tmp_log_path="/usr/local/nginx/logs/5min_abc.access.log"
  ori_log_path="/usr/local/nginx/logs/access.log"
}
#######################################
# Collect request statistics for the recent window of the access log.
#
# A log line is selected when its 5th field equals day_stamp and its 7th
# field compares >= date_stamp, with fields split on '/', ' ', '"' and '['.
# NOTE(review): this assumes the log line starts like
#   ADDR - - [DD/Mon/YYYY:HH:MM:SS ...   -- confirm against log_format.
# The last whitespace-separated field of each selected line is the value
# being summed (presumably the request time -- confirm against log_format).
#
# Globals:
#   ori_log_path, date_stamp, day_stamp (read)
#   log_num          (written) number of selected lines
#   log_gt_num       (written) selected lines whose last field is > 0.3
#   request_time     (written) sum of the last field over selected lines
#   ave_request_time (written) request_time / log_num, or 0 when log_num is 0
# Returns:
#   1 if the access log cannot be read (all statistics are then 0), else 0.
#######################################
function gather()
{
  local stats

  # Start from zeros so later steps never see empty values.
  log_num=0
  log_gt_num=0
  request_time=0
  ave_request_time=0

  if [[ ! -r "${ori_log_path}" ]]; then
    printf 'gather: cannot read %s\n' "${ori_log_path}" >&2
    return 1
  fi

  # Filter and aggregate in one pipeline: no scratch file is left behind and
  # every statistic comes from the same pass over the selected lines.
  stats=$(
    awk -F '[/ "[]' -v nstamp="${date_stamp}" -v dstamp="${day_stamp}" \
      '$7 >= nstamp && $5 == dstamp' "${ori_log_path}" \
      | awk '
          { total++; sum += $NF; if ($NF > 0.3) slow++ }
          END {
            # Guard the division: an empty window yields an average of 0.
            avg = (total > 0) ? sum / total : 0
            print total + 0, slow + 0, sum + 0, avg
          }'
  )

  read -r log_num log_gt_num request_time ave_request_time <<< "${stats}"

  # Keep the zero defaults if the pipeline produced nothing at all.
  log_num=${log_num:-0}
  log_gt_num=${log_gt_num:-0}
  request_time=${request_time:-0}
  ave_request_time=${ave_request_time:-0}
}
#######################################
# Print the collected statistics, one per line, to stdout.
# Globals (read): log_gt_num, request_time, ave_request_time, log_num
#######################################
output() {
  printf '%s\n' \
    "abc log_gt_0.3_num is ${log_gt_num}" \
    "abc requeset_time is ${request_time}" \
    "abc aver request time is ${ave_request_time}" \
    "abc request total is ${log_num}"
}
#######################################
# Mail an alert when the collected statistics cross a threshold.
# The alert is sent when log_gt_num is greater than 100, or when
# ave_request_time is greater than 0.02.
# Globals (read): log_gt_num, request_time, ave_request_time, log_num
# Outputs:
#   Pipes the report to mail(1) when alerting; always prints "sendmail"
#   to stdout, whether or not a mail was sent.
#######################################
function sendmail()
{
  local slow_count="${log_gt_num:-0}"
  local avg="${ave_request_time:-0}"

  # The average is fractional, which test(1) and expr(1) cannot compare
  # numerically, so awk performs the floating-point comparison. The value is
  # passed with -v, never through the shell, so no '>' redirection can occur.
  if [ "${slow_count}" -gt 100 ] \
    || awk -v avg="${avg}" 'BEGIN { exit !(avg + 0 > 0.02) }'; then
    printf '%s\n' \
      "ad log_gt_0.3_num is ${log_gt_num}" \
      "ad requeset_time is ${request_time}" \
      "ad aver request time is ${ave_request_time}" \
      "ad request total is ${log_num}" \
      | mail -s 'nginx log 报警,The last five minutes' 359381458@qq.com
  fi
  echo "sendmail"
}
#######################################
# Entry point: run the monitoring steps once, in order
# (define, gather, output, sendmail).
#######################################
main() {
  local step
  for step in define gather output sendmail; do
    "${step}"
  done
}
main