• shell脚本实现多台服务器自动巡检


    运维服务一个项目二十多台(或者多台)服务器,每天要做的服务器性能巡检工作,就是查看服务器的CPU、内存、磁盘空间是否在正常值范围内。像这样每天或者每个固定时间段都要做的重复工作,为了简化,我写了基于Linux服务器的自动巡检脚本,在crontab中设定一个固定时间自动执行即可,以减少人工重复劳动。

    环境:

    我的项目上主要服务器是LINUX和AIX两种服务器,总数在30台左右。现在的工作量是每周巡检两次,都是手动登录到每台服务器使用相应的命令查看服务器性能参数。

    思路:

    1、所有的服务器之间的网络都是在同一个局域网内,所有网络两两相通。

    2、在其中选择一台性能相对较好或者是服务器运行压力较小的服务器,作为巡检服务器。

    3、通过这一服务器来实现对其他服务器的巡检,然后把巡检结果记录到巡检服务器上。

    4、每台服务器巡检结果都以时间和ip做命名用来区分,最后将所有巡检结果压缩打包。

    5、每次维护人员只需要定时去取这个压缩包查看最后结果即可,免去了对每台服务器都需要登录和输入相同的命令进行查看。

    具体实现脚本

    脚本1

    #!/bin/bash
    # Telnet-based inspection of the Linux and AIX hosts listed in
    # $path/config/linuxconfig.txt and $path/config/AIXconfig.txt (one
    # "name=ip" entry per line). For every host a fixed list of commands is
    # typed into a telnet session; the session transcript is stored under
    # /home/check/result/<day>/ and all transcripts are zipped at the end.
    #
    # NOTE(review): the login name and password (root/root) are hard-coded and
    # telnet transmits them in clear text - prefer SSH keys where possible.

    LANG=en
    # Split the output of `date` into positional parameters:
    # $1=weekday $2=month $3=day-of-month $4=time.
    set -- $(date)
    path="/home/check"
    day="$1-$2-$3"
    stamp="$day-$4"
    logfile="$path/log/$day.log"
    result_dir="/home/check/result/$day"

    # log_msg TEXT...: print a timestamped message and append it to the log.
    log_msg() {
        echo "$(date +"%Y/%m/%d-%H:%M:%S")" "$@" | tee -a "$logfile"
    }

    # Keystrokes for one Linux telnet session: login, password, commands.
    linux_session() {
        sleep 1
        echo root            # login name
        sleep 1
        echo root            # password
        sleep 3
        echo "free -k"
        echo ""
        echo "df -k"
        echo ""
        echo "ps -ef| grep java"
        echo ""
        echo "netstat -an|egrep -n '80|22|21|23|9043|9044|45331|45332|39194|19195'"
        echo ""
        echo "/sbin/ip ad"
        echo ""
        echo " tail -2000  /var/log/messages | grep -v snmp |grep  -i  error "
        echo ""
        echo "/bin/dmesg  |grep -i error"
        echo ""
        echo "top -n1|sed -n '1,5p'"
        # vmstat must be sent before "exit"; the original sent it afterwards,
        # so it was never executed.
        echo "/usr/bin/vmstat  1 3"
        echo ""
        echo "exit"
        # Keep stdin open long enough for the remote commands to finish;
        # telnet ends earlier by itself once the remote shell exits.
        sleep 10
    }

    # Keystrokes for one AIX telnet session.
    # NOTE(review): topas is a full-screen monitor; confirm it returns to the
    # shell so that the following "exit" is actually interpreted.
    aix_session() {
        sleep 1
        echo root            # login name
        sleep 1
        echo root            # password
        sleep 5
        echo ""
        echo "df -g"
        echo ""
        echo "ps -ef| grep java"
        echo ""
        echo "netstat -an|egrep -n '80|22|21|23|9043|9044|45331|45332|39194|19195'"
        echo ""
        echo "ifconfig -a"
        echo ""
        echo "topas"
        echo "exit"
        sleep 5
    }

    # inspect_hosts CONFIG LABEL SESSION_FUNCTION
    # Runs SESSION_FUNCTION against every "name=ip" entry found in CONFIG.
    inspect_hosts() {
        local config=$1 label=$2 session=$3 line ip
        while read -r line; do
            ip=$(echo "$line" | cut -d '=' -f2)
            [ -n "$ip" ] || continue      # skip blank lines
            log_msg "check $label " "$ip" " starting "
            "$session" | telnet "$ip" > "$result_dir/$ip-$stamp.txt"
            log_msg "check $label " "$ip" " end"
            echo "" | tee -a "$logfile"
        done < "$config"
    }

    # The log directory has to exist before the first tee.
    mkdir -p "$path/log"
    echo "start running" | tee -a "$logfile"
    if [ ! -d "$result_dir" ]; then
        mkdir -p "$result_dir"
        log_msg "create " "$day" "directory success "
    fi

    log_msg "starting reading linuxconfig.txt "
    inspect_hosts "$path/config/linuxconfig.txt" "LINUX" linux_session
    log_msg "end reading linuxconfig.txt  "

    log_msg "starting reading AIXconfig.txt "
    inspect_hosts "$path/config/AIXconfig.txt" "IBM AIX" aix_session
    log_msg "end reading AIXconfig.txt "

    zip -r "$result_dir/$day.zip" "$result_dir"/*
    echo "End running "
    

    注意:该脚本的巡检是基于TELNET服务所以被检服务器必须开启TELNET服务

    脚本2

    #!/bin/bash
    #admin:spirits

    #*********** CPU / zombie process check *************
    echo "$(date '+%Y年%m月%d日 %H:%M:%S') 数据库服务器硬件情况开始巡检。。。"

    # Capture six batch-mode top iterations once, in memory, instead of
    # appending to files named top/newtop/insisttop in the current directory.
    top_output=$(top -bn 6)

    # CPU idle percentage of every iteration. The expression accepts both the
    # old "98.5%id," and the newer "98.5 id," summary formats.
    idle_values=$(printf '%s\n' "$top_output" \
        | sed -nE '/[Cc]pu\(s\)/s/.*[ ,:]([0-9.]+)[ %]*id.*/\1/p')

    # Iterations 4-6 are used, as in the original script.
    top1=$(printf '%s\n' "$idle_values" | sed -n 4p)
    top2=$(printf '%s\n' "$idle_values" | sed -n 5p)
    top3=$(printf '%s\n' "$idle_values" | sed -n 6p)

    # Zombie count taken from the "Tasks:" line of the second iteration.
    top4=$(printf '%s\n' "$top_output" \
        | sed -nE 's/.*[ ,]([0-9]+) zombie.*/\1/p' | sed -n 2p)

    if [ "${top4:-0}" -gt 0 ]; then
        echo "$(date '+%Y年%m月%d日 %H:%M:%S') 采集处理服务器上出现僵尸进程,巡检程序将自动kill该进程,如需人工确认请执行命令top后再执行ps -A -ostat,ppid,pid,cmd | grep -e '^[Zz]'来确认是否将僵尸进程杀死" >> ./newreport.txt

        # NOTE(review): this sends SIGKILL to the *parents* of the zombies
        # (column 2 is the PPID). Confirm that killing those parents is
        # acceptable on this host. -r keeps xargs from running kill with no
        # arguments when the zombie has already gone.
        ps -A -o stat,ppid,pid,cmd | grep -e '^[Zz]' | awk '{print $2}' \
            | sort -u | xargs -r kill -9
    else
        echo "$(date '+%Y年%m月%d日 %H:%M:%S') 采集处理服务器上无僵尸进程正常运行!"
    fi

    # Integer part of each idle value. The original kept the first two
    # characters, which turned "100.0" into 10 and "5.0" into "5.".
    a=${top1%%.*}
    b=${top2%%.*}
    c=${top3%%.*}

    echo "top1: $a"
    echo "top2: $b"
    echo "top3: $c"

    if [ -z "$a" ] || [ -z "$b" ] || [ -z "$c" ]; then
        # Nothing could be parsed; do not pretend the CPU is healthy.
        echo "无法从top输出中解析CPU空闲率,请人工检查!"
    elif [ "$a" -lt 20 ] && [ "$b" -lt 20 ] && [ "$c" -lt 20 ]; then
        echo "$(date '+%Y年%m月%d日 %H:%M:%S') 数据库服务器CPU占用率不正常,top取到的值是$top1,$top2,$top3,小于参考值20,请及时处理!" >> ./newreport.txt
    else
        echo "CPU占用率正常!"
    fi
      
    #*************** memory check ***********
    # Read /proc/meminfo directly. The original took line 3 of `free -g`,
    # which is the "-/+ buffers/cache" line only on old procps versions; on
    # current versions line 3 is the Swap line, so the wrong value was checked.
    total_kb=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo)
    # Prefer MemAvailable; fall back to MemFree+Buffers+Cached when the kernel
    # does not report it.
    avail_kb=$(awk '/^MemAvailable:/ {avail = $2; found = 1}
                    /^MemFree:|^Buffers:|^Cached:/ {sum += $2}
                    END {print (found ? avail : sum + 0)}' /proc/meminfo)
    total_kb=${total_kb:-0}
    avail_kb=${avail_kb:-0}

    # Alert when the available memory is at or below this share of the total.
    canshu=0.2

    # Threshold in kB; the comparison is done in kB so that small hosts are
    # not rounded down to 0 GB.
    tempd=$(echo "$total_kb $canshu" | awk '{printf "%d", $1 * $2}')

    # Whole-GB values, kept for the report message.
    free1=$(( avail_kb / 1048576 ))
    total=$(( total_kb / 1048576 ))
    biaozhun=$(( tempd / 1048576 ))

    if [ "$avail_kb" -le "$tempd" ]; then
        echo "$(date '+%Y年%m月%d日 %H:%M:%S')  数据库服务器内存占用率过高,free -g取到的值是$free1,小于等于参考值$biaozhun,请及时处理!" >> ./newreport.txt
    else
        echo "内存占用率正常!"
    fi
      
    #************** filesystem usage check **********
    # Inspect every mounted filesystem. The original looked only at the first
    # five df lines and failed with a test error when fewer were present.
    # -P keeps each filesystem on one line even with long device names.
    df_usage=$(df -P | awk 'NR > 1 { sub(/%/, "", $5); if ($5 ~ /^[0-9]+$/) print $5 }')
    df_max=$(printf '%s\n' "$df_usage" | sort -n | tail -1)
    df_all=$(printf '%s\n' "$df_usage" | paste -sd, -)

    if [ "${df_max:-0}" -gt 90 ]; then
        echo "$(date '+%Y年%m月%d日 %H:%M:%S') 数据库服务器磁盘占用率过高!df -h取到的值是${df_all},参考值是90,若其中一个或一个以上大于参考值,请及时处理!" >> ./newreport.txt
    else
        echo "磁盘占用率正常!"
    fi
      
    #********************* disk IO check ***************
    # Truncate instead of append so samples left behind by an aborted run are
    # never mixed into this one.
    iostat -x 2 5 > iostat.txt

    # iostat_max COLUMN FIRST LAST
    # Prints the largest value found in field COLUMN on lines FIRST..LAST of
    # iostat.txt ('%' signs removed); prints 0 when there is nothing to read.
    iostat_max() {
        awk -v col="$1" -v first="$2" -v last="$3" '
            NR >= first && NR <= last {
                v = $col
                gsub(/%/, "", v)
                if (v + 0 > max + 0) max = v
            }
            END { print max + 0 }' iostat.txt
    }

    # Field 11 / field 12 are read as svctm / %util, for three of the five
    # reports (lines 16-19, 25-28 and 34-37).
    # NOTE(review): these line and field numbers fit one specific device count
    # and sysstat version - confirm them against this host's iostat output.
    maxa=$(iostat_max 11 16 19)
    maxb=$(iostat_max 11 25 28)
    maxc=$(iostat_max 11 34 37)
    maxd=$(iostat_max 12 16 19)
    maxe=$(iostat_max 12 25 28)
    maxf=$(iostat_max 12 34 37)

    #****************** decision ************************
    # Compare the integer part. The original kept only the first character,
    # so a comparison such as "-ge 15" could never succeed.
    m=${maxa%%.*}
    n=${maxb%%.*}
    h=${maxc%%.*}
    k=${maxd%%.*}
    l=${maxe%%.*}
    o=${maxf%%.*}

    # All three reports must show a high service time together with a device
    # that is at least 99% busy. The original condition contained "$$" and
    # "15]" and therefore never parsed; its "-lt 100" bound also excluded a
    # fully saturated device, so the upper bound is now inclusive.
    if [ "$m" -ge 15 ] && [ "$k" -ge 99 ] && [ "$k" -le 100 ] \
        && [ "$n" -ge 15 ] && [ "$l" -ge 99 ] && [ "$l" -le 100 ] \
        && [ "$h" -ge 15 ] && [ "$o" -ge 99 ] && [ "$o" -le 100 ]; then
        echo "$(date '+%Y年%m月%d日 %H:%M:%S')  数据库服务器磁盘IO存在瓶颈,请及时处理!" >> ./newreport.txt
    else
        echo "磁盘IO正常!"
    fi

    rm -rf ./iostat.txt
      
    #********************************* network connectivity check **********************
    # Take the loss percentage from ping's summary line. The original read
    # field 6 of output line 9, which is the summary only when all five
    # replies arrive - exactly the case in which nothing needs reporting.
    ping_summary=$(ping -s 4096 -c 5 135.0.51.15 | grep 'packet loss')
    network1=$(printf '%s\n' "$ping_summary" \
        | sed -nE 's/.*[ ,]([0-9]+)(\.[0-9]+)?% packet loss.*/\1/p')
    # No summary at all (unreachable network, ping failure): treat as total loss.
    network1=${network1:-100}

    if [ "$network1" -gt 0 ]; then
        echo "$(date '+%Y年%m月%d日 %H:%M:%S') 数据库服务器到该目标IP之间的网络不稳定,ping取到的值是$network1,大于参考值是0,系统存在风险,请及时处理!" >> ./newreport.txt
    else
        echo "网络连通性正常!"
    fi

    echo "$(date '+%Y年%m月%d日 %H:%M:%S') 数据库服务器硬件情况巡检结束!"
    

    脚本3

    运维需要了解服务器的资源使用率可以通过脚本查看,多台可以通过配合ansible进行查看

    #!/bin/bash
    # Collects a one-shot hardware/OS summary of the local host and writes it
    # to inspection.txt in the current directory (the file is overwritten).

    phy_cpu=$(grep "physical id" /proc/cpuinfo | sort -u | wc -l)
    logic_cpu_num=$(grep -c "processor" /proc/cpuinfo)
    cpu_core_num=$(grep "cores" /proc/cpuinfo | uniq | awk -F: '{print $2}' | sed 's/^[[:space:]]*//')
    cpu_freq=$(grep MHz /proc/cpuinfo | uniq | awk -F: '{print $2}' | sed 's/^[[:space:]]*//')
    system_core=$(uname -r)

    # /etc/redhat-release exists only on Red Hat style systems; fall back to
    # os-release, then to /etc/issue.
    if [ -r /etc/redhat-release ]; then
        system_version=$(cat /etc/redhat-release)
    elif [ -r /etc/os-release ]; then
        system_version=$(. /etc/os-release && printf '%s' "$PRETTY_NAME")
    else
        system_version=$(head -n 1 /etc/issue 2>/dev/null)
    fi

    system_hostname=$(hostname | awk '{print $1}')
    systemc_envirement_variables=$(env | grep PATH)
    mem_free=$(grep MemFree /proc/meminfo)
    disk_usage=$(df -h)
    system_uptime=$(uptime)
    system_load=$(cat /proc/loadavg)
    # First non-loopback IPv4 address; accepts both "inet addr:1.2.3.4" (old
    # net-tools) and "inet 1.2.3.4" (new net-tools) layouts.
    system_ip=$(ifconfig | awk '/inet / && !/127\.0\.0\.1/ { sub(/^addr:/, "", $2); print $2; exit }')
    mem_info=$(/usr/sbin/dmidecode | grep -A 16 "Memory Device" | grep -E "Size|Locator" | grep -v Bank)
    mem_total=$(grep MemTotal /proc/meminfo)
    day01=$(date +%Y)
    day02=$(date +%m)
    day03=$(date +%d)

    path=inspection.txt

    # Multi-line values (df, dmidecode, PATH variables) go on their own lines
    # below the label. The original expanded them unquoted, which squeezed
    # whole tables into a single line.
    {
        echo " "
        echo "${day01}年${day02}月${day03}日系统巡检报告"
        printf '%s\t%s\n' "主机名:" "$system_hostname"
        printf '%s\t%s\n' "服务器IP:" "$system_ip"
        printf '%s\t%s\n' "系统内核:" "$system_core"
        printf '%s\t%s\n' "操作系统版本:" "$system_version"
        printf '%s\n%s\n' "磁盘使用情况:" "$disk_usage"
        printf '%s\t%s\n' "CPU核数:" "$cpu_core_num"
        printf '%s\t%s\n' "物理CPU个数:" "$phy_cpu"
        printf '%s\t%s\n' "逻辑CPU个数:" "$logic_cpu_num"
        printf '%s\n%s\n' "系统环境变量:" "$systemc_envirement_variables"
        printf '%s\t%s\n' "CPU的主频:" "$cpu_freq"
        printf '%s\n%s\n' "内存简要信息:" "$mem_info"
        printf '%s\t%s\n' "内存总大小:" "$mem_total"
        printf '%s\t%s\n' "内存空间:" "$mem_free"
        printf '%s\t%s\n' "时间/系统运行时间/当前登陆用户/系统过去1分钟/5分钟/15分钟内平均负载/" "$system_uptime"
        printf '%s\t%s\n' "1分钟/5分钟/15分钟平均负载/在采样时刻,运行任务的数目/系统活跃任务的个数/最大的pid值线程/" "$system_load"
    } > "$path"
    

    脚本4

    #!/bin/bash

    #set -x
    # date: 2012-02-25 (was a bare line that the shell tried to run as a command)
    #version: 2.0
    # Force an English locale so the text parsed below has a stable format.
    export LC_ALL="en_US.UTF-8"
     
    server_info(){
        # Prints seven labelled sections describing the host: name, eth0
        # address/mask, gateway, product name, CPU model, installed memory
        # modules and the first line of /etc/issue.
        echo "===================================================="
        echo "======1 hostname======"
        /bin/hostname
        echo "======2 IP MASK======"
        /sbin/ifconfig eth0 | awk '/inet addr:/ {print $2,"/ "$4}'
        echo "======3 Gateway======"
        awk -F "=" '/GATEWAY/ {print $2}' /etc/sysconfig/network
        echo "======4 Product Name======"
        dmidecode | grep -A10 "System Information$" | awk '/Product Name:/ {print $3,$4,$5}'
        echo "======5 CPU ======"
        grep "name" /proc/cpuinfo | cut -d: -f2 | awk '{print "*"$1,$2,$3,$4}' | uniq -c
        echo "======6 Physical memory number======"
        dmidecode | grep -A 16 "Memory Device$" | grep Size: \
            | grep -v "No Module Installed" | awk '{print "*" $2,$3}' | uniq -c
        echo "======7 System version ======"
        head -1 /etc/issue
        echo "========================================================="
    }
     
    OS_info(){
        # Prints the kernel identification (uname -a) and the third and fourth
        # words of the uptime line, framed by separator lines.
        local frame="=========================================================="
        echo "$frame"
        echo "======1 kernel version ======"
        uname -a
        echo "======2 running day ======"
        /usr/bin/uptime | awk '{print $3,$4}'
        echo "$frame"
    }
     
    performance_info(){
        # Prints CPU, memory and swap utilisation in percent, followed by the
        # first 25 lines of a batch-mode top snapshot.
        local cpu_idle free_out mem_total mem_used Swap_total Swap_used

        echo ==========================================================
        echo ======1 CPU used ======
        # -b makes top usable without a terminal (cron, redirected output).
        # The idle value is located by its "id" tag, so both the "98.5%id,"
        # and the "98.5 id," layouts work; used = 100 - idle.
        cpu_idle=$(top -b -n 1 \
            | sed -nE '/[Cc][Pp][Uu]\(s\)/s/.*[ ,:]([0-9.]+)[ %]*id.*/\1/p' \
            | head -n 1)
        awk -v idle="$cpu_idle" 'BEGIN {
            if (idle == "") print "N/A"; else printf "%.1f\n", 100 - idle }'

        free_out=$(free -m)

        echo ======2 memory used ======
        mem_total=$(printf '%s\n' "$free_out" | awk '/^Mem/ {print $2}')
        mem_used=$(printf '%s\n' "$free_out" | awk '/^Mem/ {print $3}')
        # A zero total prints 0.00 instead of a division error.
        awk -v used="$mem_used" -v total="$mem_total" 'BEGIN {
            if (total + 0 == 0) print "0.00"; else printf "%.2f\n", used / total * 100 }'

        echo ======3 swap used ======
        Swap_total=$(printf '%s\n' "$free_out" | awk '/^Swap/ {print $2}')
        Swap_used=$(printf '%s\n' "$free_out" | awk '/^Swap/ {print $3}')
        # Hosts without swap report a total of 0.
        awk -v used="$Swap_used" -v total="$Swap_total" 'BEGIN {
            if (total + 0 == 0) print "0.00"; else printf "%.2f\n", used / total * 100 }'

        echo ======4 top pic ======
        top -b -n 1 | head -25
        echo ==========================================================
    }
     
    sec_info(){
        # Prints logged-in users, filesystem usage, kernel messages that
        # mention failures or errors, and the last login of every account.
        echo ======1 user load ======
        w
        echo ======2 file used ======
        df -ah
        echo ======3 dmesg error======
        # Case-insensitive, one pass, so "Error" and "FAILED" are caught too.
        dmesg | grep -iE 'fail|error'
        # This section shows lastlog output; it was mislabelled "demsg error".
        echo ======4 last login======
        lastlog
    }
    
    system_hardware_config(){
        # Prints tab-separated disk usage (first six df columns) and a short
        # memory table built from the output of free.
        echo ===========================disk====================================
        # The awk programs are single-quoted: inside double quotes the shell
        # expanded $1..$6 itself and the nested quotes ended the string early.
        df -H | awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $4, $5, $6 }'
        echo ===========================free====================================
        free | head -1 | awk 'BEGIN { OFS = "\t" } { print $1, $2, $6 }'
        free -m | awk 'BEGIN { OFS = "\t" } NR == 2 || NR == 4 { print $2, $3, $7 }'
    }
     
    # Append every report section, in order, to "<hostname>-<YYYY-MM-DD>" in
    # the current directory.
    report_file=$(/bin/hostname)-$(date +%F)
    for section in server_info OS_info performance_info sec_info; do
        "$section" >>"$report_file"
    done

    echo "run Ok"
    

    日常LINUX巡检命令

    hostname
    uname -a
    netstat -rn
    ifconfig -a
    cat /etc/sysconfig/hwconf
    cat /proc/meminfo
    cat /proc/cpuinfo
    cat /proc/swaps
    sfdisk -g
    df -k
    sfdisk -g
    dmesg
    more /var/log/boot.log
    more /var/log/messages
    

    linux服务器的日常巡检脚本

    1、需巡检的服务器上定时执行:

    #!/bin/sh
    # Daily self-check, meant to run from cron on every inspected server.
    # Writes <hostname>_check_<YYYYMMDD>.txt into the monitor directory.

    # Change directory first: the original wrote its first header line before
    # the cd, so that line ended up in a stray dc1.txt elsewhere.
    cd /home/wjlcn/monitor/check/ || exit 1
    date=$(date +%c)
    filename=$(hostname)_check_$(date +%Y%m%d).txt

    # Collect all sections in a fresh scratch file (truncated, not appended,
    # so leftovers from an aborted run never leak into today's report).
    {
        echo "------------ daily check begin -----------------"
        echo "-----------sar -ru 10 3----------------"
        sar -ru 10 3 | sed -n '21,25p'
        echo "------------top -d 1 -n 1 -------------"
        # Columns 9 and 12 of lines 8-10 of the top output, as in the original.
        /usr/bin/top -b -d 1 -n 1 | sed -n '1,10p' | awk '{print $9,$12}' | sed '1,7d'

        echo "------------free -m ----------------"
        free -m
        echo "--------------df -h ---------------"
        df -h
        echo "----------  tripwire --check ----------"
        /usr/sbin/tripwire --check | sed -n '10p;18p;33,37p'
    } >dc1.txt

    {
        echo "$date"
        cat dc1.txt
        echo "$date"
        echo "--------------- the end ---------------"
    } >>"$filename"
    rm -f dc1.txt
    

    2、定时上传至ftp服务器

    #!/bin/sh
    # Uploads the daily check reports to the FTP server, so that all servers
    # can be reviewed from the FTP server alone.
    # NOTE(review): the daily check script writes to /home/wjlcn/monitor/check
    # while this one reads /home/itownet/monitor/check - confirm which
    # directory is intended.
    cd /home/itownet/monitor/check || exit 1
    LOFFILE=ftp.log
    # -i disables the per-file confirmation of mput. "put" does not expand
    # wildcards, so the original "put *.txt" looked for a file literally named
    # "*.txt".
    ftp -i -n >>"$LOFFILE" <<EOF
    open IP
    user user  password
    binary
    cd test/pcreport
    mput *.txt
    bye
    EOF
    

    文件说明

    该Shell脚本旨在针对大量Linux服务器的巡检提供一种相对自动化的解决方案。脚本组成有三部分:shellsh.sh、checksh.sh、file.txt;这三个文件需放在一个文件夹下以root权限执行,缺一不可。

    脚本用法:

    将要巡检的服务器的IP地址和对应的密码全部放入file.txt中保存,每行一个IP对应一个密码即可。然后用如下命令运行:

    ./shellsh.sh file.txt 192.168.182.143 123456

    其中file.txt可以更换文件名,192.168.182.143为你想保存巡检日志的到哪个服务器的服务器IP,123456为该服务器的密码。

    运行结果:

    运行完后会在192.168.182.143服务器的/tmp目录下生成一个目录,即:GatherLogDirectory,这个目录下存放的是被巡检的服务器的巡检日志,这些日志以被巡检的服务器的IP命名,形如:192.168.182.146.log。在被巡检的服务器上会生成两个目录,即:CheckScript、LocalServerLogDirectory;其中CheckScript中是checksh.sh脚本,LocalServerLogDirectory中存放的是checksh.sh在该服务器上运行后生成的日志。

    测试结果:

    我只在虚拟机上的三台Linux系统上测试过,分别是Ubuntu、RedHat、Kali。运行正常,平均巡检一个服务器花费3分钟。

    cat shellsh.sh
    #!/bin/bash
    # shellsh.sh - copies checksh.sh to every server listed in the login file
    # and runs it there; each server then sends its log to the gather server.
    #
    # Usage: ./shellsh.sh login_info gather_server_ip gather_server_password
    #   login_info              file with one "ip password" pair per line
    #   gather_server_ip        server that collects the inspection logs
    #   gather_server_password  root password of the gather server
    login_info=$1
    gather_server_ip=$2
    gather_server_password=$3
    # First IPv4 address printed by ifconfig; only used in the usage example.
    # -E is required: without it "(", ")" and "{n,m}" are literal characters
    # and the pattern never matched.
    grep_ip=$(ifconfig | grep -E -o '([[:digit:]]{1,3}\.){3}[[:digit:]]{1,3}' | head -n 1)

    GatherPath="/tmp/GatherLogDirectory"
    CheckScriptPath="/tmp/CheckScript"

    if [ $# -ne 3 ]; then
        {
            echo "Wrong number of parameters!"
            echo
            echo "Usage: $0 login_info gather_server_ip gather_server_password"
            echo
            echo "For example: $0 IpAndPassword.txt $grep_ip 123456"
            echo
        } >&2
        exit 1
    fi

    # -d tests for the directory itself (the original tested -x, the
    # execute/search permission bit).
    if [ ! -d "$GatherPath" ]; then
        mkdir -p "$GatherPath"
        echo "The log's path is: $GatherPath"
    fi
    
    
    
    # For every "ip password" line: create the script directory on the
    # server, copy checksh.sh into it, then run it with the gather server's
    # address and password.
    #
    # NOTE(review): passwords are interpolated into the expect scripts, so a
    # password containing ", $, [ or \ will break them; key-based SSH avoids
    # both this and having passwords on disk.
    # The "\r" after each send is the Enter key; these escapes had been lost.
    while read -r server_ip server_password _; do
        [ -n "$server_ip" ] || continue      # skip blank lines

        login_server_command="ssh -o StrictHostKeyChecking=no root@$server_ip"
        scp_gather_server_checksh="scp checksh.sh root@$server_ip:$CheckScriptPath"

        # Step 1: create the script directory. The failure text and the shell
        # prompt are awaited together, so a successful login no longer sits
        # out a full timeout waiting for "Permission denied".
        /usr/bin/expect <<EOF || { echo "login to $server_ip failed, skipping" >&2; continue; }
            set timeout 20
            spawn $login_server_command
            expect {
                "*yes/no" { send "yes\r"; exp_continue }
                "*password:" { send "$server_password\r" }
            }
            expect {
                "Permission denied" { exit 1 }
                "#" { send "mkdir -p $CheckScriptPath; exit\r" }
            }
            expect eof
    EOF

        # Step 2: copy checksh.sh to the server.
        /usr/bin/expect <<EOF || { echo "copy to $server_ip failed, skipping" >&2; continue; }
            set timeout 20
            spawn $scp_gather_server_checksh
            expect {
                "*yes/no" { send "yes\r"; exp_continue }
                "*password:" { send "$server_password\r" }
            }
            expect {
                "Permission denied" { exit 1 }
                "Connection refused" { exit 1 }
                "100%" { }
            }
            expect eof
    EOF

        # Step 3: run the check script remotely.
        # NOTE(review): the 60 s timeout also limits the wait for the remote
        # script to finish - confirm it is long enough for checksh.sh.
        /usr/bin/expect <<EOF
            set timeout 60
            spawn $login_server_command
            expect {
                "*yes/no" { send "yes\r"; exp_continue }
                "*password:" { send "$server_password\r" }
            }
            expect {
                "Permission denied" { exit 1 }
                "#" { send "cd $CheckScriptPath;./checksh.sh $gather_server_ip $gather_server_password; exit\r" }
            }
            expect eof
    EOF

    done < "$login_info"
    
    cat checksh.sh
    #!/bin/bash
    ########################################################################################
    # Function:
    #   Collects the system information, disk information, performance data,
    #   etc. of this server and sends the resulting log to the gather server.
    #
    # Author:
    #   By Jack Wang
    #
    # Company:
    #   ShaanXi Great Wall Information Co.,Ltd.
    ########################################################################################

    # $1: IP address of the server that gathers the inspection logs
    # $2: root password of that server
    GatherServerIpAddress=$1
    GatherServerPassword=$2

    # Despite its name this holds a value, not a command: the first IPv4
    # address printed by ifconfig. It is used as the log file name.
    # -E is required for the "(...){n}" syntax; without it the pattern never
    # matched and the variable stayed empty.
    GetTheIpCommand=$(ifconfig | grep -E -o '([[:digit:]]{1,3}\.){3}[[:digit:]]{1,3}' | head -n 1)

    # "<ip>-<YYYYMMDD>". The original used %M (minute) where %m (month) was
    # meant.
    LogName="${GetTheIpCommand}-$(date +%Y%m%d)"

    # GatherServerLogPath: directory on the gather server that receives logs
    # LocalServerLogPath:  directory on this server where the log is written
    GatherServerLogPath="/tmp/GatherLogDirectory"
    LocalServerLogPath="/tmp/LocalServerLogDirectory"
    
    
    ########################################################################################
    # LinuxOsInformation prints "Key|Value" lines describing the operating
    # system, network settings, memory and CPU of this server.
    ########################################################################################
    LinuxOsInformation(){
        local Hostname UnameA OsVersion Uptime ServerIp ServerNetMask ServerGateWay
        local SigleMemoryCapacity MaximumMemoryCapacity NumberOfMemorySlots
        local MemoryTotal PhysicalMemoryNumber ProductName SystemCPUInfomation

        Hostname=$(hostname)
        UnameA=$(uname -a)
        OsVersion=$(sed '2,4d' /etc/issue)
        Uptime=$(uptime | awk '{print $3}' | awk -F "," '{print $1}')
        # NOTE(review): the two lines below expect the old net-tools layout
        # ("inet addr:1.2.3.4  Bcast:...  Mask:...").
        ServerIp=$(ifconfig | grep "inet" | sed '2,4d' | awk -F ":" '{print $2}' | awk '{print $1}')
        ServerNetMask=$(ifconfig | grep "inet" | sed '2,4d' | awk -F ":" '{print $4}' | awk '{print $1}')
        ServerGateWay=$(netstat -r | grep "default" | awk '{print $2}')
        # "Memory<whitespace>Device": the whitespace escape had been lost,
        # leaving "Memorys+Device", which never matches.
        SigleMemoryCapacity=$(dmidecode | grep -E -A5 'Memory[[:space:]]+Device' | grep "Size" \
            | grep -v "Range" | grep '[0-9]' | awk -F ":" '{print $2}' | sed 's/^[[:space:]]*//')
        MaximumMemoryCapacity=$(dmidecode -t 16 | grep "Maximum Capacity" | awk -F ":" '{print $2}' | sed 's/^[[:space:]]*//')
        NumberOfMemorySlots=$(dmidecode -t 16 | grep "Number Of Devices" | awk -F ":" '{print $2}' | sed 's/^[[:space:]]*//')
        # kB -> GB, rounded. The original awk string contained a raw line
        # break, which is an awk syntax error.
        MemoryTotal=$(awk '/MemTotal/ { printf("%1.0fGB\n", $2 / 1024 / 1024) }' /proc/meminfo)
        PhysicalMemoryNumber=$(dmidecode | grep -A16 "Memory Device" | grep "Size:" \
            | grep -v "No Module Installed" | grep -v "Range Size:" | wc -l)
        ProductName=$(dmidecode | grep -A10 "System Information" | grep "Product Name" \
            | awk -F ":" '{print $2}' | sed 's/^[[:space:]]*//')
        SystemCPUInfomation=$(grep "name" /proc/cpuinfo | cut -d: -f2 \
            | awk '{print "*"$1,$2,$3,$4}' | uniq -c | sed 's/^[[:space:]]*//')

        printf '%s|%s\n' \
            "Hostname" "$Hostname" \
            "Unamea" "$UnameA" \
            "OsVersion" "$OsVersion" \
            "Uptime" "$Uptime" \
            "ServerIp" "$ServerIp" \
            "ServerNetMask" "$ServerNetMask" \
            "ServerGateWay" "$ServerGateWay" \
            "SigleMemoryCapacity" "$SigleMemoryCapacity" \
            "MaximumMemoryCapacity" "$MaximumMemoryCapacity" \
            "NumberOfMemorySlots" "$NumberOfMemorySlots" \
            "MemoryTotal" "$MemoryTotal" \
            "PhysicalMemoryNumber" "$PhysicalMemoryNumber" \
            "ProductName" "$ProductName" \
            "SystemCPUInformation" "$SystemCPUInfomation"
    }
    
    # _top_field LINES SEPARATOR N
    # Prints field N of every line in LINES, split on the awk field separator
    # SEPARATOR. Prints nothing when LINES is empty.
    _top_field() {
        [ -n "$1" ] || return 0
        printf '%s\n' "$1" | awk -F "$2" -v n="$3" '{ print $n }'
    }

    # _top_kb_to_mb
    # Reads values such as "4000000k" on stdin and prints whole megabytes
    # with an "M" suffix.
    _top_kb_to_mb() {
        tr -d 'a-zA-Z' | awk '{ printf("%dM\n", $1 / 1024) }'
    }

    # PerformanceInfomation prints "Key|Value" lines with CPU, process,
    # memory and swap figures taken from one batch-mode top snapshot.
    # NOTE(review): the field numbers below fit the layout of one particular
    # top version; verify them against the top shipped on the target hosts.
    PerformanceInfomation (){
        local top_out load_line tasks_line cpu_line mem_lines swap_lines
        local CPUIdle CPUloadAverage ProcessNumbers ProcessRunning ProcessSleeping
        local ProcessStoping ProcessZombie UserSpaceCPU SystemSpaceCPU
        local ChangePriorityCPU WaitingCPU HardwareIRQCPU SoftwareIRQCPU
        local MemUsed MemFreeP MemBuffersP CacheCachedP CacheTotal CacheUsed CacheFree

        # One snapshot for all values: the original started top twenty times,
        # so the figures came from different moments.
        top_out=$(top -d 2 -n 1 -b)
        load_line=$(printf '%s\n' "$top_out" | grep "load average:")
        tasks_line=$(printf '%s\n' "$top_out" | grep "Tasks")
        cpu_line=$(printf '%s\n' "$top_out" | grep 'C[Pp][Uu]' | head -1)
        mem_lines=$(printf '%s\n' "$top_out" | grep "Mem")
        swap_lines=$(printf '%s\n' "$top_out" | grep "Swap")

        CPUIdle=$(printf '%s\n' "$top_out" | grep 'C[Pp][Uu]' | grep id | awk '{print $5}' | awk -F "%" '{print $1}')
        CPUloadAverage=$(_top_field "$load_line" ":" 5 | sed 's/^[[:space:]]*//')
        ProcessNumbers=$(_top_field "$tasks_line" "[: ,]" 3)
        # This variable was written "Proce***unning" in the source, which is
        # not a valid shell variable name.
        ProcessRunning=$(_top_field "$tasks_line" "[: ,]" 8)
        ProcessSleeping=$(_top_field "$tasks_line" "[: ,]" 11)
        ProcessStoping=$(_top_field "$tasks_line" "[: ,]" 16)
        ProcessZombie=$(_top_field "$tasks_line" "[: ,]" 21)
        UserSpaceCPU=$(_top_field "$cpu_line" "[: ,%]" 4)
        SystemSpaceCPU=$(_top_field "$cpu_line" "[: ,%]" 8)
        ChangePriorityCPU=$(_top_field "$cpu_line" "[: ,%]" 12)
        WaitingCPU=$(_top_field "$cpu_line" "[: ,%]" 19)
        HardwareIRQCPU=$(_top_field "$cpu_line" "[: ,%]" 23)
        SoftwareIRQCPU=$(_top_field "$cpu_line" "[: ,%]" 27)
        MemUsed=$(_top_field "$mem_lines" "[: ,]" 11 | _top_kb_to_mb)
        MemFreeP=$(_top_field "$mem_lines" "[: ,]" 16 | _top_kb_to_mb)
        MemBuffersP=$(_top_field "$mem_lines" "[: ,]" 22 | _top_kb_to_mb)
        CacheCachedP=$(_top_field "$swap_lines" "[: ,]" 24 | _top_kb_to_mb)
        CacheTotal=$(_top_field "$swap_lines" "[: ,]" 4 | _top_kb_to_mb)
        CacheUsed=$(_top_field "$swap_lines" "[: ,]" 14 | _top_kb_to_mb)
        CacheFree=$(_top_field "$swap_lines" "[: ,]" 18 | _top_kb_to_mb)

        printf '%s|%s\n' \
            "CPUIdle" "$CPUIdle" \
            "CPUloadAverage" "$CPUloadAverage" \
            "ProcessNumbers" "$ProcessNumbers" \
            "ProcessRunning" "$ProcessRunning" \
            "ProcessSleeping" "$ProcessSleeping" \
            "ProcessStoping" "$ProcessStoping" \
            "ProcessZombie" "$ProcessZombie" \
            "UserSpaceCPU" "$UserSpaceCPU" \
            "SystemSpaceCPU" "$SystemSpaceCPU" \
            "ChangePriorityCPU" "$ChangePriorityCPU" \
            "WaitingCPU" "$WaitingCPU" \
            "HardwareIRQCPU" "$HardwareIRQCPU" \
            "SoftwareIRQCPU" "$SoftwareIRQCPU" \
            "MemUsed" "$MemUsed" \
            "MemFreeP" "$MemFreeP" \
            "MemBuffersP" "$MemBuffersP" \
            "CacheCachedP" "$CacheCachedP" \
            "CacheTotal" "$CacheTotal" \
            "CacheUsed" "$CacheUsed" \
            "CacheFree" "$CacheFree"
        # Blank line separating this section from the next one in the log.
        echo
    }
    
    # OprateSystemSec prints security and operations information: logged-in
    # users, filesystem usage, kernel and boot messages, routing, firewall
    # rules, sockets, login history, directory sizes and mounts.
    OprateSystemSec () {
        echo '======================UserLogin======================'
        w

        echo '======================FileUsed======================='
        df -ah

        echo '======================dmesgError====================='
        dmesg | grep -i error

        echo '======================dmesgFail======================'
        # Case-insensitive so "failed" and "FAILED" are listed as well.
        dmesg | grep -i fail

        echo '======================BootLog========================'
        # -v (invert match) was meant; the original -V only prints grep's
        # version. Read the file directly instead of paging it through more.
        grep -v "OK" /var/log/boot.log | sed '1,6d'

        echo '======================route -n======================='
        route -n
        echo '======================iptables -L===================='
        iptables -L
        echo '======================netstat -lntp=================='
        netstat -lntp
        echo '======================netstat -antp=================='
        netstat -antp
        # The next two labels were swapped/mistyped in the original
        # ("BootLog" above netstat -s, "netstat -s" above last).
        echo '======================netstat -s====================='
        netstat -s
        echo '======================last==========================='
        last
        echo '======================du -sh /etc/==================='
        du -sh /etc/
        echo '======================du -sh /boot/=================='
        du -sh /boot/
        echo '======================du -sh /dev/==================='
        du -sh /dev/
        echo '======================df -h=========================='
        df -h
        echo '======================mount | column -t=============='
        mount | column -t

    }
    
    
    TopAndVmstat()
    {
        # Raw snapshots appended to the log: one batch-mode top iteration,
        # followed by ten vmstat samples taken one second apart.
        top -d 2 -n 1 -b
        vmstat 1 10
    }
    
    # CheckGatherLog (re)creates $LocalServerLogPath/<ip>.log and fills it
    # with the output of the four collector functions, in order.
    CheckGatherLog(){
        local log_file="$LocalServerLogPath/$GetTheIpCommand.log"

        # mkdir -p covers both "missing" and "already there"; the original
        # tested -x, which checks a permission bit rather than existence.
        mkdir -p "$LocalServerLogPath" || return 1

        # Truncating on open replaces any log from a previous run, which is
        # what the original rm/touch sequence achieved.
        {
            LinuxOsInformation
            PerformanceInfomation
            OprateSystemSec
            TopAndVmstat
        } > "$log_file"
    }
    
    CheckGatherLog

    # Copy the finished log to the gather server.
    SCP_LOG_TO_GATHER_SERVER="scp $LocalServerLogPath/$GetTheIpCommand.log root@$GatherServerIpAddress:$GatherServerLogPath"

    # The original expect block was malformed: after answering "yes" it
    # passed extra arguments to send and typed the literal text
    # "GatherServerPassword" instead of the variable's value. exp_continue
    # handles the optional host-key question and then falls through to the
    # password prompt.
    # NOTE(review): the password is interpolated into the expect script; a
    # password containing ", $, [ or \ will break it.
    /usr/bin/expect <<EOF
            set timeout 50
            spawn $SCP_LOG_TO_GATHER_SERVER
            expect {
                "*yes/no*" { send "yes\r"; exp_continue }
                "*password:*" { send "$GatherServerPassword\r" }
            }
            expect {
                "Permission denied" { exit 1 }
                "100%" { }
            }
            expect eof
    EOF
    
    # file.txt内容形式
    cat file.txt
    192.168.182.143  123456
    192.168.182.129  123456
    192.168.182.146  123456
    

    注:192.168.182.143是被巡检的服务器ip,123456是被巡检的服务器密码。

    cat check_linux.sh
    #!/bin/bash
    
    # check_process reports whether a DisplayManager process is running.
    check_process(){
        local tolprocess
        # The bracket expression stops grep from matching its own command
        # line. It is quoted so the shell cannot expand it as a file name
        # pattern, and grep -c counts matches without a separate wc.
        tolprocess=$(ps auxf | grep -c 'DisplayMa[nager]')

        if [ "$tolprocess" -ge 1 ]; then
            echo 'process ok'
        else
            echo 'fail'
        fi
    }
    
    
    # check_log [LOG_SERVER]
    # Verifies that the syslog-ng or syslog configuration mentions the central
    # log server (default 10.70.72.253).
    check_log(){
        local log_server=${1:-10.70.72.253}

        if [ -e /etc/syslog-ng/syslog-ng.conf ]; then
            if grep -qF -- "$log_server" /etc/syslog-ng/syslog-ng.conf; then
                echo 'syslog-ng ok'
            else
                # The original printed nothing here, so a missing forwarding
                # rule looked the same as a passing check.
                echo "syslog-ng is not configured to forward to $log_server"
            fi
        elif [ -e /etc/syslog.conf ]; then
            if grep -qF -- "$log_server" /etc/syslog.conf; then
                echo 'syslog ok'
            else
                echo "syslog is not configured to forward to $log_server"
            fi
        else
            echo 'log not find or error'
        fi
    }
    
    
    # check_cpuidle samples CPU usage with sar (10 samples, 2 s apart) and
    # warns when the lowest idle percentage seen is below 20.
    check_cpuidle(){
        local mincpu
        # Last column of the "all" lines is %idle; keep the smallest value.
        mincpu=$(sar -u 2 10 | grep all | awk '{print $NF}' | sort -n | head -1)

        if [ -z "$mincpu" ]; then
            # Without this guard a missing or failing sar ended in the "ok"
            # branch.
            echo 'cpu idle check failed: no data from sar'
        elif awk -v idle="$mincpu" 'BEGIN { exit !(idle < 20) }'; then
            echo 'cpu idle is less than 20% ,please check'
        else
            echo 'cpu idle is more than 20%, it is ok '
        fi

    }
    
    
    check_mem()
    {
        # Raw memory/CPU statistics: ten vmstat samples, two seconds apart.
        vmstat 2 10
    }
    
    
    # check_disk scans the fdisk listing for failure keywords.
    check_disk(){
        local chkdsk
        chkdsk=$(fdisk -l | grep -Ec 'failed|unsynced|unavailable')
        # A match means a problem was found; the original had the two
        # messages the wrong way round.
        if [ "$chkdsk" -ge 1 ]; then
            echo 'fdisk check find error,please check your disk '
        else
            echo 'fdisk check ok '
        fi
    }
    
    
    # check_io samples block devices with sar (10 samples, 2 s apart) and
    # warns when the highest %util reaches 80 or the highest await reaches 100.
    # NOTE(review): await is read as the third column from the end; confirm
    # this against the sysstat version on the target hosts.
    check_io(){
        local samples util await
        # One sar run feeds both figures (the original sampled twice, taking
        # 40 s and comparing values from different periods).
        samples=$(sar -d 2 10 | grep -Ev 'x86|^$|await')

        if [ -z "$samples" ]; then
            echo 'disk io check failed: no data from sar'
            return
        fi

        # Highest value of each column. The original took the lowest
        # (sort -nr | tail -1), so one idle device hid every busy one.
        util=$(printf '%s\n' "$samples" | awk '{print $NF}' | sort -nr | head -1)
        await=$(printf '%s\n' "$samples" | awk '{print $(NF-2)}' | sort -nr | head -1)

        if awk -v u="$util" -v a="$await" 'BEGIN { exit !(u < 80 && a < 100) }'; then
            echo 'disk io check is fine'
        else
            echo 'disk io use too high '
        fi

    }
    
    
    # check_swap [MEMINFO_FILE]
    # Reports swap usage read from MEMINFO_FILE (default /proc/meminfo). Usage
    # is fine while less than 30% of the swap, or less than 1024 kB, is used.
    check_swap(){
        local meminfo=${1:-/proc/meminfo}
        local tolswap useswap util

        tolswap=$(awk '/SwapTotal/ {print $2}' "$meminfo")
        useswap=$(awk '/SwapTotal/{total=$2} /SwapFree/{free=$2} END{print (total-free)}' "$meminfo")

        # No swap configured: nothing can be over-used, and dividing by the
        # zero total would fail.
        if [ "${tolswap:-0}" -eq 0 ]; then
            echo 'swap use is ok '
            return
        fi

        # Used fraction with one decimal. The original awk string contained a
        # raw line break, which is a syntax error.
        util=$(awk -v used="$useswap" -v total="$tolswap" 'BEGIN { printf "%.1f\n", used / total }')

        if awk -v u="$util" -v used="$useswap" 'BEGIN { exit !(u < 0.3 || used < 1024) }'; then
            echo 'swap use is ok '
        else
            echo "useswap: $useswap kb, swap util is $util"
        fi

    }
    
    
    # check_dmesg scans the kernel ring buffer for SCSI resets and
    # "file system full" messages.
    check_dmesg(){
        local chkdm
        chkdm=$(dmesg | grep -Ec 'scsi reset|file system full')
        # A match means trouble; the original had the two messages swapped.
        if [ "$chkdm" -ge 1 ]; then
            echo 'dmesg check find error '
        else
            echo 'dmesg test ok '
        fi
    }
    
    # check_boot [BOOT_LOG]
    # Scans the boot log (default /var/log/boot.msg) for SCSI resets and
    # "file system full" messages.
    check_boot(){
        local boot_log=${1:-/var/log/boot.msg}
        local chkdm

        if [ ! -r "$boot_log" ]; then
            # Not every distribution writes this file; say so instead of
            # reporting a verdict.
            echo "boot check skipped: cannot read $boot_log"
            return
        fi

        chkdm=$(grep -Ec 'scsi reset|file system full' "$boot_log")
        # A match means trouble; the original had the two messages swapped.
        if [ "$chkdm" -ge 1 ]; then
            echo 'boot check find error '
        else
            echo 'boot check fine '
        fi
    }
    
    # check_inode warns when any filesystem uses 80% or more of its inodes.
    check_inode(){
        local maxinode
        # Column 5 of df -i is IUse%; drop the header and "-" entries
        # (filesystems without inode accounting) and keep the largest value.
        maxinode=$(df -i | awk '{print $5}' | grep -Ev 'IUse|-' | sed 's/%//g' | sort -nr | head -1)

        case $maxinode in
            '' | *[!0-9]*)
                # No usable number: previously this fell through to the
                # "more than 80%" alarm.
                echo 'inode check failed: no data from df'
                return
                ;;
        esac

        if [ "$maxinode" -lt 80 ]; then
            echo 'inode check ok '
        else
            echo 'inode used more than 80% '
        fi
    }
    
    # check_df grades the fullest filesystem: below 80% is ok, 80-89% is a
    # warning, 90% and above is critical.
    check_df(){
        local dfuse
        # Column 6 of df -HT is Use%; keep the largest value.
        dfuse=$(df -HT | awk '{print $6}' | grep -v Use | sed 's/%//g' | sort -nr | head -1)

        case $dfuse in
            '' | *[!0-9]*)
                echo 'disk usage check failed: no data from df'
                return
                ;;
        esac

        if [ "$dfuse" -lt 80 ]; then
            echo 'disk used is less than 80% ,it is ok !'
        elif [ "$dfuse" -lt 90 ]; then
            # Includes exactly 80, which the original "> 80" test skipped and
            # therefore reported as critical.
            echo 'warning , disk used more than 80% and less than 90% '
        else
            echo ' Critical, disk used more than 90% '
        fi
    }
    
    
    # Run the checks in a fixed order, each preceded by its banner line.
    # The list alternates banner text and the function to call.
    inspection_plan=(
        '################### check process ###################' check_process
        '################### check syslog ####################' check_log
        '################### check cpuidle ###################' check_cpuidle
        '################### echo memory stat ################' check_mem
        '################### check fdisk #####################' check_disk
        '################### check io used ###################' check_io
        '################### check swap used #################' check_swap
        '################### check dmesg #####################' check_dmesg
        '################### check inode #####################' check_inode
        '################### check disk used #################' check_df
    )
    for ((i = 0; i < ${#inspection_plan[@]}; i += 2)); do
        echo "${inspection_plan[i]}"
        "${inspection_plan[i + 1]}"
    done
    
  • 相关阅读:
    UVa 11991 Easy Problem from Rujia Liu?
    UVa 11995 I Can Guess the Data Structure!
    LA 3485 (积分 辛普森自适应法) Bridge
    LA 5009 (三分法求极值) Error Curves
    软帝学院:一万字的Java基础知识总结大全(实用)
    学java可以做些什么
    从零开始学习java一般需要多长时间?
    学习java需要英语很好吗?
    java的8种基本数据类型
    什么是java变量,java变量是什么
  • 原文地址:https://www.cnblogs.com/gaohongyu/p/13987880.html
Copyright © 2020-2023  润新知