source: http://linoxide.com/linux-shell-script/shell-script-check-linux-system-health/
This article introduces a shell script that performs a Linux system health check.
This script collects system information and status like hostname, kernel version, uptime, cpu / memory / disk usage.
The script uses:
the hostname, uname, uptime, who, mpstat, lscpu, ps, top, df, free and bc commands to gather system information,
and cut, grep, awk and sed for text processing.
The output of the script is a text file which will be generated in the current directory.
The EMAIL variable at the top of the script holds the address to which the report file is mailed; when it is left empty, no mail is sent.
Apart from reporting system status, the script checks the CPU load and the usage of each filesystem against predefined thresholds.
Remember: make sure all of the above commands are installed and working, so that every section of the report is produced correctly.
#!/bin/bash
#
# Linux system health check.
#
# Collects hostname, kernel version, uptime, CPU load, the top memory/CPU
# consumers, disk usage and memory/swap usage, and writes them to a text
# report in the current directory:
#
#   health-<hostname>-<yymmdd>-<HHMM>.txt
#
# When EMAIL is non-empty the report is also mailed to that address.
#
# External commands: hostname, uname, uptime, who, mpstat, lscpu, ps, top,
# df, free, bc, date, mail (optional) and cut/grep/awk/sed/sort/head/tail.
# The report is best effort: a missing tool leaves its section incomplete
# instead of aborting the whole run, which is why `set -e` is not used.

# Address the report is mailed to; leave empty to skip mailing.
EMAIL=''

# Notes gathered while building the report; appended to the final message.
RESULT=''

#######################################
# Classify a load average.
# Arguments: $1 - load average (decimal number)
# Outputs:   Unhealthy (> 2), Caution (> 1), Normal, or Unknown when $1 is
#            not a plain decimal number
#######################################
load_status() {
  local load=$1
  if [[ ! $load =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo 'Unknown'
    return 0
  fi
  # bash has no floating point comparison, so awk does it.
  awk -v load="$load" 'BEGIN { if (load + 0 > 2) print "Unhealthy"; else if (load + 0 > 1) print "Caution"; else print "Normal" }'
}

#######################################
# Classify a filesystem usage percentage.
# Arguments: $1 - used percentage without the trailing '%'
# Outputs:   Unhealthy (>= 95), Caution (>= 90), Normal, or Unknown when $1
#            is not an unsigned integer
#######################################
disk_status() {
  local usage=$1
  if [[ ! $usage =~ ^[0-9]+$ ]]; then
    echo 'Unknown'
  elif (( 10#$usage >= 95 )); then
    echo 'Unhealthy'
  elif (( 10#$usage >= 90 )); then
    echo 'Caution'
  else
    echo 'Normal'
  fi
}

#######################################
# Convert mebibytes to gibibytes with two decimals (via bc).
# Arguments: $1 - size in MiB (unsigned integer)
# Outputs:   e.g. "0.50GB", or "N/A" when $1 is not an unsigned integer
#######################################
mb_to_gb() {
  local mb=$1
  if [[ ! $mb =~ ^[0-9]+$ ]]; then
    echo 'N/A'
    return 0
  fi
  # bc prints ".50" for values below 1, hence the explicit leading "0".
  echo "$(echo "scale=2;if(${mb}<1024 && ${mb} > 0) print 0;${mb}/1024" | bc -l)GB"
}

#######################################
# Integer percentage of free versus total.
# Arguments: $1 - free amount, $2 - total amount (same unit)
# Outputs:   e.g. "25%", or "N/A" when the total is zero (no swap
#            configured) or either value is not an unsigned integer
#######################################
percent_free() {
  local free_amount=$1 total_amount=$2
  if [[ $free_amount =~ ^[0-9]+$ && $total_amount =~ ^[0-9]+$ ]] \
    && (( 10#$total_amount > 0 )); then
    echo "$(( 10#$free_amount * 100 / 10#$total_amount ))%"
  else
    echo 'N/A'
  fi
}

#######################################
# Print the Total/Used/Free/%Free table for one kind of memory.
# Arguments: $1 - total MiB, $2 - used MiB, $3 - free MiB
#######################################
memory_row() {
  local total=$1 used=$2 free_amount=$3
  printf '%-10s %-10s %-10s %s\n' 'Total' 'Used' 'Free' '%Free'
  printf '\n'
  printf '%-10s %-10s %-10s %s\n' \
    "$(mb_to_gb "$total")" \
    "$(mb_to_gb "$used")" \
    "$(mb_to_gb "$free_amount")" \
    "$(percent_free "$free_amount" "$total")"
}

#######################################
# Write the complete health report to stdout.
# Globals:   RESULT (appended to when lscpu is missing)
# Outputs:   the report text on stdout
# Returns:   0 always; individual sections are best effort
#######################################
sysstat() {
  local -r hashes='#####################################################################'
  local -r stars='*********************************************************************'
  local uptime_out up_for load cpus
  local df_out fs avail pct mount
  local free_out total_mem used_mem free_mem total_swap used_swap free_swap

  # The C locale keeps the "load average:" label and the decimal point stable.
  uptime_out=$(LC_ALL=C uptime)
  up_for=$(sed 's/.*up \([^,]*\), .*/\1/' <<<"$uptime_out")
  load=$(awk -F'load average:' '{ print $2 }' <<<"$uptime_out" | cut -f1 -d,)
  load=${load//[[:space:]]/}

  printf '%s\n' \
    '' \
    "$hashes" \
    'Health Check Report (CPU,Process,Disk Usage, Memory)' \
    "$hashes" \
    '' \
    '' \
    "Hostname : $(hostname)" \
    "Kernel Version : $(uname -r)" \
    "Uptime : ${up_for}" \
    "Last Reboot Time : $(who -b | awk '{print $3,$4}')" \
    '' \
    '' \
    '' \
    "$stars" \
    'CPU Load - > Threshold < 1 Normal > 1 Caution , > 2 Unhealthy' \
    "$stars" \
    ''

  if ! command -v mpstat >/dev/null 2>&1; then
    printf '%s\n' \
      'Please install mpstat!' \
      'On Debian based systems:' \
      'sudo apt-get install sysstat' \
      'On RHEL based systems:' \
      'yum install sysstat'
  else
    echo
    if ! command -v lscpu >/dev/null 2>&1; then
      RESULT="${RESULT} lscpu required to produce accurate results"
    else
      cpus=$(LC_ALL=C lscpu | grep -e '^CPU(s):' | cut -f2 -d: | awk '{print $1}')
      # The CPU column is located from the header row, because its position
      # depends on whether the timestamp carries an AM/PM field. The value
      # printed is the column right after it, for CPU ids below the lscpu
      # count. mpstat runs once instead of once per CPU.
      mpstat -P ALL | awk -v n="$cpus" '
        !col { for (i = 1; i <= NF; i++) if ($i == "CPU") col = i; next }
        $col ~ /^[0-9]+$/ && $col + 0 < n + 0 { print "CPU" $col " : " $(col + 1) }
      '
    fi
  fi

  # The load figures come from uptime, so they are reported even when
  # mpstat is not installed.
  printf '%s\n' \
    '' \
    "Load Average : ${load}" \
    '' \
    "Health Status : $(load_status "$load")" \
    ''

  printf '%s\n' \
    "$stars" \
    'Process' \
    "$stars" \
    '' \
    '=> Top memory using process/application' \
    '' \
    'PID %MEM RSS COMMAND'
  ps aux | awk '{print $2, $4, $6, $11}' | sort -k3rn | head -n 10
  printf '%s\n' \
    '' \
    '=> Top CPU using process/application'
  top -b -n 1 | head -17 | tail -11

  printf '%s\n' \
    '' \
    "$stars" \
    'Disk Usage - > Threshold < 90 Normal > 90% Caution > 95 Unhealthy' \
    "$stars" \
    ''

  # df output is kept in a variable rather than a fixed-name file in /tmp.
  # The first line (column header) is dropped by position.
  df_out=$(df -Ph | tail -n +2)

  while read -r fs _ _ avail pct mount; do
    [[ -n "$fs" ]] || continue
    printf '%s %s %s used %s free space\n\n' "$fs" "$mount" "$pct" "$avail"
  done <<<"$df_out"

  printf '%s\n' \
    '' \
    '' \
    'Health Status' \
    ''

  while read -r fs _ _ avail pct mount; do
    [[ -n "$fs" ]] || continue
    printf '%s %s   %s\n' "$fs" "$mount" "$(disk_status "${pct%\%}")"
  done <<<"$df_out"

  # One `free` call; rows are selected by their (C locale) labels.
  free_out=$(LC_ALL=C free -m)
  read -r total_mem used_mem free_mem \
    <<<"$(awk '/^Mem:/ { print $2, $3, $4 }' <<<"$free_out")"
  read -r total_swap used_swap free_swap \
    <<<"$(awk '/^Swap:/ { print $2, $3, $4 }' <<<"$free_out")"

  printf '%s\n' \
    '' \
    "$stars" \
    'Memory' \
    "$stars" \
    '' \
    '=> Physical Memory' \
    ''
  memory_row "$total_mem" "$used_mem" "$free_mem"
  printf '%s\n' \
    '' \
    '=> Swap Memory' \
    ''
  memory_row "$total_swap" "$used_swap" "$free_swap"
  echo

  return 0
}

#######################################
# Generate the report file and optionally mail it.
# Globals:   EMAIL (read), RESULT (read)
# Returns:   0 on success, 1 when the report file cannot be written
#######################################
main() {
  local filename
  filename="health-$(hostname)-$(date +%y%m%d-%H%M).txt"

  if ! sysstat > "$filename"; then
    printf 'Could not write report file %s\n' "$filename" >&2
    return 1
  fi
  printf '%s\n' "Reported file ${filename} generated in current directory.${RESULT}"

  if [[ -n "$EMAIL" ]]; then
    if ! command -v mail >/dev/null 2>&1; then
      echo "The program 'mail' is currently not installed." >&2
    else
      mail -s "$filename" "$EMAIL" < "$filename"
    fi
  fi
}

main "$@"