#!/bin/bash
# Program:
# Automatic inspection operation system and oracle database.
# History:
# 2016/05/01 Liang Guojun First Release
#
#==============================================================================
# define variable
#==============================================================================
PATH=$PATH:$HOME/bin
export PATH
export ORACLE_BASE=/u01/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0/db_1
export ORACLE_SID=orcl
export PATH=$ORACLE_HOME/bin:$ORACLE_HOME/OPatch:$PATH
export NLS_LANG=AMERICAN_AMERICA.AL32UTF8
#define ora_healthcheck base directory
ORAH_HOME=/home/oracle/ora_healthcheck
#define Oracle SID
ORAH_DB_SID=orcl
#define tablespace rate
ORAH_TBS_RATE=80
#define alert log size,unit:KB
ORAH_ALERT_SIZE=2097152
ORAH_HOSTNAME=onlinedb
ORAH_BIN=$ORAH_HOME/bin
ORAH_LOG=$ORAH_HOME/logs
ORAH_TMP=$ORAH_HOME/tmp
DATE=`date "+%Y%m%d-%H%M%S"`
rm -f $ORAH_TMP/*
#==============================================================================
# judge oracle SMON process status
#==============================================================================
ps_smon_ori=`ps -ef|grep smon | grep -v grep|grep -v root|grep -v asm|grep -v grid|awk '{print $NF}'`
ps_smon_sid=ora_smon_${ORAH_DB_SID}
if [ "${ps_smon_ori}" = "${ps_smon_sid}" ];then
db_is_down_smon=' '
else
db_is_down_smon='SEND'
fi
#==============================================================================
# judge instance open status
#==============================================================================
instance_status_ori=`sqlplus -S / as sysdba <<EOF
set pagesize 999
set linesize 180
col value format a100
select status from v\$instance;
exit;
EOF`
instance_status=`echo ${instance_status_ori}|awk -F " " '{print $3}'`
if [ "${instance_status}" = "OPEN" ];then
db_is_down_status=' '
else
db_is_down_status='SEND'
fi
if [[ "${db_is_down_smom}" = "SEND" ]] || [[ "${db_is_down_status}" = "SEND" ]];then
echo "database ${ORAH_HOSTNAME},already Down!" | mutt -s "database ${ORAH_HOSTNAME},already Down!" test@139.com
else
echo "database status is OK!"
fi
#==============================================================================
# execute database tablespace free rate sql
#==============================================================================
sqlplus -S / as sysdba <<EOF > $ORAH_TMP/db_tbs.log
set linesize 160
set pagesize 999
col exclude for a18
col TBS_TOTAL_MB for 9999999
col TBS_USED_MB for 9999999
col TBS_FREE_MB for 9999999
col TBS_RATE for a11
col EXTEND_MAX_MB for 9999999
col EXTEND_FREE_MB for 9999999
col EXTEND_RATE for a11
select a.tablespace_name "excludethis",
round(current_size / 1024 / 1024, 1) TBS_TOTAL_MB,
round((current_size - b.free_bytes) / 1024 / 1024, 1) TBS_USED_MB,
round(b.free_bytes / 1024 / 1024, 1) TBS_FREE_MB,
round(((current_size - b.free_bytes) / current_size) * 100) || '%' TBS_RATE,
round(a.max_size / 1024 / 1024, 1) EXTEND_MAX_MB,
round((a.max_size - (current_size - b.free_bytes)) / 1024 / 1024) EXTEND_FREE_MB,
round(((current_size - b.free_bytes) / a.max_size) * 100) || '%' EXTEND_RATE
from (select tablespace_name,
sum(ddf.bytes) current_size,
sum(case
when ddf.autoextensible = 'YES' THEN
DDF.MAXBYTES
ELSE
DDF.BYTES
END) max_size
from dba_data_files ddf
group by tablespace_name
union
select tablespace_name,
sum(ddf.bytes) current_size,
sum(case
when ddf.autoextensible = 'YES' THEN
DDF.MAXBYTES
ELSE
DDF.BYTES
END) max_size
from dba_temp_files ddf
group by tablespace_name) a,
(select dfs.tablespace_name, sum(dfs.bytes) free_bytes
from dba_free_space dfs
group by dfs.tablespace_name
union
select tfs.tablespace_name, sum(tfs.BYTES_FREE) free_bytes
from v$TEMP_SPACE_HEADER tfs
group by tfs.tablespace_name) b
where a.tablespace_name = b.tablespace_name(+);
exit;
EOF
#==============================================================================
# judge database tablespace free rate
#==============================================================================
grep -v "excludethis" $ORAH_TMP/db_tbs.log|grep -v "---"|grep -v "rows selected">$ORAH_TMP/db_tbs_ori.log
sed -i '/^[[:space:]]*$/d' $ORAH_TMP/db_tbs_ori.log
TBS_RATE=`awk '{print $8}' $ORAH_TMP/db_tbs_ori.log|sed -e "s/%//g"`
TBS_RATE_LENGTH=`cat $ORAH_TMP/db_tbs_ori.log|wc -l`
for ((i=1;i<=${TBS_RATE_LENGTH};i++));do
TBS_RATE_SIZE=`awk '{print $8}' $ORAH_TMP/db_tbs_ori.log|sed -e "s/%//g"|sed -n ${i}p`
TBS_NAME=`awk '{print $1}' $ORAH_TMP/db_tbs_ori.log|sed -e "s/%//g"|sed -n ${i}p`
if [ ${TBS_RATE_SIZE} -gt ${ORAH_TBS_RATE} ];then
sed -n ${i}p $ORAH_TMP/db_tbs_ori.log>>$ORAH_TMP/db_tbs_rate_name.log
else
echo " "
fi
done;
TBS_RATE_NAME=`cat $ORAH_TMP/db_tbs_rate_name.log`
if [ -z "${TBS_RATE_NAME}" ];then
echo "tablespace is OK!"
else
echo "database ${ORAH_HOSTNAME},tablespace(${TBS_NAME}) if full" | mutt -s "database ${ORAH_HOSTNAME},tablespace(${TBS_NAME}) if full" test@139.com
fi
#==============================================================================
# get alert path
#==============================================================================
alertpath_ori=`sqlplus -S / as sysdba <<EOF
set pagesize 999
set linesize 180
col value format a100
select value from v\$parameter where name='background_dump_dest';
exit;
EOF`
#==============================================================================
# judge alert size if more then 2G backup and touch new
#==============================================================================
alertpath=`echo $alertpath_ori|awk -F " " '{print $3}'`
alert_size=`du -k $alertpath/alert_${ORAH_DB_SID}.log |awk -F " " '{print $1}'`
alert_2g=' '
if [ ${alert_size} -gt ${ORAH_ALERT_SIZE} ];then
mv $alertpath/alert_${ORAH_DB_SID}.log $alertpath/alert_${ORAH_DB_SID}_${DATE}.log && touch $alertpath/alert_$ORAH_DB_SID.log
alert_2g='YES'
else
echo "alert size less then 2G"
fi
#==============================================================================
# judge alert log
#==============================================================================
if_alertout_null=$ORAH_LOG/alert_out.log
if [ "${alert_2g}" = "YES" ];then
cat $alertpath/alert_$ORAH_DB_SID.log> $ORAH_LOG/alert_out.log
lastnum=$(grep -n ".*" $alertpath/alert_$ORAH_DB_SID.log |tail -1|awk -F ":" '{print $1}')
echo ${lastnum} >$ORAH_LOG/alert_num.log
else
if [ ! -s "${if_alertout_null}" ];then
tail -n 100 $alertpath/alert_$ORAH_DB_SID.log> $ORAH_LOG/alert_out.log
lastnum=$(grep -n ".*" $alertpath/alert_$ORAH_DB_SID.log |tail -1|awk -F ":" '{print $1}')
echo ${lastnum} >$ORAH_LOG/alert_num.log
else
firstnum=`cat $ORAH_LOG/alert_num.log`
firstnum=`echo "scale=0;$firstnum+1"|bc`
lastnum=$(grep -n ".*" $alertpath/alert_$ORAH_DB_SID.log |tail -1|awk -F ":" '{print $1}')
echo ${lastnum} >$ORAH_LOG/alert_num.log
sed -n ''"$firstnum","$lastnum"'p' $alertpath/alert_$ORAH_DB_SID.log >$ORAH_LOG/alert_out.log
fi
fi
if_alert_null=`egrep 'ORA-|Heartbeat failed|Deadlock' $ORAH_LOG/alert_out.log`
if [ -z "${if_alert_null}" ];then
echo "alert log is OK!"
else
echo "database ${ORAH_HOSTNAME},alert log has problem" | mutt -s "database ${ORAH_HOSTNAME},alert log has problem" test@139.com
fi
#==============================================================================
# judge CPU
#==============================================================================
cpuuse=`top -b -n2 -p 1 | fgrep "Cpu(s)" | tail -1 | awk -F'id,' -v prefix="$prefix" '{ split($1, vs, ","); v=vs[length(vs)]; sub("%", "", v); printf "%s%.1f%%
", prefix, 100 - v }' |tr -d '%'`
cpunum=`awk -v num1=90 -v num2=$cpuuse 'BEGIN{print(num1>num2)?"0":"1"}'`
if [ $cpunum -eq 0 ];then
echo "CPU is Normal!"
else
echo "host ${ORAH_HOSTNAME},lack of CPU!" | mutt -s "host ${ORAH_HOSTNAME},lack of CPU!" test@139.com
fi
#==============================================================================
# judge Disk
#==============================================================================
diskuse=$(df -hP|grep -v "Filesystem" | grep -c -E (9[0-9]\%)|(100\%))
if [ "${diskuse}" -gt 0 ];
then
echo "host ${ORAH_HOSTNAME},disk has full!" | mutt -s "host ${ORAH_HOSTNAME},disk has full!" test@139.com
else
echo "Disk is Normal!"
fi
#==============================================================================
# judge Memory
#==============================================================================
memuse=`free -m|grep Mem|awk '{print ($3-$6-$7)/$2}'`
memnum=`awk -v num1=$memuse -v num2=0.01 'BEGIN{print(num1>num2)?"0":"1"}'`
if [ $memnum -eq 0 ];then
echo "memory is Normal!"
else
echo "host ${ORAH_HOSTNAME},lack of memory!" | mutt -s "host ${ORAH_HOSTNAME},lack of memory!" test@139.com
fi