1 #!/bin/bash 2 3 source /etc/profile 4 5 runlog='/tmp/zewei/check_schema_log' 6 hive_database_schema=/tmp/hive_database_schema/hive/ 7 mysql_database_schema=/tmp/hive_database_schema/mysql/ 8 9 > $runlog 10 #每天下午一点删除现有的表结构缓存文件. 11 #防止hive有变动. 12 if [ `date +%k` -eq 13 ] 13 then 14 rm -rf /tmp/hive_database_schema/hive/* 15 fi 16 17 while read DB; do 18 HiveDB=`echo $DB |awk '{print $1}'` 19 MysqlDB=`echo $DB |awk '{print $2}'` 20 MysqlHost=`echo $DB |awk '{print $3}'` 21 MysqlPort=`echo $DB |awk '{print $4}'` 22 PartTag=`echo $DB |awk '{print $5}'` 23 24 connect_mysql="mysql -u TableSchemaCheck -pSchemacheck666 -h $MysqlHost -P $MysqlPort $MysqlDB -BNe" 25 #缓存文件夹不在就创建 26 ls $mysql_database_schema/$MysqlDB > /dev/null || mkdir -p $mysql_database_schema/$MysqlDB 27 ls $hive_database_schema/$HiveDB > /dev/null || mkdir -p $hive_database_schema/$HiveDB 28 29 #通过part标记检查是否为分区库 30 if [ "$PartTag" == 'part' ] 31 then 32 table_list=`hive -S -e "use $HiveDB; show tables;" | grep "_part$"` 33 else 34 table_list=`hive -S -e "use $HiveDB; show tables;"` 35 fi 36 37 #对获取到的tables进行循环检查 38 for table in $table_list 39 do 40 if [ "$PartTag" == 'part' ] 41 then 42 hive_table_name=$table 43 mysql_table_name=`echo $table | sed 's/_part$//'` 44 else 45 hive_table_name=$table 46 mysql_table_name=$table 47 fi 48 49 #获取mysql的表结构 50 $connect_mysql "desc $mysql_table_name;" > /tmp/mysql_column 51 if [ $? -ne 0 ] 52 then 53 continue 54 else 55 #把大写转换为小写.因为hive里面的列名没有大小写之分,所以转换一下 56 cat /tmp/mysql_column | awk '{print $1}' | tr '[A-Z]' '[a-z]' > $mysql_database_schema/$MysqlDB/$mysql_table_name 57 fi 58 59 #如果没有hive的表结构缓存文件就去hive取...然后对分区字段进行删除,因为这对于MySQL的表结构来说是多余的 60 if ! ls $hive_database_schema/$HiveDB/$table > /dev/null 61 then 62 hive -S -e "use $HiveDB; desc ${hive_table_name};" | awk '{print $1 }' > /tmp/hive_column 63 part_column=`sed -n '/#/,$p' /tmp/hive_column | egrep -v '#|^$'` 64 for i in $part_column 65 do 66 sed -i "/${i}/d" /tmp/hive_column 67 done 68 egrep -v "#|^$" /tmp/hive_column | tee $hive_database_schema/$HiveDB/$table 69 fi 70 71 #获取MD5 72 md5ForMysql=`md5sum $mysql_database_schema/$MysqlDB/$mysql_table_name | awk '{print $1}'` 73 md5ForHive=`md5sum $hive_database_schema/$HiveDB/$hive_table_name | awk '{print $1}'` 74 75 #MD5不同就记录日志 76 if [ ! "$md5ForMysql"x == "$md5ForHive"x ]; then 77 echo -e "HiveDB: $HiveDB table: $hive_table_name not equal to MysqlDB table: $mysql_table_name" >> $runlog 78 fi 79 done 80 done < /root/script/DBlist 81 82 #统计并报警.报警阈值在监控里面设置 83 err_line=`wc -l $runlog | awk '{print $1}'` 84 [报警]