#!/bin/bash
# Settings for the nginx access-log pipeline implemented in this file:
# upload the local logs to HDFS, run the analysis jars on them, then delete
# the HDFS day directory from seven days back.

# Root of the Hadoop installation; bin/hdfs and bin/hadoop are run from here.
hadoop_home=/opt/hadoop-2.4.0
# Local nginx access logs that get uploaded.
tw_nginx_log_file=/home/chiline.com.all/access_com_tw.log
cn_nginx_log_file=/home/chiline.com.all/access_com_cn.log
# Run date as YYYYMMDD; every HDFS day directory name is derived from it.
current_date=$(date +%Y%m%d)
# Base HDFS URL prepended to every remote path.
hdfs_url=hdfs://xx.xx.xx.xx:9100
# Directory containing the analysis job jars.
analyse_jar_path=$hadoop_home/ianc

# Log the effective settings. The last line prints the variable itself rather
# than re-deriving the path, so the log cannot drift from the value in use.
echo "hadoop_home = $hadoop_home"
echo "tw_nginx_log_file = $tw_nginx_log_file"
echo "cn_nginx_log_file = $cn_nginx_log_file"
echo "hdfs_url = $hdfs_url"
echo "analyse_jar_path = $analyse_jar_path"
#######################################
# Recreate the HDFS day directory and upload both local nginx logs into it.
# The directory is named after the date one day before current_date.
# Globals:   hadoop_home, hdfs_url, current_date,
#            tw_nginx_log_file, cn_nginx_log_file (all read)
# Arguments: none
# Returns:   0 when every mkdir/put succeeded, 1 otherwise
#######################################
function putTodayLogToHdfs() {
  local day_dir
  local status=0

  # Resolve the target directory once instead of re-running addDate per command.
  day_dir=$hdfs_url/user/day-$(addDate "$current_date" 1)

  # Best effort: the directory does not exist on a first run, so a failure
  # here is expected and deliberately not counted.
  "$hadoop_home/bin/hdfs" dfs -rm -r "$day_dir"

  # Every step is still attempted, but any failure is remembered so that the
  # caller no longer sees only the status of the final upload.
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir" || status=1
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir/tw-log" || status=1
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir/cn-log" || status=1
  "$hadoop_home/bin/hdfs" dfs -put "$tw_nginx_log_file" "$day_dir/tw-log" || status=1
  "$hadoop_home/bin/hdfs" dfs -put "$cn_nginx_log_file" "$day_dir/cn-log" || status=1

  return "$status"
}
#######################################
# Print the calendar date lying a number of days BEFORE a given date.
# Despite its name the function subtracts the offset; it never adds.
# Arguments: $1 - base date as YYYYMMDD
#            $2 - non-negative number of days to go back
# Outputs:   the resulting date as YYYYMMDD on stdout
# Returns:   0 on success, 1 on malformed arguments
#######################################
function addDate() {
  local str=$1
  local days=$2
  local yy mm dd month_len

  if [[ ! $str =~ ^[0-9]{8}$ || ! $days =~ ^[0-9]+$ ]]; then
    echo "addDate: expected <YYYYMMDD> <days>, got '$str' '$days'" >&2
    return 1
  fi

  # Force base 10: fields such as "08" or "09" would otherwise be rejected
  # as invalid octal by shell arithmetic.
  yy=$((10#${str:0:4}))
  mm=$((10#${str:4:2}))
  dd=$((10#${str:6:2}))

  # Days still to subtract after moving back to "day 0" of the current month
  # (that is, the last day of the previous month).
  days=$((10#$days - dd))

  # Walk back one month at a time until the remainder goes negative.
  while ((days >= 0)); do
    mm=$((mm - 1))
    if ((mm == 0)); then
      mm=12
      yy=$((yy - 1))
    fi
    case $mm in
      4 | 6 | 9 | 11) month_len=30 ;;
      2)
        if ((yy % 4 == 0 && (yy % 100 != 0 || yy % 400 == 0))); then
          month_len=29
        else
          month_len=28
        fi
        ;;
      *) month_len=31 ;;
    esac
    days=$((days - month_len))
  done

  # The negative remainder is the day of month inside the month we landed in.
  dd=$((-days))
  printf '%04d%02d%02d\n' "$yy" "$mm" "$dd"

  # The date is delivered on stdout; the exit status only signals success.
  return 0
}
#######################################
# Print the last path component of a file path.
# Arguments: $1 - path, e.g. /var/log/nginx/access.log
# Outputs:   the final component on stdout (an empty line for empty input)
# Returns:   0
#######################################
function getLogFileName() {
  local path=$1

  # Ignore trailing slashes so that "/a/b/" yields "b".
  while [[ $path == */ ]]; do
    path=${path%/}
  done

  # Parameter expansion replaces the former IFS-based split, which left the
  # global IFS set to "/" for the rest of the script.
  printf '%s\n' "${path##*/}"

  # The name is delivered on stdout; "return" only accepts a numeric status.
  return 0
}
#######################################
# Delete the HDFS day directory dated seven days before current_date.
# Globals:   hadoop_home, hdfs_url, current_date (all read)
# Arguments: none
# Outputs:   a progress line on stdout, diagnostics on stderr
# Returns:   status of the hdfs delete, or 1 when no valid date was computed
#######################################
function removeLastWeekLog() {
  local remove_date
  remove_date=$(addDate "$current_date" 7)
  echo "start remove history log file,remove_date is $remove_date"

  # Guard the recursive delete: without a well-formed date the target would
  # collapse to ".../user/day-". The value is validated rather than addDate's
  # exit status, which is not a reliable success indicator.
  if [[ ! $remove_date =~ ^[0-9]{8}$ ]]; then
    echo "removeLastWeekLog: refusing to delete, bad date '$remove_date'" >&2
    return 1
  fi

  "$hadoop_home/bin/hdfs" dfs -rm -r "$hdfs_url/user/day-$remove_date"
}
#######################################
# Run the pv, time and location analysis jars on both uploaded logs.
# Inputs are read from the HDFS day directory dated one day before
# current_date; each job writes to <day dir>/analyse/<tw|cn>-<job>.
# Globals:   hadoop_home, hdfs_url, current_date, analyse_jar_path,
#            tw_nginx_log_file, cn_nginx_log_file (all read)
# Arguments: none
# Returns:   0 when every job succeeded, 1 otherwise
#######################################
function analyseTodayLog() {
  local day_dir tw_log_file cn_log_file analyse_path job
  local status=0

  # Resolve the day directory once instead of re-running addDate per path.
  day_dir=$hdfs_url/user/day-$(addDate "$current_date" 1)
  tw_log_file=$day_dir/tw-log/$(basename -- "$tw_nginx_log_file")
  cn_log_file=$day_dir/cn-log/$(basename -- "$cn_nginx_log_file")
  analyse_path=$day_dir/analyse

  # Same job order as before: each analysis runs for tw first, then cn.
  # All jobs are attempted; any failure is remembered for the return status.
  for job in pv time location; do
    "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/${job}analyse-1.0.jar" \
      "$tw_log_file" "$analyse_path/tw-$job" || status=1
    "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/${job}analyse-1.0.jar" \
      "$cn_log_file" "$analyse_path/cn-$job" || status=1
  done

  return "$status"
}
# Pipeline driver: upload the local logs, analyse them, then prune the old
# day directory. Each stage announces itself on stdout before it starts.
printf '%s\n' "start put local log to hdfs"
putTodayLogToHdfs

printf '%s\n' "start analyse today log"
analyseTodayLog

printf '%s\n' "remove last week log"
removeLastWeekLog
# NOTE(review): these assignments repeat the configuration already set at the
# top of this file; the definitions and calls that follow repeat as well, so
# the pipeline runs a second time. Confirm whether that is intended.

# Root of the Hadoop installation; bin/hdfs and bin/hadoop are run from here.
hadoop_home=/opt/hadoop-2.4.0
# Local nginx access logs that get uploaded.
tw_nginx_log_file=/home/chiline.com.all/access_com_tw.log
cn_nginx_log_file=/home/chiline.com.all/access_com_cn.log
# Run date as YYYYMMDD; every HDFS day directory name is derived from it.
current_date=$(date +%Y%m%d)
# Base HDFS URL prepended to every remote path.
hdfs_url=hdfs://xx.xx.xx.xx:9100
# Directory containing the analysis job jars.
analyse_jar_path=$hadoop_home/ianc

# Log the effective settings. The last line prints the variable itself rather
# than re-deriving the path, so the log cannot drift from the value in use.
echo "hadoop_home = $hadoop_home"
echo "tw_nginx_log_file = $tw_nginx_log_file"
echo "cn_nginx_log_file = $cn_nginx_log_file"
echo "hdfs_url = $hdfs_url"
echo "analyse_jar_path = $analyse_jar_path"
#######################################
# Recreate the HDFS day directory and upload both local nginx logs into it.
# The directory is named after the date one day before current_date.
# NOTE(review): a function with this name is already defined earlier in this
# file; this definition replaces it from here on.
# Globals:   hadoop_home, hdfs_url, current_date,
#            tw_nginx_log_file, cn_nginx_log_file (all read)
# Arguments: none
# Returns:   0 when every mkdir/put succeeded, 1 otherwise
#######################################
function putTodayLogToHdfs() {
  local day_dir
  local status=0

  # Resolve the target directory once instead of re-running addDate per command.
  day_dir=$hdfs_url/user/day-$(addDate "$current_date" 1)

  # Best effort: the directory does not exist on a first run, so a failure
  # here is expected and deliberately not counted.
  "$hadoop_home/bin/hdfs" dfs -rm -r "$day_dir"

  # Every step is still attempted, but any failure is remembered so that the
  # caller no longer sees only the status of the final upload.
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir" || status=1
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir/tw-log" || status=1
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir/cn-log" || status=1
  "$hadoop_home/bin/hdfs" dfs -put "$tw_nginx_log_file" "$day_dir/tw-log" || status=1
  "$hadoop_home/bin/hdfs" dfs -put "$cn_nginx_log_file" "$day_dir/cn-log" || status=1

  return "$status"
}
#######################################
# Print the calendar date lying a number of days BEFORE a given date.
# Despite its name the function subtracts the offset; it never adds.
# NOTE(review): a function with this name is already defined earlier in this
# file; this definition replaces it from here on.
# Arguments: $1 - base date as YYYYMMDD
#            $2 - non-negative number of days to go back
# Outputs:   the resulting date as YYYYMMDD on stdout
# Returns:   0 on success, 1 on malformed arguments
#######################################
function addDate() {
  local str=$1
  local days=$2
  local yy mm dd month_len

  if [[ ! $str =~ ^[0-9]{8}$ || ! $days =~ ^[0-9]+$ ]]; then
    echo "addDate: expected <YYYYMMDD> <days>, got '$str' '$days'" >&2
    return 1
  fi

  # Force base 10: fields such as "08" or "09" would otherwise be rejected
  # as invalid octal by shell arithmetic.
  yy=$((10#${str:0:4}))
  mm=$((10#${str:4:2}))
  dd=$((10#${str:6:2}))

  # Days still to subtract after moving back to "day 0" of the current month
  # (that is, the last day of the previous month).
  days=$((10#$days - dd))

  # Walk back one month at a time until the remainder goes negative.
  while ((days >= 0)); do
    mm=$((mm - 1))
    if ((mm == 0)); then
      mm=12
      yy=$((yy - 1))
    fi
    case $mm in
      4 | 6 | 9 | 11) month_len=30 ;;
      2)
        if ((yy % 4 == 0 && (yy % 100 != 0 || yy % 400 == 0))); then
          month_len=29
        else
          month_len=28
        fi
        ;;
      *) month_len=31 ;;
    esac
    days=$((days - month_len))
  done

  # The negative remainder is the day of month inside the month we landed in.
  dd=$((-days))
  printf '%04d%02d%02d\n' "$yy" "$mm" "$dd"

  # The date is delivered on stdout; the exit status only signals success.
  return 0
}
#######################################
# Print the last path component of a file path.
# NOTE(review): a function with this name is already defined earlier in this
# file; this definition replaces it from here on.
# Arguments: $1 - path, e.g. /var/log/nginx/access.log
# Outputs:   the final component on stdout (an empty line for empty input)
# Returns:   0
#######################################
function getLogFileName() {
  local path=$1

  # Ignore trailing slashes so that "/a/b/" yields "b".
  while [[ $path == */ ]]; do
    path=${path%/}
  done

  # Parameter expansion replaces the former IFS-based split, which left the
  # global IFS set to "/" for the rest of the script.
  printf '%s\n' "${path##*/}"

  # The name is delivered on stdout; "return" only accepts a numeric status.
  return 0
}
#######################################
# Delete the HDFS day directory dated seven days before current_date.
# NOTE(review): a function with this name is already defined earlier in this
# file; this definition replaces it from here on.
# Globals:   hadoop_home, hdfs_url, current_date (all read)
# Arguments: none
# Outputs:   a progress line on stdout, diagnostics on stderr
# Returns:   status of the hdfs delete, or 1 when no valid date was computed
#######################################
function removeLastWeekLog() {
  local remove_date
  remove_date=$(addDate "$current_date" 7)
  echo "start remove history log file,remove_date is $remove_date"

  # Guard the recursive delete: without a well-formed date the target would
  # collapse to ".../user/day-". The value is validated rather than addDate's
  # exit status, which is not a reliable success indicator.
  if [[ ! $remove_date =~ ^[0-9]{8}$ ]]; then
    echo "removeLastWeekLog: refusing to delete, bad date '$remove_date'" >&2
    return 1
  fi

  "$hadoop_home/bin/hdfs" dfs -rm -r "$hdfs_url/user/day-$remove_date"
}
#######################################
# Run the pv, time and location analysis jars on both uploaded logs.
# Inputs are read from the HDFS day directory dated one day before
# current_date; each job writes to <day dir>/analyse/<tw|cn>-<job>.
# NOTE(review): a function with this name is already defined earlier in this
# file; this definition replaces it from here on.
# Globals:   hadoop_home, hdfs_url, current_date, analyse_jar_path,
#            tw_nginx_log_file, cn_nginx_log_file (all read)
# Arguments: none
# Returns:   0 when every job succeeded, 1 otherwise
#######################################
function analyseTodayLog() {
  local day_dir tw_log_file cn_log_file analyse_path job
  local status=0

  # Resolve the day directory once instead of re-running addDate per path.
  day_dir=$hdfs_url/user/day-$(addDate "$current_date" 1)
  tw_log_file=$day_dir/tw-log/$(basename -- "$tw_nginx_log_file")
  cn_log_file=$day_dir/cn-log/$(basename -- "$cn_nginx_log_file")
  analyse_path=$day_dir/analyse

  # Same job order as before: each analysis runs for tw first, then cn.
  # All jobs are attempted; any failure is remembered for the return status.
  for job in pv time location; do
    "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/${job}analyse-1.0.jar" \
      "$tw_log_file" "$analyse_path/tw-$job" || status=1
    "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/${job}analyse-1.0.jar" \
      "$cn_log_file" "$analyse_path/cn-$job" || status=1
  done

  return "$status"
}
# Pipeline driver: upload the local logs, analyse them, then prune the old
# day directory. Each stage announces itself on stdout before it starts.
# NOTE(review): the same three stages are already invoked earlier in this
# file, so this is a second full run. Confirm whether that is intended.
printf '%s\n' "start put local log to hdfs"
putTodayLogToHdfs

printf '%s\n' "start analyse today log"
analyseTodayLog

printf '%s\n' "remove last week log"
removeLastWeekLog