• 并行执行hive脚本


     ### Generates one SQL file per date from a template script and runs them
     ### through beeline (or hive) in batches of ${thread_num} concurrent jobs.
     ### For each date it writes ${script}_<yyyy_mm_dd> (session settings + the
     ### template with ${template_dt} replaced by the date) and appends a
     ### background launch line to run_sql_${script}, which is executed at the end.

     ### Directory holding the template script (no need to change)
     work_dir=/tmp/fix_data/tmp_wjj_20180322_01
     ### Abort if the directory is missing; otherwise files would be truncated
     ### and generated in whatever directory the script was started from.
     cd "${work_dir}" || { echo "cannot cd to ${work_dir}" >&2; exit 1; }
     ### Template script name (a readable file of this name must exist in work_dir)
     script=tmp_wjj_20180322_01
     ### Start date (inclusive). The loop walks BACKWARDS from this date.
     etl_dt_start='2017-09-01'
     ### End date (exclusive). Must be earlier than etl_dt_start.
     etl_dt_end='2016-12-01'
     ### Number of concurrent jobs per batch (do not set above 10)
     thread_num=3
     ### Value written to spark.sql.shuffle.partitions (raise it moderately when
     ### the data volume exceeds 100 million rows)
     partitions=150
     ### Nothing below needs to be changed
     ### Placeholder date inside the template that is replaced by each run date.
     template_dt='2017-10-01'
     thread_no=0
     ### NOTE(review): ip/port/username/password are placeholders; a real password
     ### given via -p is visible in the process list and in run_sql_${script}.
     spark="beeline -u jdbc:hive2://ip:port -n username -p password --verbose=true"
     ### Alternative client; set execute_cmd=${hive} to use it instead of beeline.
     hive="hive -v"
     execute_cmd=${spark}
     run_file="run_sql_${script}"

     ### Without the template every generated job would contain only the
     ### settings and the start/finish markers, so fail early.
     if [[ ! -r "${script}" ]]; then
       echo "template script not found: ${work_dir}/${script}" >&2
       exit 1
     fi
     ### thread_num is used as a modulo divisor below; 0 would be a division by zero.
     if (( thread_num < 1 )); then
       echo "thread_num must be >= 1 (got ${thread_num})" >&2
       exit 1
     fi

     ### Start from an empty run script
     : > "${run_file}"

     ### TODO: throttle on the number of running processes instead of fixed batches
     ### ISO dates (yyyy-mm-dd) order correctly under string comparison.
     while [[ "${etl_dt_start}" > "${etl_dt_end}" ]]; do
       echo "${etl_dt_start}"
       job_file="${script}_${etl_dt_start//-/_}"
       {
         echo "select 'job_start' as flag,'${etl_dt_start}' as num,current_timestamp() as time;"
         echo "set spark.sql.shuffle.partitions=${partitions};"
         echo "use db_name;"
         echo "set mapred.job.queue.name=queue_name;"
         echo "set hive.exec.dynamic.partition=true;"
         echo "set hive.exec.dynamic.partition.mode=nonstrict;"
         sed "s/${template_dt}/${etl_dt_start}/g" "${script}"
         echo "select 'job_finish' as flag,'${etl_dt_start}' as num,current_timestamp() as time;"
       } > "${job_file}"

       thread_no=$((thread_no + 1))
       echo "${execute_cmd} -f ${job_file} > ${job_file}_log 2>&1 &" >> "${run_file}"
       ### Close the batch after every thread_num jobs.
       if (( thread_no % thread_num == 0 )); then
         echo "wait" >> "${run_file}"
       fi

       ### Step one month back. For a daily step use "-1 days" instead; the step
       ### must move backwards or the loop condition above never becomes false.
       ### NOTE(review): relative month arithmetic is only predictable for dates
       ### early in the month (e.g. the 1st) - confirm before using other days.
       etl_dt_start=$(date -d "-1 months ${etl_dt_start}" +%Y-%m-%d) \
         || { echo "date arithmetic failed" >&2; exit 1; }
     done

     ### When the job count is not a multiple of thread_num, the last batch has
     ### no closing "wait"; add it so the run script does not return while jobs
     ### are still running.
     if (( thread_no % thread_num != 0 )); then
       echo "wait" >> "${run_file}"
     fi

     ### Run all generated jobs
     sh "${run_file}"

     ### To inspect the run times afterwards:
     # grep -E "job_start|job_finish" ${script}_*_log|grep -v select
  • 相关阅读:
    创建pdf
    IOS绘图
    IOS断点续传
    IOS程序之间的跳转
    MBProgressHUD的使用
    清除缓存的方法(计算)
    使用post请求下载数据
    NSTimer的使用
    定位功能(使用系统地图)
    fork仓库保持同步更新
  • 原文地址:https://www.cnblogs.com/chenzechao/p/9366409.html
Copyright © 2020-2023  润新知