• Shell脚本运行hive语句 | hive以日期建立分区表 | linux schedule程序 | sed替换文件字符串 | shell判断hdfs文件/文件夹是否存在


    #!/bin/bash
    source /etc/profile;
    
    ##################################################
    # Author: ouyangyewei                            #
    #                                                #
    # Content: Combineorder Algorithm                #
    ##################################################
    
    # change workspace to here
    cd /
    cd /home/deploy/recsys/algorithm/schedule/project/combineorder
    
    # generate product_sell data
    yesterday=$(date -d '-1 day' '+%Y-%m-%d')
    lastweek=$(date -d '-1 week' '+%Y-%m-%d')
    
    /usr/local/cloud/hive/bin/hive<<EOF 
    CREATE EXTERNAL TABLE IF NOT EXISTS product_sell(
    category_id bigint,
    province_id bigint,
    product_id bigint,
    price double,
    sell_num bigint
    )
    PARTITIONED BY (ds string)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '	'
    LINES TERMINATED BY '
    '
    STORED AS TEXTFILE;
    
    INSERT OVERWRITE TABLE product_sell PARTITION (ds='$yesterday') select a.category_id, b.good_receiver_province_id as province_id, a.id as product_id, (b.sell_amount/b.sell_num) as price, b.sell_num from product a join (select si.product_id, s.good_receiver_province_id, sum(si.order_item_amount) sell_amount, sum(si.order_item_num) sell_num from so_item si join so s on (si.order_id=s.id) where si.is_gift=0 and si.is_hidden=0 and si.ds between '$lastweek' and '$yesterday' group by s.good_receiver_province_id, si.product_id) b on (a.id=b.product_id);
    EOF
    
    # generate yhd_gmv_month data
    yesterday=$(date -d '-1 day' '+%Y-%m-%d')
    lastmonth=$(date -d '-1 month' '+%Y-%m-%d')
    
    /usr/local/cloud/hive/bin/hive<<EOF 
    CREATE EXTERNAL TABLE IF NOT EXISTS yhd_gmv_month(
    province_id bigint,
    price_area int,
    product_id bigint,
    sell_num bigint
    )
    PARTITIONED BY (ds string)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '	'
    LINES TERMINATED BY '
    '
    STORED AS TEXTFILE;
    
    INSERT OVERWRITE TABLE yhd_gmv_month PARTITION (ds='$yesterday') select ssi.province_id, (case when price>0.0 and price<=10.0 then 0 when price>10.0 and price<=20.0 then 1 when price>20.0 and price<=30.0 then 2 when price>30.0 then 3 else -1 end) as price_area, ssi.product_id, ssi.sell_num from (select s.good_receiver_province_id as province_id, si.product_id, sum(si.order_item_num) as sell_num, sum(si.order_item_amount)/sum(si.order_item_num) as price from so_item si join so s on (si.order_id=s.id) where si.is_hidden=0 and si.is_gift=0 and si.ds between '$lastmonth' and '$yesterday' group by s.good_receiver_province_id, si.product_id) ssi;
    EOF
    
    # execute the combineorder algorithm job
    cd /
    cd /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/pms_category_rec_prod
    hadoop jar /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/recommender-dm-1.0-SNAPSHOT.jar com.yhd.recommender.combineorder.schedule.CombineorderRecommendScheduler
    
    # export "pms_category_rec_prod" data to mysql
    cd /
    cd /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/pms_category_rec_prod
    hadoop jar /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/recommender-dm-1.0-SNAPSHOT.jar com.yhd.recommender.exporter.db.HdfsToDBProcessor
    
    # check "yhd_gmv_month" is exist
    yesterday=$(date -d '-1 day' '+%Y-%m-%d')
    hadoop fs -test -e /user/hive/warehouse/yhd_gmv_month/ds=2014-08-27
    if [ $? -ne 0 ] ;then
    	echo 'Error! Directory is not exist'
    else
    # auto modify date time
    oldestVersionDay=$(date -d '-3 day' '+%Y-%m-%d')
    olderVersionDay=$(date -d '-2 day' '+%Y-%m-%d')
    newVersionDay=$(date -d '-1 day' '+%Y-%m-%d')
    
    sed -r -i '{s/oldestVersion=/user/hive/warehouse/yhd_gmv_month/ds=.*/oldestVersion=/user/hive/warehouse/yhd_gmv_month/ds='"${oldestVersionDay}"'/}' /home/deploy/recsys/algorithm/schedule/verifaction/combineorder/yhd_gmv_month/input/verification.properties
    sed -r -i '{s/olderVersion=/user/hive/warehouse/yhd_gmv_month/ds=.*/olderVersion=/user/hive/warehouse/yhd_gmv_month/ds='"${olderVersionDay}"'/}' /home/deploy/recsys/algorithm/schedule/verifaction/combineorder/yhd_gmv_month/input/verification.properties
    sed -r -i '{s/newVersion=/user/hive/warehouse/yhd_gmv_month/ds=.*/newVersion=/user/hive/warehouse/yhd_gmv_month/ds='"${newVersionDay}"'/}' /home/deploy/recsys/algorithm/schedule/verifaction/combineorder/yhd_gmv_month/input/verification.properties
    
    # export "yhd_gmv_month" data to mysql
    cd /
    cd /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/yhd_gmv_month
    
    hadoop jar /home/deploy/recsys/algorithm/schedule/project/combineorder/schedule/recommender-dm-1.0-SNAPSHOT.jar com.yhd.recommender.exporter.db.HdfsToDBProcessor
    fi
    

  • 相关阅读:
    J
    I
    uva122 二叉树的实现和层次遍历(bfs)
    A
    HDU 波峰
    2239: 童年的圣诞树
    1734: 堆(DFS)
    1731: 矩阵(前缀和)
    1733: 旋转图像(模拟)
    1728: 社交网络(概率问题 组合数/排列数)
  • 原文地址:https://www.cnblogs.com/blfshiye/p/5127582.html
Copyright © 2020-2023  润新知