• Linux备份-删除指定日期内文件


    #!/usr/bin/env bash


    source /etc/profile


    echo " *************** start filter ***************  "

    # get befor six month last day

    #m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m0}

    #m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m1}

    #m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m2}

    #m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m3}

    #m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m4}

    #m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m5}

    #m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m6}


    # 取得当前月的最后一天,访问数组长度:${#m[*]} + ${#m[@]}

    m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)

    echo m0 : ${m[0]} ' month : ' ${#m[@]}

    for n in $(seq 0 11); do

        m[$n+1]=$(date -d "$(date -d ${m[$n]} +%Y%m01) -1 day" +%Y%m%d)

        echo m$[$n+1] : ${m[$n+1]} ' month : ' ${#m[*]};

    done


    echo " ****** time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "


    max_date=0

    # Find the most recent local filter directory.  Directory names contain
    # '_' and their third '_'-separated field (before any '.') is a YYYYMMDD
    # date.  Glob instead of parsing `ls -l` output (SC2045).
    cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

    for dir in */; do
        dir=${dir%/}
        if [[ -d "$dir" && "$dir" == *_* ]]; then
            f_d=$(echo "$dir" | cut -d _ -f 3 | cut -d . -f 1)
            # Lexicographic compare is safe: dates are fixed-width YYYYMMDD
            # and max_date starts at "0".
            if [[ "$max_date" < "$f_d" ]]; then
                max_date=$f_d
                max_filter=$dir
            fi
        fi
    done

    echo " max date is : ""$max_date"
    echo " max filter is : ""$max_filter"
    pwd

    # Copy the latest filter directory to HDFS unless it is already there.
    # Test the command directly instead of inspecting $? afterwards.
    if hadoop fs -test -e "/data/datacenter/run_center_spark_stream/bloom_filters/$max_filter"; then
        echo " filter is already exist : ""$max_filter"
    else
        echo " start hdfs copy "
        echo " ****** start time : " "$(date '+%Y-%m-%d %H:%M:%S')" " ****** "
        hadoop fs -put "$max_filter" /data/datacenter/run_center_spark_stream/bloom_filters
        echo " ****** end time : " "$(date '+%Y-%m-%d %H:%M:%S')" " ****** "
    fi


    # Retention cutoffs derived from the newest local filter date:
    #   remove_week - 7 days before max_date (local serialized-file cutoff)
    #   remove_date - 30 days before max_date (file / HDFS filter cutoff)
    remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
    echo " 删除本地序列化文件的日期界限:""$remove_week"

    remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
    echo " 删除文件 和 Hadoop filter 的日期界限:""$remove_date"


    echo " *************** start remove filter ***************  "

    # Purge old local filter directories:
    #   * older than remove_date (30 days): delete the whole directory, except
    #     month-end snapshots, which keep only mau_device_all.FILTER.SER;
    #   * older than remove_week (7 days): delete *.FILTER.SER files; for the
    #     three most recent month-end snapshots keep mau_device_all.FILTER.SER.
    for r_dir in */; do
        r_dir=${r_dir%/}
        [[ -d "$r_dir" && "$r_dir" == *_* ]] || continue
        r_d=$(echo "$r_dir" | cut -d _ -f 3 | cut -d . -f 1)
        if [[ "$r_d" < "$remove_date" ]]; then
            # Month-end dates are kept; surrounding spaces prevent partial
            # matches inside neighbouring array entries.
            if [[ " ${m[*]} " == *" $r_d "* ]]; then
                cd "/home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir"
                pwd
                # Iterate the directory entries directly; `$(ls *)` would list
                # the *contents* of subdirectories and split on whitespace.
                for f_dir in *; do
                    if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
                        echo " ------ keep mau_filter is: " "$f_dir"
                    else
                        echo " remove file is: " "$f_dir"
                        rm -r "$f_dir"
                    fi
                done
                cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
                pwd
            else
                echo " remove filter_dir is: ""$r_dir"
                rm -r "$r_dir"
            fi
        elif [[ "$r_d" < "$remove_week" ]]; then
            # BUG FIX: the original compared against $m0/$m1/$m2, which are
            # never defined (only the array m[] is), so this branch never
            # matched and month-end snapshots were handled by the else arm.
            if [[ "$r_d" == "${m[0]}" || "$r_d" == "${m[1]}" || "$r_d" == "${m[2]}" ]]; then
                cd "/home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir"
                pwd
                for f_dir in *; do
                    if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
                        echo " ------ week keep mau_filter is: " "$f_dir"
                    elif [[ "$f_dir" == *.FILTER.SER ]]; then
                        echo " - last day of month - week remove file is: " "$f_dir"
                        rm -r "$f_dir"
                    fi
                done
                cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
                pwd
            else
                echo "week remove filter is: ""$r_dir"
                rm -r "$r_dir"/*.FILTER.SER
            fi
        fi
    done


    echo " =============== start remove hdfs filter ===============  "

    # Remove HDFS filters older than remove_date; month-end dates are kept.
    # Field 8 of `hadoop fs -ls` is the path; path component 6 carries the
    # dated directory name whose 3rd '_'-field is YYYYMMDD.
    for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk '{print $8}'); do
        if [[ "$h_filter" == *_* ]]; then
            h_date=$(echo "$h_filter" | cut -d / -f 6 | cut -d _ -f 3 | cut -d . -f 1)
            # Surrounding spaces prevent partial matches across array entries.
            if [[ " ${m[*]} " == *" $h_date "* ]]; then
                echo " remain hdfs filter is : ""$h_filter"
            elif [[ "$h_date" < "$remove_date" ]]; then
                echo "not remain date is : ""$h_date"
                echo "remove hdfs filter is : ""$h_filter"
                # `hadoop fs -rmr` is deprecated; use `-rm -r`.
                hadoop fs -rm -r "$h_filter"
            fi
        fi
    done


    echo " -------------- start tdid ---------------  "

    # Delete local tdid files older than remove_date.  The 2nd '_'-separated
    # field of the file name (before any '.') is a YYYYMMDD date.  Files dated
    # at or after max_date are reported as copy candidates; the actual HDFS
    # copy (and the HDFS tdid cleanup loop) was disabled in the original
    # script and is intentionally not performed here.
    cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo

    # Glob instead of `$(ls *)`, which recurses into subdirectories and
    # word-splits names.
    for tdid in *; do
        if [[ "$tdid" == *_* ]]; then
            t_d=$(echo "$tdid" | cut -d _ -f 2 | cut -d . -f 1)
            if [[ "$t_d" == "$max_date" || "$t_d" > "$max_date" ]]; then
                echo " need copy date : ""$t_d"
                echo " need copy tdid : ""$tdid"
            elif [[ "$t_d" < "$remove_date" ]]; then
                echo " remove tdid : ""$tdid"
                rm "$tdid"
            fi
        fi
    done

  • 相关阅读:
    xgqfrms™, xgqfrms® : xgqfrms's official website of GitHub!
    xgqfrms™, xgqfrms® : xgqfrms's official website of GitHub!
    详解以太坊世界状态
    VDF 不是工作量证明
    以太坊:Go-Ethereum: 编译运行
    【转】理解分布式账本技术: 经济学视角
    Counterfactual 项目:广义的以太坊状态通道
    Solidity 安全:已知攻击方法和常见防御模式综合列表
    Verge 攻击解析
    以太坊区块链的轻客户端
  • 原文地址:https://www.cnblogs.com/anitinaj/p/10025195.html
Copyright © 2020-2023  润新知