• shell编程系列21--文本处理三剑客之awk中数组的用法及模拟生产环境数据统计


    shell编程系列21--文本处理三剑客之awk中数组的用法及模拟生产环境数据统计
    
    shell中的数组的用法:
        shell数组中的下标是从0开始的
    
        array=("Allen" "Mike" "Messi" "Jerry" "Hanmeimei" "Wang")
        打印元素:        echo ${array[2]}
        打印元素个数:        echo ${#array[@]}
        打印某个元素长度:    echo ${#array[3]}
            
        给元素赋值:    array[3]=ui;
        删除元素:    unset array[2];unset array # 删除数组
        分片访问:    echo ${array[@]:1:3}
        元素内容替换:    ${array[@]/e/E} 只替换第一个e;${array[@]//e/E} 替换所有的e
    
        数组的遍历:
            for a in ${array[@]}
            do
                echo $a
            done
    
        awk中数组的用法:
            在awk中,使用数组时,不仅可以使用1.2..n作为数组小标,也可以使用字符串作为数组下标
    
        典型常用例子:
            1、统计主机上所有的tcp链接状态数,按照每个tcp状态分类
                # netstat -an | grep tcp | awk '{arr[$6]++}END{for (i in arr) print i,arr[i]}'
                LAST_ACK 40
                LISTEN 6
                SYN_RECV 105
                ESTABLISHED 3751
                FIN_WAIT1 33
                FIN_WAIT2 9
                CLOSING 3
                TIME_WAIT 2444
    
            2、计算横向数据综合,计算纵向数据总和
    
                Allen    80    90    96    98
                Mike    93    98    92    91
                Zhang    78    76    87    92
                Jerry    86    89    68    92
                Han    85    95    75    90
                Li    78    88    98    100
    
            # 代码如下:
            [root@localhost shell]# awk -f statics.awk student.txt 
            Name                          Yuwen                         Math                          English                       Physical                      total                         
            Allen                         80                            90                            96                            98                            364                           
            Mike                          93                            98                            92                            91                            374                           
            Zhang                         78                            76                            87                            92                            333                           
            Jerry                         86                            89                            68                            92                            335                           
            Han                           85                            95                            75                            90                            345                           
            Li                            78                            88                            98                            100                           364                           
            every_total                   500                           536                           516                           563                           
            [root@localhost shell]# cat statics.awk 
            BEGIN{
                printf "%-30s%-30s%-30s%-30s%-30s%-30s
    ","Name","Yuwen","Math","English","Physical","total"
            }
            {
                total=$2+$3+$4+$5
                yuwen_sum+=$2
                math_sum+=$3
                english_sum+=$4
                physical_sum+=$5
                printf "%-30s%-30d%-30d%-30d%-30d%-30d
    ",$1,$2,$3,$4,$5,total
            }
            END{
                printf "%-30s%-30d%-30d%-30d%-30d
    ","every_total",yuwen_sum,math_sum,english_sum,physical_sum
            }
    
    
    计算字符串的长度:
    [root@localhost shell]# str="test string"
    [root@localhost shell]# echo $str
    test string
    [root@localhost shell]# echo ${#str}
    11
    
    # 修改数组元素
    array=("Allen" "Mike" "Messi" "Jerry" "Hanmeimei" "Wang")
    [root@localhost shell]# array[1]="Jerry"
    [root@localhost shell]# echo ${array[@]}
    Allen Jerry Messi Jerry Hanmeimei Wang
    
    # 删除第3个元素
    [root@localhost shell]# echo ${array[@]}
    Allen Jerry Messi Jerry Hanmeimei Wang
    [root@localhost shell]# 
    [root@localhost shell]# unset array[2];
    [root@localhost shell]# echo ${array[@]}
    Allen Jerry Jerry Hanmeimei Wang
    
    
    # 在数组中删除下标为1的元素,即Mike被删除,再次删除下标为1的元素,发现数组不变,说明数组虽然删除了元素,下标还是不变保存在内存中
    [root@localhost shell]# array=("Allen" "Mike" "Messi" "Jerry" "Hanmeimei" "Wang")
    [root@localhost shell]# unset array[1]
    [root@localhost shell]# echo ${array[*]}
    Allen Messi Jerry Hanmeimei Wang
    [root@localhost shell]# unset array[1]
    [root@localhost shell]# echo ${array[*]}
    Allen Messi Jerry Hanmeimei Wang
    
    # 分片访问,数组为1的开始遍历3个元素
    [root@localhost shell]# array=("Allen" "Mike" "Messi" "Jerry" "Hanmeimei" "Wang")
    [root@localhost shell]# echo ${array[@]:1:3}
    Mike Messi Jerry
    # 1到最后
    [root@localhost shell]# echo ${array[@]:1}
    Mike Messi Jerry Hanmeimei Wang
    
    #替换1个,替换所有
    [root@localhost shell]# echo ${array[@]}
    Allen Mike Messi Jerry Hanmeimei Wang
    [root@localhost shell]# echo ${array[@]/e/E}
    AllEn MikE MEssi JErry HanmEimei Wang
    [root@localhost shell]# echo ${array[@]//e/E}
    AllEn MikE MEssi JErry HanmEimEi Wang
    
    # 遍历数组
    [root@localhost shell]# for a in ${array[@]};do echo $a;done
    Allen
    Mike
    Messi
    Jerry
    Hanmeimei
    Wang
    
    
        计算横向和、纵向和
    
        Allen    80    90    96    98
        Mike    93    98    92    91
        Zhang    78    76    87    92
        Jerry    86    89    68    92
        Han    85    95    75    90
        Li    78    88    98    100
    
        [root@localhost shell]# awk -f stu.awk student.txt 
        Name                Yuwen               Math                English             Physical            total               
        Allen               80                  90                  96                  98                  364                 
        Mike                93                  98                  92                  91                  374                 
        Zhang               78                  76                  87                  92                  333                 
        Jerry               86                  89                  68                  92                  335                 
        Han                 85                  95                  75                  90                  345                 
        Li                  78                  88                  98                  100                 364                 
        every_total         500                 536                 516                 563                 
        [root@localhost shell]# cat stu.awk 
        BEGIN{
            printf "%-20s%-20s%-20s%-20s%-20s%-20s
    ","Name","Yuwen","Math","English","Physical","total"
        }
    
        {    total=$2+$3+$4+$5
            yunwen_sum+=$2
            math_sum+=$3
            english_sum+=$4
            physical_sum+=$5
            printf "%-20s%-20d%-20d%-20d%-20d%-20d
    ",$1,$2,$3,$4,$5,total
        }
        END{
            printf "%-20s%-20d%-20d%-20d%-20d
    ","every_total",yunwen_sum,math_sum,english_sum,physical_sum
        }
    
    
    
    # 模拟生产环境数据脚本
    [root@localhost shell]# cat insert.sh 
    #!/bin/bash
    #
    
    function create_random()
    {
        min=$1
        max=$(($2-$min+1))
        num=$(date +%s%N)
        echo $(($num%$max+$min))
    }
    
    INDEX=1
    
    while true
    do
        for user in Allen Mike Jerry Tracy Hanmeimei Lilei
        do
            COUNT=$RANDOM
            NUM1=`create_random 1 $COUNT`
            NUM2=`expr $COUNT - $NUM1`
            echo "`date '+%Y-%m-%d %H:%M:%S'` $INDEX Batches: user:$user insert $COUNT records into datebase:product table:detail, insert $NUM1 records successfully,
            failed $NUM2 records" >> ./db.log.`date +%Y%m%d`
            INDEX=`expr $INDEX + 1`
        done
    done
    
    
    数据格式如下:
    db.log.20190608
    
    2019-06-08 10:31:40 15459 Batches: user Jerry insert 5504 records into datebase:product table:detail, insert 5253 records successfully,failed 251 records
    2019-06-08 10:31:40 15460 Batches: user Tracy insert 25114 records into datebase:product table:detail, insert 13340 records successfully,failed 11774 records
    2019-06-08 10:31:40 15461 Batches: user Hanmeimei insert 13840 records into datebase:product table:detail, insert 5108 records successfully,failed 8732 records
    2019-06-08 10:31:40 15462 Batches: user Lilei insert 32691 records into datebase:product table:detail, insert 5780 records successfully,failed 26911 records
    2019-06-08 10:31:40 15463 Batches: user Allen insert 25902 records into datebase:product table:detail, insert 14027 records successfully,failed 11875 records
    ...
    
    
    1、统计每个人分别插入了多少条record进数据库
        输出结果示例:
        Name    totalrecords
        allen    493082
        mike    349287
    
        [root@localhost shell]# awk -f exam1.awk  db.log.20190608 
        User                Total records       
        Jerry               42168344            
        Mike                42070170            
        Lilei               42628926            
        Hanmeimei           41141251            
        Tracy               42241015            
        Allen               42283107            
        [root@localhost shell]# cat exam1.awk 
        BEGIN{
            printf "%-20s%-20s
    ","User","Total records"
        }
    
        {
            USER[$6]+=$8
        }
    
        END{
            for(u in USER)
                printf "%-20s%-20d
    ",u,USER[u]
        }
    
    
    2、统计每个人分别插入成功了多少record,失败了多少record
    
        输出结果:
        User    Success_record    failed_record
        jerry    342222        2813413
        
        success $14
        failed    $17
    
    
        [root@localhost shell]# cat exam2.awk 
        BEGIN{
            printf "%-30s%-30s%-30s
    ","User","Success records","Failed records"
        }
    
        {
            SUCCESS[$6]+=$14
            FAILED[$6]+=$17
        }
    
        END{
            for(u in SUCCESS)
                printf "%-30s%-30d%-30d
    ",u,SUCCESS[u],FAILED[u]
        }
        [root@localhost shell]# awk -f exam2.awk db.log.20190608 
        User                          Success records               Failed records                
        Jerry                         20641524                      21526820                      
        Mike                          21119844                      20950326                      
        Lilei                         21441042                      21187884                      
        Hanmeimei                     20584580                      20556671                      
        Tracy                         20837585                      21403430                      
        Allen                         21024923                      21258184 
    
    3、将例子1和例子2结合起来,一起输出,输出每个人分别插入多少条数据,多少成功,多少失败,并且要格式化输出,加上标题
        输出结果:
        User    Total        success        failed
        tracy    6134139        34113444    1341
        allen    13387878    7897912        14319
    
        代码:
        [root@localhost shell]# cat exam3.awk 
        BEGIN{
            printf "%-30s%-30s%-30s%-30s
    ","Name","total records","success records","failed records"
        }
    
        {
            TOTAL_RECORDS[$6]+=$8
            SUCCESS[$6]+=$14
            FAILED[$6]+=$17
        }
    
        END{
            for(u in TOTAL_RECORDS)
                printf "%-30s%-30d%-30d%-30d
    ",u,TOTAL_RECORDS[u],SUCCESS[u],FAILED[u]
        }
        [root@localhost shell]# awk -f exam3.awk db.log.20190608 
        Name                          total records                 success records               failed records                
        Jerry                         42168344                      20641524                      21526820                      
        Mike                          42070170                      21119844                      20950326                      
        Lilei                         42628926                      21441042                      21187884                      
        Hanmeimei                     41141251                      20584580                      20556671                      
        Tracy                         42241015                      20837585                      21403430                      
        Allen                         42283107                      21024923                      21258184 
    
    4、在例子3的基础上,加上结尾,统计全部插入记录数,成功记录数,失败记录数
        输出结果:
        User    Total        success        failed
        tracy    6134139        34113444    1341
        allen    13387878    7897912        14319
            1413413        3141311341    13411
    
        方法1:
        [root@localhost shell]# cat exam4_b.awk 
        BEGIN{
            printf "%-30s%-30s%-30s%-30s
    ","Name","total records","success records","failed records"
        }
    
        {
            TOTAL_RECORDS[$6]+=$8
            SUCCESS[$6]+=$14
            FAILED[$6]+=$17
        }
    
        END{
            for(u in TOTAL_RECORDS)
            {
                # 在统计出的结果数组中进行累加
                records_sum+=TOTAL_RECORDS[u]
                success_sum+=SUCCESS[u]
                failed_sum+=FAILED[u]
                printf "%-30s%-30d%-30d%-30d
    ",u,TOTAL_RECORDS[u],SUCCESS[u],FAILED[u]
            }
    
            printf "%-30s%-30d%-30d%-30d
    ","",records_sum,success_sum,failed_sum
        }
        [root@localhost shell]# awk -f exam4_b.awk db.log.20190608 
        Name                          total records                 success records               failed records                
        Jerry                         42168344                      20641524                      21526820                      
        Mike                          42070170                      21119844                      20950326                      
        Lilei                         42628926                      21441042                      21187884                      
        Hanmeimei                     41141251                      20584580                      20556671                      
        Tracy                         42241015                      20837585                      21403430                      
        Allen                         42283107                      21024923                      21258184                      
                          252532813                     125649498                     126883315     
        方法2:
        [root@localhost shell]# cat exam4.awk 
        BEGIN{
            printf "%-30s%-30s%-30s%-30s
    ","Name","total records","success records","failed records"
        }
    
        {
            RECORDS[$6]+=$8
            SUCCESS[$6]+=$14
            FAILED[$6]+=$17
            
            # 在原始数据中进行汇总计算
            records_sum+=$8
            success_sum+=$14
            failed_sum+=$17    
        }
    
        END{
            for(u in RECORDS)
                printf "%-30s%-30d%-30d%-30d
    ",u,RECORDS[u],SUCCESS[u],FAILED[u]
    
            printf "%-30s%-30d%-30d%-30d
    ","total",records_sum,success_sum,failed_sum
        }
        [root@localhost shell]# awk -f exam4.awk db.log.20190608 
        Name                          total records                 success records               failed records                
        Jerry                         42168344                      20641524                      21526820                      
        Mike                          42070170                      21119844                      20950326                      
        Lilei                         42628926                      21441042                      21187884                      
        Hanmeimei                     41141251                      20584580                      20556671                      
        Tracy                         42241015                      20837585                      21403430                      
        Allen                         42283107                      21024923                      21258184                      
        total                         252532813                     125649498                     126883315 
    
    5、查找丢失数据的现象,也就是成功+失败的记录数不等于一共插入的记录数,找出这些数据并显示行号和对应行的日志信息
        输出结果:
            
        代码:
            [root@localhost shell]# awk '{if($8!=$14+$17) print NR,$0}' db.log.20190608 
            5 2019-06-08 10:29:59 5 Batches: user Hanmeimei insert 15688 records into datebase:product table:detail, insert 3986 records successfully,failed 1102 records
            15404 2019-06-08 10:31:39 15404 Batches: user Mike insert 26675 records into datebase:product table:detail, insert 1931 records successfully,failed 24725 records
        
        # 写入文件的方式
        [root@localhost shell]# awk -f exam5.awk db.log.20190608 
        5 2019-06-08 10:29:59 5 Batches: user Hanmeimei insert 15688 records into datebase:product table:detail, insert 3986 records successfully,failed 1102 records
        15404 2019-06-08 10:31:39 15404 Batches: user Mike insert 26675 records into datebase:product table:detail, insert 1931 records successfully,failed 24725 records
        [root@localhost shell]# cat exam5.awk 
        BEGIN{
        }
        {
            if($8!=$14+$17) 
            print NR,$0
        }
  • 相关阅读:
    给video添加自定义进度条
    高德地图鼠标获取经纬度
    高德地图行政区域划分
    面试经验之谈
    Hybrid App 开发模式
    运维 08 常用服务安装部署
    运维 07 Linux系统基础优化及常用命令
    运维 06 vim与程序员
    运维 05 Shell基本命令
    运维 04 Shell基础命令(二)
  • 原文地址:https://www.cnblogs.com/reblue520/p/11016953.html
Copyright © 2020-2023  润新知