• Linux awk使用方法


    awk使用简介:

    awk 'BEGIN{statements} pattern{commands} END{ end statements}'
    awk脚本包括三个部分:BEGIN+可以使用模式匹配的通用语句块+END语句块
    执行步骤:
    1.执行begin部分
    2.读取input stream,每读一行数据,执行一次pattern部分
    3.stream读完后执行end
    例如:
    [root@TopInsight 3760_04_code]#awk 'BEGIN {i=0} {i++} END{print i}'  word_freq.sh
    17
    或者(脚本中不含 $ 等会被 shell 展开的字符时,也可以使用双引号):
    [root@TopInsight 3760_04_code]#awk "BEGIN {i=0} {i++} END{print i}"  word_freq.sh
    17

    示例1:打印 ~/.bash_history 文件中使用次数较多的命令

    [root@Moneytu /var/log/httpd]# printf "%-20s\tCount\n" Commands;cat ~/.bash_history  | grep -Ev '#.*' | awk 'BEGIN {} {list[$1]++;} 
    >  END { for(i in list) 
    >  {
    >  printf("%-20s\t%d\n",i,list[i]);} 
    >  }'  v1="Commands" v2="Count" | sort -nrk 2 | head
    Commands            	Count
    ls                  	151
    cd                  	92
    service             	83
    top                 	77
    vi                  	72
    df                  	63
    mysql               	47
    mysqlcheck          	40
    iptables            	39
    tail                	36

    示例2:对apache访问ip进行排序,比较性能:

    [root@Moneytu /var/log/httpd]# cat sum_ip.sh 
    #!/bin/bash
    
    ##对访问ip进行排序
    
    # Print the command-line synopsis and terminate the script.
    # Arguments: none
    # Outputs:   one usage line on stderr
    # Exits:     always, with status 1
    useage(){
        # printf instead of a here-document: the message goes to stderr, and the
        # function no longer depends on an EOF terminator sitting at column 0.
        printf 'Usage: %s filename topN\n' "$0" >&2
        exit 1
    }
    
    # Anything other than exactly two command-line arguments shows the usage text.
    [ $# -eq 2 ] || useage
    
    # Print a table counting how often each first field occurs in a file,
    # highest count first.
    # Globals:   TOP (read) - fallback row limit when $2 is not given
    # Arguments: $1 - input file; the first whitespace-separated field is the key
    #            $2 - optional; maximum number of rows to print
    # Outputs:   header, separator line and "key<TAB>count" rows on stdout
    sumip(){
        local file=$1
        local top=${2:-${TOP:-10}}

        printf '%-20s\tCount\n------------------------------\n' IP
        # awk string literals must not contain a raw newline, so the line break
        # is written as the \n escape.
        awk '{ hits[$1]++ }
             END { for (ip in hits) printf("%-20s\t%d\n", ip, hits[ip]) }' "$file" \
            | sort -nrk 2 | head -n "$top"
    }
    
    FILE=$1
    TOP=$2
    # Quoted so that a path containing spaces or glob characters reaches sumip
    # as a single, unmodified argument.
    sumip "$FILE" "$TOP"

    结果:

    [root@Moneytu /var/log/httpd]# time ./sum_ip.sh access_log 20
    IP                  	Count
    ------------------------------
    222.223.40.185      	19787
    60.28.116.222       	17468
    112.124.26.17       	13891
    ::1                 	13426
    111.162.65.49       	10201
    61.183.230.130      	9926
    219.140.149.244     	8891
    113.247.155.122     	8023
    218.106.119.137     	7776
    61.189.184.55       	6532
    218.69.24.74        	5931
    220.181.125.198     	5665
    221.226.105.178     	5115
    123.126.68.36       	4655
    182.87.49.132       	4620
    113.116.173.16      	4428
    122.96.24.195       	3878
    60.166.75.85        	3557
    183.54.191.41       	3399
    222.88.66.49        	3004
    
    real	0m0.400s
    user	0m0.347s
    sys	0m0.056s

    比下面的命令要快很多:

    [root@Moneytu /var/log/httpd]# time cat access_log | awk '{print $1}' | sort | uniq -c | sort -nr | head -n 20
      19787 222.223.40.185
      17468 60.28.116.222
      13862 112.124.26.17
      13422 ::1
      10201 111.162.65.49
       9926 61.183.230.130
       8891 219.140.149.244
       8023 113.247.155.122
       7776 218.106.119.137
       6532 61.189.184.55
       5931 218.69.24.74
       5650 220.181.125.198
       5115 221.226.105.178
       4642 123.126.68.36
       4620 182.87.49.132
       4428 113.116.173.16
       3878 122.96.24.195
       3557 60.166.75.85
       3399 183.54.191.41
       3004 222.88.66.49
    
    real	0m4.052s
    user	0m3.823s
    sys	0m0.237s

    awk引用外部变量的方法:

    # Pass the shell variable into awk with -v instead of splicing it into the
    # program text, so unusual file names cannot alter the awk script.
    for f in *; do
        [ -f "$f" ] || continue   # skip directories and the unmatched literal "*"
        # Input comes from a redirect so the name is never parsed by awk as an
        # option or a var=value operand.
        awk -v out="$f.file" '{ print $1 >> out }' < "$f"
    done
    
  • 相关阅读:
    yum 安装pip
    sed和awk用法
    awk删除最后一个字符
    shell读取文件内容并进行变量赋值
    git 添加、提交、推送
    git 本地代码冲突解决,强制更新
    sys系统模块
    os模块
    time-时间模块
    环境变量的使用
  • 原文地址:https://www.cnblogs.com/xingxingge/p/10539002.html
Copyright © 2020-2023  润新知