• awk书上练习


    文件car:

    plym fury 1970 73 2500
    chevy malibu 1999 60 3000
    ford mustang 1965 45 10000
    volvo s80 1998 102 9850
    ford thundbd 2003 15 10500
    chevy malibu 2000 50 3500
    bmw 325i 1985 115 450
    honda accord 2001 30 6000
    ford taurus 2004 10 17000
    toyota rav4 2002 180 750
    chevy impala 1985 85 1550
    ford explor 2003 25 9500

    基本

    awk '{print}' car    #类似标准输出
    awk '/chevy/' car    #包含字符串的所有文本行进行复制
    chevy malibu 1999 60 3000
    chevy malibu 2000 50 3500
    chevy impala 1985 85 1550
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '/10/' car    #只要包含,以字符串的形式
    ford mustang 1965 45 10000
    volvo s80 1998 102 9850
    ford thundbd 2003 15 10500
    ford taurus 2004 10 17000
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print $1,$3}' car    #显示第一列,空格,第三列
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '/chevy/{print $1,$3}' car    #匹配行,选中列
    chevy 1999
    chevy 2000
    chevy 1985
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '$2 ~ /[0-9]/' car   #第二列匹配正则(这里是包含数字)的行,整行显示
    volvo s80 1998 102 9850
    bmw 325i 1985 115 450
    toyota rav4 2002 180 750
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '$2 ~ /^[tm]/{print $3,$2,"$"$5}' car    #第2个字段匹配正则,并按要求显示
    1999 malibu $3000
    1965 mustang $10000
    2003 thundbd $10500
    2000 malibu $3500
    2004 taurus $17000
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '$5<=3000' car    #按大小查找
    plym fury 1970 73 2500
    chevy malibu 1999 60 3000
    bmw 325i 1985 115 450
    toyota rav4 2002 180 750
    chevy impala 1985 85 1550
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '/volvo/,/bmw/' car    #匹配两个之间的行
    volvo s80 1998 102 9850
    ford thundbd 2003 15 10500
    chevy malibu 2000 50 3500
    bmw 325i 1985 115 450
    dahu@dahu-OptiPlex-3046:~/myfile$ awk '/chevy/,/ford/' car    #范围模式并不贪心:每遇到一次chevy就开始输出,到下一个ford为止;之后再遇到chevy会重新开始一个范围,所以下面有三段
    chevy malibu 1999 60 3000
    ford mustang 1965 45 10000
    chevy malibu 2000 50 3500
    bmw 325i 1985 115 450
    honda accord 2001 30 6000
    ford taurus 2004 10 17000
    chevy impala 1985 85 1550
    ford explor 2003 25 9500
    awk -f file car

    file 里可以写awk程序,也就是上面引号的内容,不需要加引号.

    BEGIN

    dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa    #前缀
    BEGIN{
    print "Make Mode Year Miles Price"
    print "----------------------------"
    }
    {print}
    dahu@dahu-OptiPlex-3046:~/myfile$ awk -f aaa car
    Make Mode Year Miles Price
    ----------------------------
    plym fury 1970 73 2500
    chevy malibu 1999 60 3000
    ford mustang 1965 45 10000
    volvo s80 1998 102 9850
    ford thundbd 2003 15 10500
    chevy malibu 2000 50 3500
    bmw 325i 1985 115 450
    honda accord 2001 30 6000
    ford taurus 2004 10 17000
    toyota rav4 2002 180 750
    chevy impala 1985 85 1550
    ford explor 2003 25 9500

    length

    dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print length,$0}' car|sort -n    #显示每一行的字符数(包含空格的数量),并按顺序排序,length后也可加括号
    21 bmw 325i 1985 115 450
    22 plym fury 1970 73 2500
    23 volvo s80 1998 102 9850
    24 ford explor 2003 25 9500
    24 toyota rav4 2002 180 750
    25 chevy impala 1985 85 1550
    25 chevy malibu 1999 60 3000
    25 chevy malibu 2000 50 3500
    25 ford taurus 2004 10 17000
    25 honda accord 2001 30 6000
    26 ford mustang 1965 45 10000
    26 ford thundbd 2003 15 10500

    NR

    dahu@dahu-OptiPlex-3046:~/myfile$ awk '{print NR,$0}' car    #显示行数,NR记录编号,NF字段数目
    1 plym fury 1970 73 2500
    2 chevy malibu 1999 60 3000
    3 ford mustang 1965 45 10000
    4 volvo s80 1998 102 9850
    5 ford thundbd 2003 15 10500
    6 chevy malibu 2000 50 3500
    7 bmw 325i 1985 115 450
    8 honda accord 2001 30 6000
    9 ford taurus 2004 10 17000
    10 toyota rav4 2002 180 750
    11 chevy impala 1985 85 1550
    12 ford explor 2003 25 9500
    dahu@dahu-OptiPlex-3046:~/myfile$ awk 'NR==2,NR==4' car    #显示第2行到第4行的内容,太方便了!
    chevy malibu 1999 60 3000
    ford mustang 1965 45 10000
    volvo s80 1998 102 9850

    END

    dahu@dahu-OptiPlex-3046:~/myfile$ awk 'END {print NR,"cars for sale"}' car    #end表示数据已处理之后,此时NR就是总行数了
    12 cars for sale

    if

    dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #if的简短用法,没有使用花括号
    {
        if ($1 ~ /ply/) $1 ="plymouth"
        if ($1 ~ /chev/) $1 ="chevrolet"
        print
    }
    dahu@dahu-OptiPlex-3046:~/myfile$ awk -f aaa car
    plymouth fury 1970 73 2500
    chevrolet malibu 1999 60 3000
    ford mustang 1965 45 10000
    volvo s80 1998 102 9850
    ford thundbd 2003 15 10500
    chevrolet malibu 2000 50 3500
    bmw 325i 1985 115 450
    honda accord 2001 30 6000
    ford taurus 2004 10 17000
    toyota rav4 2002 180 750
    chevrolet impala 1985 85 1550
    ford explor 2003 25 9500
    dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #程序稍加改进,把awk程序文件直接改成可直接运行的,增加一下可执行的权限chmod 744  aaa
    #!/usr/bin/awk -f
    {
        if ($1 ~ /ply/) $1 ="plymouth"
        if ($1 ~ /chev/) $1 ="chevrolet"
        print
    }
    dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
    dahu@dahu-OptiPlex-3046:~/myfile$ cat price_range
    #{                                #这边是将第五列改成评价
    #if ($5 <=5000) $5="cheap";
    #else if (5000<$5 && $5<10000) $5="please ask";
    #else if ($5>=10000) $5="expensive";
    #print $0
    #}
    BEGIN{
        s="cheap"
    }
    {                                  #保留第5列
    if ($5 <=5000) s="cheap";
    else if (5000<$5 && $5<10000) s="please ask";
    else if ($5>=10000) s="expensive";
    print $0,s                          #显示的时候,注意$
    }
    dahu@dahu-OptiPlex-3046:~/myfile$ awk -f price_range car
    plym fury 1970 73 2500 cheap
    chevy malibu 1999 60 3000 cheap
    ford mustang 1965 45 10000 expensive
    volvo s80 1998 102 9850 please ask
    ford thundbd 2003 15 10500 expensive
    chevy malibu 2000 50 3500 cheap
    bmw 325i 1985 115 450 cheap
    honda accord 2001 30 6000 please ask
    ford taurus 2004 10 17000 expensive
    toyota rav4 2002 180 750 cheap
    chevy impala 1985 85 1550 cheap
    ford explor 2003 25 9500 please ask

    OFS  

    什么样的情况下才会按照新格式输出呢?

    dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #OFS输出字段分隔符,默认是空格,但是我改了之后发现只有满足if条件的才会按照新格式输出,如果注释掉了if,就都不会按照新格式输出.原因:awk只有在某个字段被赋值(修改)之后才会用OFS重新拼接$0,没有改过字段的行$0保持原样;想让所有行都按新格式输出,可以在print之前加一句 $1=$1
    #!/usr/bin/awk -f
    BEGIN{OFS=" >> "}
    {
        if ($1 ~ /ply/) $1 ="plymouth"
        if ($1 ~ /chev/) $1 ="chevrolet"
        print $0
    }
    
    dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
    plymouth >> fury >> 1970 >> 73 >> 2500
    chevrolet >> malibu >> 1999 >> 60 >> 3000
    ford mustang 1965 45 10000
    volvo s80 1998 102 9850
    ford thundbd 2003 15 10500
    chevrolet >> malibu >> 2000 >> 50 >> 3500
    bmw 325i 1985 115 450
    honda accord 2001 30 6000
    ford taurus 2004 10 17000
    toyota rav4 2002 180 750
    chevrolet >> impala >> 1985 >> 85 >> 1550
    ford explor 2003 25 9500

    printf

    dahu@dahu-OptiPlex-3046:~/myfile$ cat aaa  #printf改善输出格式
    #!/usr/bin/awk -f
    BEGIN{
        print "                               Miles"
        print "Make       Mode       Year     (000)   Price "
        print "--------------------------------------------"
    }
    {
        if ($1 ~ /ply/) $1 ="plymouth"
        if ($1 ~ /chev/) $1 ="chevrolet"
        printf "%-10s %-8s   %2d   %5d     $ %8.2f\n",
               $1,$2,$3,$4,$5
    }
    
    dahu@dahu-OptiPlex-3046:~/myfile$ ./aaa car
                                   Miles
    Make       Mode       Year     (000)   Price 
    --------------------------------------------
    plymouth   fury       1970      73     $  2500.00
    chevrolet  malibu     1999      60     $  3000.00
    ford       mustang    1965      45     $ 10000.00
    volvo      s80        1998     102     $  9850.00
    ford       thundbd    2003      15     $ 10500.00
    chevrolet  malibu     2000      50     $  3500.00
    bmw        325i       1985     115     $   450.00
    honda      accord     2001      30     $  6000.00
    ford       taurus     2004      10     $ 17000.00
    toyota     rav4       2002     180     $   750.00
    chevrolet  impala     1985      85     $  1550.00
    ford       explor     2003      25     $  9500.00
    dahu@dahu-OptiPlex-3046:~/myfile$ cat redirect     #重定向输出
    #!/usr/bin/awk -f
    /chevy/ {print > "chevfile"}
    /ford/ {print > "fordfile"}
    END {print "done."}
    
    dahu@dahu-OptiPlex-3046:~/myfile$ ./redirect car
    done.
    dahu@dahu-OptiPlex-3046:~/myfile$ cat chevfile 
    chevy malibu 1999 60 3000
    chevy malibu 2000 50 3500
    chevy impala 1985 85 1550
    dahu@dahu-OptiPlex-3046:~/myfile$ cat fordfile 
    ford mustang 1965 45 10000
    ford thundbd 2003 15 10500
    ford taurus 2004 10 17000
    ford explor 2003 25 9500

    FS

    输入字段分隔符

    for

    dahu@dahu-OptiPlex-3046:~/myfile$ cat manuf   #for结构,第一列的内容放入这个字典
    awk '{manuu[$1]++}
    END {for (name in manuu) {print name ,manuu[name]}}' car    #里面的花括号也可以不用加,命令之间用;间隔
    dahu@dahu-OptiPlex-3046:~/myfile$ ./manuf 
    honda 1
    bmw 1
    volvo 1
    ford 4
    plym 1
    chevy 3
    toyota 1
    dahu@dahu-OptiPlex-3046:~/myfile$ cat mmanuf   #这个程序感觉厉害了
    if [ $# !=2 ]
        then
            echo "something wrong!"
            exit 1
    fi
    awk < $2 '
    {count[$'$1']++}      #注意这里,单引号成对出现,两端引号内容连接起来,中间还是直接引用传入的第一个参数,在这里是1,所以调用第一列的内容,666,在END里也试过,单引号随便加,反正连起来的.哪怕是把变量名拆掉也行...吊
    END{for (item in count) print item,count[item]}
    #END{fo''r (item in count) ''print it''em,count[item]}  #你敢信吗?
    '
    dahu@dahu-OptiPlex-3046:~/myfile$ ./mmanuf 1 car    #下面的报错是因为脚本第1行的 !=2 少了空格,应写成 [ $# != 2 ];由于判断失败,if体被跳过,awk照常执行
    ./mmanuf: line 1: [: 2: unary operator expected
    honda 1
    bmw 1
    volvo 1
    ford 4
    plym 1
    chevy 3
    toyota 1

    实操:

    1.统计不重复的个数:

    xch27@lanzhou:/asrdata/users/ql826/lmwork/comm_cloud/aicar_solution/v28_24Apr2017/data/radio/slot$ awk 'BEGIN{FS="&"}{print $1}' slot.map |sort -u
    CLASS-FM频道
    CLASS-主持人
    CLASS-序列号
    CLASS-频道
    CLASS-频道类型

    2.统计某一项的个数:

    xch27@lanzhou:/asrdata/users/ql826/lmwork/comm_cloud/aicar_solution/v28_24Apr2017/data/radio/slot$ awk 'BEGIN{FS="&";a=0}{if($1 =="CLASS-频道")a++}END{print a}' slot.map 
    114716

     3.找到某几行的内容

    dahu@dahu-OptiPlex-3046:~/Downloads$ awk '{if(NR<=4900 && NR>=4800)print $2}' enwords.oov.cnt-gt800.cnt-pron > en

     4.匹配我要找的CLASS-XXX,且不重复,for的简单应用

    xch27@lanzhou:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v30_23May2017/data/life/music/pat$ head gequ_geshou
    <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 唱 的 </s>
    <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s>
    <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s>
    <s> CLASS-动作二 CLASS-歌曲名 CLASS-歌手名 的 </s>
    <s> CLASS-动作二 CLASS-歌手名 的 CLASS-语种 歌曲 CLASS-歌曲名 </s>
    <s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 唱 的 </s>
    <s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 的 </s>
    <s> CLASS-动作三 CLASS-歌曲名 的 歌词 CLASS-歌手名 演唱 的 </s>
    <s> CLASS-动作三 CLASS-歌手名 唱 的 CLASS-歌曲名 的 歌词 </s>
    <s> CLASS-动作三 CLASS-歌手名 的 CLASS-歌曲名 的 歌词 </s>
    xch27@lanzhou:/.../pat$ awk '{for(i=2;i<NF;i++)if($i ~ "CLASS-"){print $i}}' gequ_geshou |sort -u
    CLASS-动作三
    CLASS-动作二
    CLASS-操作
    CLASS-歌手名
    CLASS-歌曲名
    CLASS-语种

     统计 不匹配"CLASS-"的行数

    awk '{a=0;for(i=1;i<=NF;i++){if($i ~ "CLASS-"){a=1}};if(a!=1){count++}}END{print count}' music.comm.mrg.v3.pat.wseg

     修改文件内容,多变量传递

    #!/bin/bash
    #awk传入变量练习,直接修改小麦
    fs=`awk '/你好小迈/{print $2}' wakeup.logp`
    #echo $fs
    #多个变量这样添加
    #p=321
    #echo |awk -v tt="$fs" -v tg="$p" 'BEGIN{print tt,tg}'
    ft=`awk '/小迈你好/{print $2}' wakeup.logp`
    awk -v nhxm="$fs" -v xmnh="$ft" '{if($1~"你好小麦"){a=nhxm+2;print $1,a}else if($1~"小麦你好"){a=xmnh+2;print $1,a}else{print $0}}' wakeup.logp >tmp

     awk根据不同名称输出到不同文件.知识点:FS,RS,substr,split,awk内部输出

    xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ head t1
    名称:北京南顺油脂有限公司
    拼音:BeiJing NanShun YouZhi YouXianGongSi
    别称:
    地址:良乡南肖庄道口西
    类型:公司企业;公司;公司
    省:北京市
    市:北京市
    区县:房山区
    热度:9.6901977
    
    xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ awk 'BEGIN{FS="\n";RS="\n\n"}{a=substr($5,4);split(a,A,";");print substr($1,4)>>A[1];}' t1
    
    xch27@taiyuan:/asrdata/users/xch27/lmwork/comm_cloud/aicar_solution/v32_30june2017/data/navi/slot$ ls
    gaode.alldata.0620.txt  readme.sh  tmp       体育休闲服务       公司企业      商务住宅      搜索词_0620.txt     生活服务      购物服务      餐饮服务
    lineprocess.py          t1         住宿服务  全量数据_0620.txt  医疗保健服务  地名地址信息  政府机构及社会团体  科教文化服务  金融保险服务
  • 相关阅读:
    周记(第六周)
    周记(第五周)
    周记(第四周)
    周记(第三周)
    周记(第二周)
    《大道至简》读后感
    __proto__
    Object.prototype
    Object.setPrototypeOf(obj, proto)
    Object.getPrototypeOf(obj)
  • 原文地址:https://www.cnblogs.com/dahu-daqing/p/6838489.html
Copyright © 2020-2023  润新知