• linux shell实现统计 位点缺失率


    1、脚本

    [root@centos79 test]# cat test.sh
    #!/bin/bash
    
    #step1 check ped file
    uniqn=$(sed 's/\r//g' $1 | cut -d " " -f 7- | sed 's/ /\n/g' | sort -u | wc -l)
    if [ $uniqn -gt 5 ]
    then
    echo "error, exception nucleotide: "
    sed 's/^M//g' $1 | cut -d " " -f 7- | sed 's/ /\n/g' | sort -u | grep -v [ATGC0]
    exit
    fi
    
    
    temp1=$(head -n 1 $1 | awk '{print NF}')
    for i in $(seq `sed -n "$=" $1`)
    do
    temp2=$(sed -n "$i"p $1 | awk '{print NF}')
    if [ $temp2 -ne $temp1 ]
    then
    echo "inconsistent number of columns!"
    echo -e "colun1 1: $temp1\ncolume $i: $temp2"
    exit
    fi
    done
    echo "step 1 done!"
    
    
    #step2 check consistence of ped and map file
    
    mapline=$(sed -n "$=" $2)
    locinum=$(head -n 1 $1 | awk '{print (NF - 6)/2}')
    
    if [ $mapline -ne $locinum ]
    then
    echo "error, map file and ped file do not match!"
    echo -e "mapline: $mapline\nlocinum: $locinum"
    exit
    fi
    echo "step 2 done!"
    
    #step 3 statistics
    lines=$(sed -n "$=" $1)
    for i in $(seq `head -n 1 $1 | awk '{print (NF - 6)/2}'`); do awk -v a=$i '{print $(a * 2 + 5), $(a * 2 + 6)}' $1 | awk -v a=$lines -v RS="@#$j" '{print gsub(/0/,"&")/(a*2)}' >> missresult1; done
    echo "step 3 done!"
    
    #step 4 add loci and headline
    cut -f 1-2,4 $2 | paste - missresult1 | sed '1i chr\tsnp\tpos\tmissing_rate' > missresult.txt
    rm -f  missresult1
    echo "fineshed!"

    2、测试数据及用法

    [root@centos79 test]# ls
    outcome.map  outcome.ped  test  test.sh
    [root@centos79 test]# cat outcome.map
    1       snp1    0       55910
    1       snp2    0       85204
    1       snp3    0       122948
    1       snp4    0       203750
    1       snp5    0       312707
    1       snp6    0       356863
    1       snp7    0       400518
    1       snp8    0       487423
    [root@centos79 test]# cat outcome.ped
    DOR 1 0 0 0 -9 G G 0 0 0 0 0 0 A A T T G G 0 0
    DOR 2 0 0 0 -9 0 0 G G 0 0 0 0 A A T T A G 0 0
    DOR 3 0 0 0 -9 0 0 C C 0 0 G G G G T T A G 0 0
    DOR 4 0 0 0 -9 0 0 C C 0 0 G G G G A A G G G G
    DOR 5 0 0 0 -9 G G C C 0 0 G G G G A A A G G C
    DOR 6 0 0 0 -9 G G C C 0 0 G G G G A A A A C C
    [root@centos79 test]# bash test.sh outcome.ped outcome.map
    step 1 done!
    step 2 done!
    step 3 done!
    fineshed!
    [root@centos79 test]# ls
    missresult.txt  outcome.map  outcome.ped  test  test.sh
    [root@centos79 test]# cat missresult.txt
    chr     snp     pos     missing_rate
    1       snp1    55910   0.5
    1       snp2    85204   0.166667
    1       snp3    122948  1
    1       snp4    203750  0.333333
    1       snp5    312707  0
    1       snp6    356863  0
    1       snp7    400518  0
    1       snp8    487423  0.5

    3、plink软件验证

    [root@centos79 test]# ls
    missresult.txt  outcome.map  outcome.ped  test  test.sh
    [root@centos79 test]# plink --file outcome --missing --out temp > /dev/null; rm *.log *.nosex
    [root@centos79 test]# ls
    missresult.txt  outcome.map  outcome.ped  temp.imiss  temp.lmiss  test  test.sh
    [root@centos79 test]# cat  temp.lmiss
     CHR  SNP   N_MISS   N_GENO   F_MISS
       1 snp1        3        6      0.5
       1 snp2        1        6   0.1667
       1 snp3        6        6        1
       1 snp4        2        6   0.3333
       1 snp5        0        6        0
       1 snp6        0        6        0
       1 snp7        0        6        0
       1 snp8        3        6      0.5
    [root@centos79 test]# cat missresult.txt
    chr     snp     pos     missing_rate
    1       snp1    55910   0.5
    1       snp2    85204   0.166667
    1       snp3    122948  1
    1       snp4    203750  0.333333
    1       snp5    312707  0
    1       snp6    356863  0
    1       snp7    400518  0
    1       snp8    487423  0.5

  • 相关阅读:
    Python实现杨辉三角
    第8.30节 重写Python __setattr__方法实现属性修改捕获
    转:Cookie详解
    第8.29节 使用MethodType将Python __setattr__定义的实例方法与实例绑定
    第8.28节 Python中使用__setattr__定义实例变量和实例方法
    第8.27节 Python中__getattribute__与property的fget、@property装饰器getter关系深入解析
    第8.26节 重写Python类中的__getattribute__方法实现实例属性访问捕获
    关于open函数文件打开模式的有意思的一个现象
    第8.25节 Python风格的__getattribute__属性访问方法语法释义及使用
    转:关于Python中的lambda,这篇阅读量10万+的文章可能是你见过的最完整的讲解
  • 原文地址:https://www.cnblogs.com/liujiaxin2018/p/15489116.html
Copyright © 2020-2023  润新知