• Python 中统计不同 scaffold 的 GC 含量并输出 GC 含量最高的 scaffold


    001、方法1

    root@PC1:/home/test# ls
    a.fasta  test.py
    root@PC1:/home/test# cat a.fasta                                   ## 测试fasta文件
    >Rosalind_6404
    CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
    TCCCACTAATAATTCTGAGG
    >Rosalind_5959
    CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
    ATATCCATTTGTCAGCAGACACGC
    >Rosalind_0808
    CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
    TGGGAACCTGCGGGCAGTAGGTGGAAT
    root@PC1:/home/test# cat test.py                                   ## 测试程序
    #!/usr/bin/env python3
    """Report the FASTA record with the highest GC content (str.count version).

    Reads a.fasta from the current directory and writes two lines to result.txt:
    the record name and its GC fraction. Requires Python 3 (true division and
    the print() function are used).
    """


    def parse_fasta(lines):
        """Return {record name: joined sequence} from an iterable of FASTA lines.

        Blank lines are ignored. The record name is everything after the
        leading ">" of a header line. A repeated header restarts that record.
        Raises ValueError if sequence data appears before the first header.
        """
        records = {}
        name = None
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # Slice instead of split(">") so a ">" inside the header text
                # does not truncate the record name.
                name = line[1:]
                records[name] = []
            elif name is None:
                raise ValueError("sequence data found before the first '>' header")
            else:
                records[name].append(line)
        return {key: "".join(parts) for key, parts in records.items()}


    def gc_content(seq):
        """Return the fraction of G/C bases in seq (case-insensitive).

        An empty sequence yields 0.0 instead of raising ZeroDivisionError.
        """
        if not seq:
            return 0.0
        seq = seq.upper()
        return (seq.count("C") + seq.count("G")) / len(seq)


    def highest_gc(records):
        """Return (name, GC fraction) for the record with the highest GC content.

        On ties the record that appears first wins. Raises ValueError when
        records is empty.
        """
        if not records:
            raise ValueError("no FASTA records to compare")
        ratios = {name: gc_content(seq) for name, seq in records.items()}
        best = max(ratios, key=ratios.get)
        return best, ratios[best]


    def main():
        """Read a.fasta, find the GC-richest record and write it to result.txt."""
        # Context managers close both files even if parsing fails.
        with open("a.fasta", "r") as in_file:
            records = parse_fasta(in_file)
        name, ratio = highest_gc(records)
        with open("result.txt", "w") as out_file:
            print(name + "\n" + str(ratio), file=out_file, end="\n")


    if __name__ == "__main__":
        main()
    root@PC1:/home/test# python test.py                             ## 运行程序
    root@PC1:/home/test# ls
    a.fasta  result.txt  test.py
    root@PC1:/home/test# cat result.txt                              ## 运行结果
    Rosalind_0808
    0.6091954022988506

    002、方法2

    root@PC1:/home/test# ls
    a.fasta  test.py
    root@PC1:/home/test# cat a.fasta                                   ## 测试fasta文件
    >Rosalind_6404
    CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
    TCCCACTAATAATTCTGAGG
    >Rosalind_5959
    CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
    ATATCCATTTGTCAGCAGACACGC
    >Rosalind_0808
    CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
    TGGGAACCTGCGGGCAGTAGGTGGAAT
    root@PC1:/home/test# cat test.py                            ## 测试程序
    #!/usr/bin/env python3
    """Report the FASTA record with the highest GC content (re.findall version).

    Reads a.fasta from the current directory and writes two lines to result.txt:
    the record name and its GC fraction formatted to four decimals. Requires
    Python 3 (true division is used).
    """

    import re  # re.findall is used to count the G/C bases


    def parse_fasta(lines):
        """Return {record name: joined sequence} from an iterable of FASTA lines.

        Blank lines are ignored. The record name is everything after the
        leading ">" of a header line. A repeated header restarts that record.
        Raises ValueError if sequence data appears before the first header.
        """
        records = {}
        name = None
        for line in lines:
            line = line.strip()
            if not line:
                # Indexing line[0] on a blank line would raise IndexError.
                continue
            if line.startswith(">"):
                # Slice instead of split(">") so a ">" inside the header text
                # does not truncate the record name.
                name = line[1:]
                records[name] = []
            elif name is None:
                raise ValueError("sequence data found before the first '>' header")
            else:
                records[name].append(line)
        return {key: "".join(parts) for key, parts in records.items()}


    def gc_content(seq):
        """Return the fraction of G/C bases in seq (case-insensitive).

        An empty sequence yields 0.0 instead of raising ZeroDivisionError.
        """
        if not seq:
            return 0.0
        gc_count = len(re.findall("[GC]", seq.upper()))
        return gc_count / len(seq)


    def highest_gc(records):
        """Return (name, GC fraction) for the record with the highest GC content.

        The comparison is done on the unrounded fractions; rounding happens
        only when the result is written, so two records that differ beyond the
        fourth decimal are still ranked correctly. On exact ties the record
        that appears first wins. Raises ValueError when records is empty.
        """
        if not records:
            raise ValueError("no FASTA records to compare")
        ratios = {name: gc_content(seq) for name, seq in records.items()}
        best = max(ratios, key=ratios.get)
        return best, ratios[best]


    def main():
        """Read a.fasta, find the GC-richest record and write it to result.txt."""
        # Context managers close both files even if parsing fails.
        with open("a.fasta", "r") as in_file:
            records = parse_fasta(in_file)
        name, ratio = highest_gc(records)
        with open("result.txt", "w") as out_file:
            out_file.write(name + "\n" + "%.4f" % ratio + "\n")


    if __name__ == "__main__":
        main()
    root@PC1:/home/test# python test.py                            ## 执行程序
    root@PC1:/home/test# ls
    a.fasta  result.txt  test.py
    root@PC1:/home/test# cat result.txt                            ## 运行结果
    Rosalind_0808
    0.6092

    003、方法3

    root@PC1:/home/test# ls
    a.fasta  test.py
    root@PC1:/home/test# cat a.fasta                                  ## 测试fasta文件
    >Rosalind_6404
    CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
    TCCCACTAATAATTCTGAGG
    >Rosalind_5959
    CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
    ATATCCATTTGTCAGCAGACACGC
    >Rosalind_0808
    CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
    TGGGAACCTGCGGGCAGTAGGTGGAAT
    root@PC1:/home/test# cat test.py                               ## 测试程序
    #!/usr/bin/env python3
    """Report the FASTA record with the highest GC content (manual loop version).

    Reads a.fasta from the current directory and writes two lines to result.txt:
    the record name and its GC fraction formatted to four decimals. Requires
    Python 3 (true division is used).
    """


    def parse_fasta(lines):
        """Return {record name: joined sequence} from an iterable of FASTA lines.

        Blank lines are ignored. The record name is everything after the
        leading ">" of a header line. A repeated header restarts that record.
        Raises ValueError if sequence data appears before the first header.
        """
        records = dict()
        name = None
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # Slice instead of split(">") so a ">" inside the header text
                # does not truncate the record name.
                name = line[1:]
                records[name] = []
            elif name is None:
                raise ValueError("sequence data found before the first '>' header")
            else:
                records[name].append(line)
        return {key: "".join(parts) for key, parts in records.items()}


    def gc_content(seq):
        """Return the fraction of G/C bases in seq (case-insensitive).

        Bases are counted one by one in an explicit loop. An empty sequence
        yields 0.0 instead of raising ZeroDivisionError.
        """
        if not seq:
            return 0.0
        gc_count = 0
        for base in seq.upper():
            if base == "G" or base == "C":
                gc_count += 1
        return gc_count / len(seq)


    def highest_gc(records):
        """Return (name, GC fraction) for the record with the highest GC content.

        The comparison is done on the unrounded fractions; rounding happens
        only when the result is written, so two records that differ beyond the
        fourth decimal are still ranked correctly. On exact ties the record
        that appears first wins. Raises ValueError when records is empty.
        """
        if not records:
            raise ValueError("no FASTA records to compare")
        ratios = {name: gc_content(seq) for name, seq in records.items()}
        best = max(ratios, key=ratios.get)
        return best, ratios[best]


    def main():
        """Read a.fasta, find the GC-richest record and write it to result.txt."""
        # Context managers close both files even if parsing fails.
        with open("a.fasta", "r") as in_file:
            records = parse_fasta(in_file)
        name, ratio = highest_gc(records)
        with open("result.txt", "w") as out_file:
            out_file.write(name + "\n" + "%.4f" % ratio + "\n")


    if __name__ == "__main__":
        main()
    root@PC1:/home/test# python test.py                            ## 执行程序
    root@PC1:/home/test# ls
    a.fasta  result.txt  test.py
    root@PC1:/home/test# cat result.txt                             ## 运行结果
    Rosalind_0808
    0.6092

    参考:https://mp.weixin.qq.com/s?__biz=MzIxMjQxMDYxNA==&mid=2247484172&idx=1&sn=d8dec9ae5ffea81ef02e8f0d7ea4672b&chksm=9747ca95a030438313f483f6c62c9c32551e23682f98be6868edf423ea88180165e21c5dedc8&scene=178&cur_album_id=1635727573621997580#rd

  • 相关阅读:
    无线安全
    下载安装Emacs和基本配置--待更新中
    uv-pv-vv的区别
    tesseract安装及问题处理
    POJ 2187 Beauty Contest【凸包周长】
    POJ 1113 Wall【凸包周长】
    POJ 2187 Beauty Contest【旋转卡壳求凸包直径】
    POJ 2031 Building a Space Station【经典最小生成树】
    URAL 1181 Cutting a Painted Polygon【递归+分治】
    POJ 1845-Sumdiv【经典数学题目---求因子和】
  • 原文地址:https://www.cnblogs.com/liujiaxin2018/p/16597481.html
Copyright © 2020-2023  润新知