• 在gff中切fa的内容


    #!/usr/bin/python
    import re
    
    def readfa(l):
        """Read a FASTA file into a dict of sequence name -> list of lines.

        The name of a record is the run of word characters directly after
        the '>' of its header line; anything after that (description text,
        punctuation) is ignored.  Each value is the list of that record's
        sequence lines with the line terminator removed, in file order, so
        callers can rebuild the full sequence with ''.join(lines).

        Lines that appear before the first header are ignored.  If a name
        occurs more than once, the last record wins.

        l -- path of the FASTA file to read.
        """
        # Compiled once, outside the per-line loop.
        header = re.compile(r'>(\w*)')
        col = {}
        arr = None  # line list of the record currently being read
        with open(l) as li:
            for line in li:
                match = header.match(line)
                if match:
                    # Start a fresh list *before* registering it, so the
                    # lines that follow are stored under this header and
                    # not under the next one.
                    arr = []
                    col[match.group(1)] = arr
                elif arr is not None:
                    arr.append(line.rstrip('\r\n'))
        return col
    
    def readgff(l):
        """Collect mRNA features and their CDS intervals from a GFF file.

        Returns a dict keyed by the mRNA's ID attribute.  Each value is the
        list [start, end, cds, seqid, strand] where start/end/seqid/strand
        are the raw strings from columns 4, 5, 1 and 7 of the mRNA line and
        cds is a list of [start, end] string pairs, one per CDS line whose
        Parent attribute names that mRNA, in file order.

        Comment lines, blank lines and lines with fewer than nine columns
        are skipped.  An mRNA without an ID, a CDS without a Parent, and a
        CDS whose Parent is not an mRNA seen earlier in the file are
        skipped as well.

        l -- path of the GFF file to read.
        """
        # The attribute may sit anywhere in column 9 and need not be
        # followed by ';', so search for it instead of anchoring at the
        # start of the column.
        id_attr = re.compile(r'(?:^|;)\s*ID=([^;]+)')
        parent_attr = re.compile(r'(?:^|;)\s*Parent=([^;]+)')
        col = {}
        with open(l) as li:
            for line in li:
                if line.startswith('#'):
                    continue
                # At most nine fields: the attribute column is kept whole
                # even when its values contain spaces.
                sp = line.strip().split(None, 8)
                if len(sp) < 9:
                    continue
                if sp[2] == 'mRNA':
                    found = id_attr.search(sp[8])
                    if found:
                        col[found.group(1)] = [sp[3], sp[4], [], sp[0], sp[6]]
                elif sp[2] == 'CDS':
                    found = parent_attr.search(sp[8])
                    if found and found.group(1) in col:
                        col[found.group(1)][2].append([sp[3], sp[4]])
        return col
    
    def deal_gff(l):
        """Convert CDS coordinates to slice offsets relative to their mRNA.

        l maps an mRNA name to [start, end, cds, seqid, strand], where cds
        is a list of [start, end] pairs with 1-based inclusive coordinates
        (strings or ints).  The dict is rewritten in place and returned:
        each value becomes [cds, seqid], and every CDS pair becomes a
        0-based, end-exclusive [begin, stop] pair of ints usable directly
        as a Python slice.

        For strand '-' the offsets are measured backwards from the mRNA
        end; for any other strand they are measured forwards from the mRNA
        start.  The pairs of each mRNA are sorted by ascending offset.

        The function is not idempotent: it expects the five-element input
        layout and must be called once per dict.
        """
        for value in l.values():
            start = int(value[0])
            end = int(value[1])
            arr = value[2]
            sca = value[3]
            pos = value[4]
            for single in arr:
                cds_start = int(single[0])
                cds_end = int(single[1])
                if pos == '-':
                    single[0] = end - cds_end
                    single[1] = end - cds_start + 1
                else:
                    single[0] = cds_start - start
                    single[1] = cds_end - start + 1
            # Ascending offsets whatever order the CDS lines had in the
            # file; on the minus strand this is the reordering that a bare
            # `arr.reverse` (never called) failed to perform.
            arr.sort()
            # Same list object, new layout: [cds, seqid].
            value[:] = [arr, sca]
        return l
    ###main###
    
    
    
    # Read the annotation from the file 'gff' and the sequences from the
    # file 'fa' (both in the current directory), then turn the CDS
    # coordinates into slice offsets.
    gff = readgff('gff')
    fa = readfa('fa')
    g = deal_gff(gff)

    # mRNA name -> concatenation of its CDS slices.
    col = {}
    for k, v in g.items():
        pieces = v[0]
        sca = v[1]
        # .get(): a sequence name absent from the FASTA is skipped instead
        # of raising KeyError.  Empty records and mRNAs without any CDS
        # produce no output either.
        lines = fa.get(sca)
        if not lines or not pieces:
            continue
        lon = ''.join(lines)
        # NOTE(review): the offsets are relative to the mRNA start (or end
        # on '-'), yet they index the whole FASTA record named in GFF
        # column 1 -- this presumably expects records that begin at the
        # mRNA start and are already in transcript orientation; confirm
        # against the input data.
        col[k] = ''.join([lon[i[0]:i[1]] for i in pieces])

    # Name on one line, sequence on the next; the parenthesised single
    # argument prints the same under Python 2 and Python 3.
    for k1, v1 in col.items():
        print("%s\n%s" % (k1, v1))
  • 相关阅读:
    串口通讯编程一日通2(Overlapped IO模型)
    串口通讯编程一日通1(整合资料)
    Overlapped I/O模型深入分析(转)
    JVM7、8详解及优化
    vmstat工具
    spring 每个jar的作用
    Linux查看内存使用情况
    mysql datetime与timestamp精确到毫秒的问题
    eclipse UTF-8
    java.security
  • 原文地址:https://www.cnblogs.com/yuanjingnan/p/11135973.html
Copyright © 2020-2023  润新知