在gff中切fa的内容

#!/usr/bin/python
import re

def readfa(l):
    col={}
    arr =[]
    sca =''
    li = open(l)
    for line in li:
        if re.match(r'>(w*)',line):
            match = re.match(r'>(w*)',line)
            sca = match.group(1)
            col[sca]=arr
            arr =[]
        else:
            without = re.sub(r'
',"",line)
            arr.append(without)
    return col

def readgff(l):
    col ={}
    arr =[]
    li = open(l)
    for line in li:
        sp = line.split( )
        if sp[2] == 'mRNA':
            gene = re.match(r'ID=(.*?);',sp[8]).group(1)
            start =sp[3]
            arr=[]
            col[gene]=[sp[3],sp[4],arr,sp[0],sp[6]]
        elif sp[2] == 'CDS':
            gene = re.match(r'Parent=(.*?);',sp[8]).group(1)
            col[gene][2].append([sp[3],sp[4]])
    return col

def deal_gff(l):
    col ={}
    for key,value in l.items():
        start=value[0]
        end = value[1]
        arr = value[2]
        sca = value[3]
        pos = value[4]
        if pos == '+':
            for single in arr:
                single[0] = int(single[0]) - int(start)
                single[1] = int(single[1]) - int(start)+1
        elif pos =='-':
            for single in arr:
                off= int(end)-int(single[1])
                lon= int(end)-int(single[0])+1
                single[0] = off
                single[1] = lon
            arr.reverse
        col[sca]=arr
        del value[0]
        del value[0]
        del value[2]
    return l
###main###



gff=readgff('gff')
c=gff

fa =readfa('fa')

g=deal_gff(c)

col = {}
s=''

for k,v in g.items():
    sca = v[1]
    if fa[sca]:
        lon=s.join(fa[sca])
        short=''
        for i in v[0]:
            short += lon[i[0]:i[1]]
            col[k]=short

for k1,v1 in col.items():
    print k1,"
",v1

相关阅读:
串口通讯编程一日通2（Overlapped IO模型）
串口通讯编程一日通1（整合资料）
Overlapped I/O模型深入分析(转)
JVM7、8详解及优化
vmstat工具
spring 每个jar的作用
Linux查看内存使用情况
mysql datetime与timestamp精确到毫秒的问题
eclipse UTF-8
java.security

原文地址：https://www.cnblogs.com/yuanjingnan/p/11135973.html