#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Flatten a FASTA file to one record per line, then extract target records.

Overall approach:
  1. Normalise the input FASTA (path given as the first command-line
     argument) into ``out.fasta``, where each record's ID and its whole
     sequence are placed on a single line.
  2. Copy every line of ``out.fasta`` whose ID starts with the target prefix
     into ``target.fasta``.

Example input::

    >chr1|hos107.1#gene1
    ACACTCCCGGGCCCCCCCCCCCC
    ACCTTTCAAAAAAAAAAAAAAA
    AATTTTCCCCCCAAAGGGG
    >chr1|hos107.2#gene2
    ACACTCCCGGGCCCCCCCCCCCC
    ACCTTTCAAAAAAAAAAAAAAA
    AATTTTC
    >chr1|hos107.4#gene3
    ACACTCCCGGGCCCCCCCCCCCC
    ACCTTTCAAAAAAAAAAAAAAA
    AATTTTC
    >chr1|hos107.5#gene4
    ACACTCCCGGGCCCCCCCCCCCC
    ACCTTTCAAAAAAAAAAAAAAA
    AATTTTC

Resulting ``out.fasta``::

    >chr1|hos107.1#gene1ACACTCCCGGGCCCCCCCCCCCCACCTTTCAAAAAAAAAAAAAAAAATTTTCCCCCCAAAGGGG
    >chr1|hos107.2#gene2ACACTCCCGGGCCCCCCCCCCCCACCTTTCAAAAAAAAAAAAAAAAATTTTC
    >chr1|hos107.4#gene3ACACTCCCGGGCCCCCCCCCCCCACCTTTCAAAAAAAAAAAAAAAAATTTTC
    >chr1|hos107.5#gene4ACACTCCCGGGCCCCCCCCCCCCACCTTTCAAAAAAAAAAAAAAAAATTTTC
"""
import sys

# Intermediate file: one record (ID + sequence) per line.
OUT_FASTA = 'out.fasta'
# Final file: only the records whose line starts with TARGET_ID_PREFIX.
TARGET_FASTA = 'target.fasta'
# NOTE: this is a prefix match, so it also selects IDs that merely begin with
# this text (e.g. '>chr1|hos107.10'). Because the ID and the sequence are
# fused on one line, an exact ID comparison is not possible at this stage.
TARGET_ID_PREFIX = '>chr1|hos107.1'


def linearize_fasta(text):
    """Return *text* with every FASTA record collapsed onto a single line.

    All line breaks are removed, then a line break is placed between records
    so that each output line starts with '>' and holds the record ID
    immediately followed by its full sequence (no separator is inserted).

    Any text that precedes the first '>' is kept as its own leading line.
    Every output line, including the last, ends with a newline; empty input
    yields an empty string.
    """
    # Drop '\r' as well so files with Windows line endings flatten cleanly.
    flat = text.replace('\r', '').replace('\n', '')
    parts = flat.split('>')
    lines = []
    if parts[0]:
        # Content before the first header (or a file with no header at all).
        lines.append(parts[0])
    lines.extend('>' + record for record in parts[1:])
    return ''.join(line + '\n' for line in lines)


def select_records(lines, prefix=TARGET_ID_PREFIX):
    """Yield each line of *lines* that starts with *prefix*, unchanged."""
    for line in lines:
        if line.startswith(prefix):
            yield line


def main():
    """Run both stages: input FASTA -> out.fasta -> target.fasta."""
    if len(sys.argv) < 2:
        sys.exit('usage: %s <input.fasta>' % sys.argv[0])

    # Stage 1: unify the FASTA layout (input file -> out.fasta).
    with open(sys.argv[1]) as source:
        flattened = linearize_fasta(source.read())
    with open(OUT_FASTA, 'w') as out:
        # Single buffered write instead of one write call per character.
        out.write(flattened)

    # Stage 2: extract the target records (out.fasta -> target.fasta).
    with open(OUT_FASTA) as flat, open(TARGET_FASTA, 'w') as target:
        target.writelines(select_records(flat))


if __name__ == '__main__':
    main()