读取fasta文件genome_test.fa,并计算染色体总长,同时输出最长染色体编号、序列以及长度
fasta文件genome_test.fa的内容如下:
>chr1
ATATATATAT
>chr2
ATATATATATCGCGCGCGCG
>chr3
ATATATATATCGCGCGCGCGATATATATAT
>chr4
ATATATATATCGCGCGCGCGATATATATATCGCGCGCGCG
>chr5
ATATATATATCGCGCGCGCGATATATATATCGCGCGCGCGATATATATAT
python脚本
# Report the total sequence length of a FASTA file together with the
# header, sequence and length of its longest record.
#
# Usage: python fasta_stat6.py genome_test.fa
import sys  # sys.argv supplies the FASTA file name


def iter_fasta_records(lines):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    A record starts at a line beginning with ``>`` and runs until the next
    such line. Sequence lines belonging to one record are concatenated, so
    sequences wrapped over several lines are handled as a single record.
    Blank lines are ignored. The header is returned as written in the
    file, including the leading ``>``. Sequence lines that appear before
    the first header are yielded with an empty header.
    """
    header = None
    parts = []
    for raw_line in lines:
        line = raw_line.strip()  # drop the trailing newline and padding
        if not line:
            continue
        if line.startswith(">"):
            # A new header closes the record collected so far.
            if header is not None or parts:
                yield header or '', "".join(parts)
            header = line
            parts = []
        else:
            parts.append(line)
    # The last record has no following header to close it.
    if header is not None or parts:
        yield header or '', "".join(parts)


def summarize_fasta(lines):
    """Return ``(total_len, max_chr, max_seq, max_len)`` for FASTA lines.

    ``total_len`` is the summed length of all sequences. ``max_chr``,
    ``max_seq`` and ``max_len`` describe the longest record; when several
    records share the maximum length the first one wins. For input with
    no sequence data the result is ``(0, '', '', 0)``.
    """
    total_len = 0
    max_chr = ''
    max_seq = ''
    max_len = 0
    for header, seq in iter_fasta_records(lines):
        seq_len = len(seq)
        total_len += seq_len
        # Only replace the current maximum when this record is strictly
        # longer; unconditionally overwriting would report the last
        # record instead of the longest one.
        if seq_len > max_len:
            max_chr = header
            max_seq = seq
            max_len = seq_len
    return total_len, max_chr, max_seq, max_len


def main():
    """Read the FASTA file named on the command line and print its stats."""
    if len(sys.argv) < 2:
        sys.exit("usage: python " + sys.argv[0] + " <fasta_file>")
    f_fasta = sys.argv[1]

    # The with-block closes the file even if parsing fails; iterating the
    # handle streams the file line by line instead of loading it whole.
    with open(f_fasta) as f:
        total_len, max_chr, max_seq, max_len = summarize_fasta(f)

    print("total_len = " + str(total_len))
    print("max_chr = " + max_chr)
    print("max_seq = " + max_seq)
    print("max_len = " + str(max_len))


if __name__ == "__main__":
    main()
cmd命令行输入
E:\15_python\DEBUG>python fasta_stat6.py genome_test.fa