001、
(base) root@PC1:/home/test2# ls a.fasta test.py (base) root@PC1:/home/test2# head a.fasta ## 测试数据 >scaffold_1 CCCGGGTAAAACGGGTCTTCAAGAAAACGCTCCTCCGTTAATGCCGGCCGATTCAAATAA CCTCTGGCAACACCCGCTCCGGCAATGTATAGTTCACCGATACATCCAACAGGCAGCATC CGCTGATTCTGATTCAGGATATACAATCTGACATGATGAACAGGTTTTCCAATTGGAATC CGTTCAAGTTTTTCTTGCGGCGGACAATCAAAGAATGCAGCTTCTACGGTTGCTTCCGTT GGCCCATAGGAATTGGTTATTGAAACATTTGGAAGCAACACGTGAAATCGGGAGACAAGA TGGGTCCCCAGCTGTTCTCCCCCAGAAAACACTCGCTTGAGTCTGTTTGTCTTAATCGGT ACAGAGCGATATTTTATATGTTCTAAAAATGCATGGAGCATTGAAGGCACAAAATGCATA GCTGTGATCTTTTGTTCTTCTATGGCCTTCGCGATCACTTCAGGTTCTTTTTCGCCTCCC TGAGGAAGCAGATAAACAGAAGCTCCGGCATAAGGCCACCAAAACAGCTCCCATATTGAA (base) root@PC1:/home/test2# cat test.py ## 测试程序 #!/usr/bin/python import re in_file = open("a.fasta", "r") out_file = open("result.txt", "w") dict1 = dict() for i in in_file: i = i.strip() if i[0] == ">": key = i dict1[key] = "" else: dict1[key] += i step = 5000 print("id", "start", "end", "a", "c", "g", "t", "cg", file = out_file, sep = "\t") for i,j in dict1.items(): length = len(j) start = 0 if length < step and start == 0: block = j[::] a = len(re.findall("[aA]", block)) c = len(re.findall("[cC]", block)) g = len(re.findall("[gG]", block)) t = len(re.findall("[tT]", block)) cg = len(re.findall("[cCgG]", block)) len_block = len(block) print(i, 1, len(j), a/len_block, c/len_block, g/len_block, t/len_block, cg/len_block, file = out_file, sep = "\t") while length > step: end = start + step block = j[start:end] a = len(re.findall("[aA]", block)) c = len(re.findall("[cC]", block)) g = len(re.findall("[gG]", block)) t = len(re.findall("[tT]", block)) cg = len(re.findall("[cCgG]", block)) len_block = len(block) print(i, start + 1, end, a/len_block, c/len_block, g/len_block, t/len_block, cg/len_block, file = out_file, sep = "\t") start += step length -= step if length < step: block = j[end::] a = len(re.findall("[aA]", block)) c = len(re.findall("[cC]", block)) g = len(re.findall("[gG]", block)) t = len(re.findall("[tT]", block)) cg = len(re.findall("[cCgG]", block)) len_block = len(block) print(i, end, len(j), a/len_block, c/len_block, g/len_block, t/len_block, cg/len_block, file = out_file, sep = "\t") in_file.close() out_file.close() (base) root@PC1:/home/test2# python test.py ## 运行程序 (base) root@PC1:/home/test2# ls a.fasta result.txt test.py (base) root@PC1:/home/test2# head result.txt ## 运行结果 id start end a c g t cg >scaffold_1 1 5000 0.2562 0.2254 0.2242 0.2942 0.4496 >scaffold_1 5001 10000 0.2416 0.233 0.2286 0.2968 0.4616 >scaffold_1 10001 15000 0.249 0.2544 0.221 0.2756 0.4754 >scaffold_1 15001 20000 0.2556 0.2422 0.2158 0.2864 0.458 >scaffold_1 20001 25000 0.2604 0.2352 0.2104 0.294 0.4456 >scaffold_1 25001 30000 0.2524 0.2404 0.219 0.2882 0.4594 >scaffold_1 30001 35000 0.3002 0.219 0.1934 0.2874 0.4124 >scaffold_1 35001 40000 0.287 0.219 0.2126 0.2814 0.4316 >scaffold_1 40001 45000 0.2424 0.25 0.2096 0.298 0.4596
参考:https://mp.weixin.qq.com/s?__biz=MzIxNzc1Mzk3NQ==&mid=2247491497&idx=1&sn=5759613bc25c175b71aaf5a5091d0a51&chksm=97f5afb1a08226a7b9eac750c7df99fdd6b5ec110f2f21e79ee0ef542caf93c8105d9b5358c4&scene=178&cur_album_id=2403674812188688386#rd