001、输出反向序列(逐行反向)
root@PC1:/home/test# ls
a.fasta test.py
root@PC1:/home/test# cat test.py ## 测试程序
#!/usr/bin/python
in_file = open("a.fasta", "r")
dict1 = dict()
for i in in_file:
    i = i.strip()
    if i[0] == ">":
        key = i
        dict1[key] = []
    else:
        dict1[key].append(i[::-1])
for i,j in dict1.items():
    print(i)
    for k in j:
        print(k)
in_file.close()
root@PC1:/home/test# cat a.fasta ## 测试fasta文件
>gene1 myc
AGCTGCCTAAGC
GGCATAGCTAATCG
>gene2 jun
ACCGAATCGGAGCGATG
GGCATTAAAGATCTAGCT
>gene3 malat1
AGGCTAGCGAG
GCGCGAG
GATTAGGCG
root@PC1:/home/test# python test.py ## 执行程序
>gene1 myc
CGAATCCGTCGA
GCTAATCGATACGG
>gene2 jun
GTAGCGAGGCTAAGCCA
TCGATCTAGAAATTACGG
>gene3 malat1
GAGCGATCGGA
GAGCGCG
GCGGATTAG
002、同时取互补序列
root@PC1:/home/test# ls
a.fasta test.py
root@PC1:/home/test# cat test.py ## 测试程序
#!/usr/bin/python
in_file = open("a.fasta", "r")
dict1 = dict()
for i in in_file:
    i = i.strip()
    if i[0] == ">":
        key = i
        dict1[key] = []
    else:
        temp = i[::-1].replace("A","t").replace("T","a").replace("G","c").replace("C","g").upper()
        dict1[key].append(temp)
for i,j in dict1.items():
    print(i)
    for k in j:
        print(k)
in_file.close()
root@PC1:/home/test# cat a.fasta ## 测试fasta文件
>gene1 myc
AGCTGCCTAAGC
GGCATAGCTAATCG
>gene2 jun
ACCGAATCGGAGCGATG
GGCATTAAAGATCTAGCT
>gene3 malat1
AGGCTAGCGAG
GCGCGAG
GATTAGGCG
root@PC1:/home/test# python test.py ## 执行程序
>gene1 myc
GCTTAGGCAGCT
CGATTAGCTATGCC
>gene2 jun
CATCGCTCCGATTCGGT
AGCTAGATCTTTAATGCC
>gene3 malat1
CTCGCTAGCCT
CTCGCGC
CGCCTAATC
003、同时指定每行输出的碱基数目
root@PC1:/home/test# ls
a.fasta test.py
root@PC1:/home/test# cat test.py ## 测试程序
#!/usr/bin/python
in_file = open("a.fasta", "r")
dict1 = dict()
for i in in_file:
    i = i.strip()
    if i[0] == ">":
        key = i
        dict1[key] = []
    else:
        temp = i[::-1].replace("A","t").replace("T","a").replace("G","c").replace("C","g").upper()
        dict1[key].append(temp)
for i,j in dict1.items():
    print(i)
    j = "".join(j)
    for k in range(0, len(j), 6):
        print(j[k:k+6])
in_file.close()
root@PC1:/home/test# cat a.fasta ## 测试fasta文件
>gene1 myc
AGCTGCCTAAGC
GGCATAGCTAATCG
>gene2 jun
ACCGAATCGGAGCGATG
GGCATTAAAGATCTAGCT
>gene3 malat1
AGGCTAGCGAG
GCGCGAG
GATTAGGCG
root@PC1:/home/test# python test.py ## 执行程序
>gene1 myc
GCTTAG
GCAGCT
CGATTA
GCTATG
CC
>gene2 jun
CATCGC
TCCGAT
TCGGTA
GCTAGA
TCTTTA
ATGCC
>gene3 malat1
CTCGCT
AGCCTC
TCGCGC
CGCCTA
ATC
参考:https://www.jianshu.com/p/5ee54bea4cb0