001、读取fasta文件
root@PC1:/home/test# ls
a.fasta
root@PC1:/home/test# cat a.fasta     ## 测试数据
>Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
>Rosalind_3
ATGGATCT
>Rosalind_4
AAGCAACC
>Rosalind_5
TTGGAACT
>Rosalind_6
ATGCCATT
>Rosalind_7
ATGGCACT
root@PC1:/home/test# python          ## 启动python
Python 3.10.4 (main, Mar 31 2022, 08:41:55) [GCC 7.5.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pysam                              ## 导入pysam包
>>> in_file = pysam.FastxFile("a.fasta")      ## 读取数据,生成可迭代对象
>>> for i in in_file:
...     print(i)
...
>Rosalind_1
ATCCAGCT
>Rosalind_2
GGGCAACT
>Rosalind_3
ATGGATCT
>Rosalind_4
AAGCAACC
>Rosalind_5
TTGGAACT
>Rosalind_6
ATGCCATT
>Rosalind_7
ATGGCACT
>>> in_file = pysam.FastxFile("a.fasta")      ## 上一次遍历已耗尽迭代器,需重新打开文件(下同)
>>> for i in in_file:
...     print(i.name)                         ## 输出序列名称(即">"后的ID,如scaffold名)
...
Rosalind_1
Rosalind_2
Rosalind_3
Rosalind_4
Rosalind_5
Rosalind_6
Rosalind_7
>>> in_file = pysam.FastxFile("a.fasta")
>>> for i in in_file:
...     print(i.sequence)                     ## 输出序列
...
ATCCAGCT
GGGCAACT
ATGGATCT
AAGCAACC
TTGGAACT
ATGCCATT
ATGGCACT
002、读取多行序列的fasta文件(每条序列的多行自动合并为一行)
(base) root@PC1:/home/test# ls
a.fasta  b.fasta
(base) root@PC1:/home/test# cat b.fasta     ## 测试fasta文件
>Rosalind_1
ATCCAGCT
ATCCAGCT
TTTTT
>Rosalind_2
GGGCAACT
GGGCAACT
GGGCAACT
>Rosalind_3
ATGGATCT
ATGGATCT
ATGGATCT
(base) root@PC1:/home/test# python          ## 启动python
Python 3.10.4 (main, Mar 31 2022, 08:41:55) [GCC 7.5.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pysam                              ## 导入pysam包
>>> in_file = pysam.FastxFile("b.fasta")
>>> for i in in_file:
...     print(i)                              ## 将多行序列合并为一行
...
>Rosalind_1
ATCCAGCTATCCAGCTTTTTT
>Rosalind_2
GGGCAACTGGGCAACTGGGCAACT
>Rosalind_3
ATGGATCTATGGATCTATGGATCT
参考:https://mp.weixin.qq.com/s?__biz=MzIxMjQxMDYxNA==&mid=2247484172&idx=1&sn=d8dec9ae5ffea81ef02e8f0d7ea4672b&chksm=9747ca95a030438313f483f6c62c9c32551e23682f98be6868edf423ea88180165e21c5dedc8&scene=178&cur_album_id=1635727573621997580#rd