"""Partition MGG_ gene identifiers from four inputs and export matching records.

The script collects identifiers of the form ``MGG_`` + five digits from two
spreadsheets (``zhu.xlsx``, ``liu.xlsx``) and two text files (``ATG3.csv``,
``ATG16.csv``), computes the identifiers shared by all four inputs and the
identifiers unique to each input, and then copies every ``>``-delimited
record of ``magnaporthe.txt`` that mentions such an identifier into the
corresponding output file.
"""
import re

import xlrd

# "MGG_" followed by exactly five digits.  The backslash matters: without it
# the pattern matches the literal text "MGG_ddddd" and no identifier is found.
_MGG_PATTERN = re.compile(r'MGG_\d{5}')

# Output files, one per identifier group, in the order returned by Mgg1_Mgg2().
# NOTE(review): the second file name starts with "a9_" while its siblings use
# "v9_"; it looks like a typo but is kept unchanged in case something
# downstream depends on the existing name -- confirm before renaming.
f1 = open("v9_c8_a3_a16.txt", "w")
f2 = open("a9_not_c8a3a16.txt", "w")
f3 = open("c8_not_v9a3a16.txt", "w")
f4 = open("a3_not_v9c8a16.txt", "w")
f5 = open("a16_not_v9c8a3.txt", "w")


def read(file, sheet_index=0):
    """Return every row of one worksheet as a list of row-value lists.

    Args:
        file: Path of the workbook to open with ``xlrd``.
        sheet_index: Zero-based index of the worksheet to read.

    Returns:
        A list with one entry per row, each entry being the list returned by
        ``sheet.row_values``.

    Prints the sheet name, row count and column count as a side effect.
    """
    # NOTE(review): xlrd 2.x dropped support for .xlsx workbooks; this call
    # presumably relies on an older xlrd being installed -- confirm.
    workbook = xlrd.open_workbook(file)
    sheet = workbook.sheet_by_index(sheet_index)
    print("工作表名称:", sheet.name, "行数:", sheet.nrows, "列数:", sheet.ncols)
    return [sheet.row_values(i) for i in range(sheet.nrows)]


def red(text):
    """Return the set of unique MGG_ identifiers found in a text file.

    Args:
        text: Path of the file to scan.
    """
    with open(text, 'r') as f:
        content = f.read()
    return set(_MGG_PATTERN.findall(content))


v9 = read(r'zhu.xlsx')
c8 = read(r'liu.xlsx')
a3 = red(r'ATG3.csv')
a16 = red(r'ATG16.csv')


def reg(data):
    """Return the set of unique MGG_ identifiers found in ``str(data)``.

    Args:
        data: Any object; it is converted with ``str`` before scanning, so
            nested lists of cell values and sets of strings both work.
    """
    # The regex needs a string, so arbitrary containers are stringified first.
    return set(_MGG_PATTERN.findall(str(data)))


def vps9():
    """Return the identifiers extracted from the ``zhu.xlsx`` rows."""
    return reg(v9)


def cdk8():
    """Return the identifiers extracted from the ``liu.xlsx`` rows."""
    return reg(c8)


def Atg3():
    """Return the identifiers extracted from ``ATG3.csv``."""
    return reg(a3)


def Atg16():
    """Return the identifiers extracted from ``ATG16.csv``."""
    return reg(a16)


def Mgg1_Mgg2():
    """Split the four identifier sets into one shared and four exclusive groups.

    Returns:
        A 5-tuple of sets, in this order:
        identifiers present in all four inputs, then identifiers found only
        in the v9, only in the c8, only in the a3 and only in the a16 input.
    """
    v9_ids = vps9()
    c8_ids = cdk8()
    a3_ids = Atg3()
    a16_ids = Atg16()

    v9_c8_a3_a16 = v9_ids & c8_ids & a3_ids & a16_ids
    v9_not_c8a3a16 = v9_ids - (c8_ids | a3_ids | a16_ids)
    c8_not_v9a3a16 = c8_ids - (v9_ids | a3_ids | a16_ids)
    a3_not_v9c8a16 = a3_ids - (v9_ids | c8_ids | a16_ids)
    a16_not_v9c8a3 = a16_ids - (v9_ids | a3_ids | c8_ids)
    return (v9_c8_a3_a16, v9_not_c8a3a16, c8_not_v9a3a16,
            a3_not_v9c8a16, a16_not_v9c8a3)


def message():
    """Write each ``magnaporthe.txt`` record to the files of the groups it hits.

    The input is split on ``'>'``.  For every identifier of a group that
    occurs in a record, the line ``identifier + ' ' + record`` is written to
    that group's output file (``f1`` .. ``f5``, matching the order of
    ``Mgg1_Mgg2()``).
    """
    groups = Mgg1_Mgg2()
    outputs = (f1, f2, f3, f4, f5)

    with open('magnaporthe.txt', 'r') as f:
        records = f.read().split('>')

    for record in records:
        # Extract the record's identifiers once instead of substring-testing
        # every identifier of every group against the record.
        present = set(_MGG_PATTERN.findall(record))
        if not present:
            continue
        for group, out in zip(groups, outputs):
            # Sorted so the output order is reproducible between runs.
            for gene_id in sorted(group & present):
                out.write(gene_id + ' ' + record)


try:
    message()
finally:
    # Flush and release the output files even if processing fails part-way.
    for _handle in (f1, f2, f3, f4, f5):
        _handle.close()