import time

import requests
from lxml import etree


def src_tiqu(yeshu):
    """Scrape entries from the first ``yeshu`` list pages into ``src.txt``.

    For each page number from 1 to ``yeshu`` the page
    ``https://src.sjtu.edu.cn/list/?i=<page>`` is downloaded, the text of
    every ``<a>`` directly inside a ``<td class="">`` is collected, the
    text is split on whitespace, and the resulting tokens are appended to
    ``src.txt`` (UTF-8), each followed by a single space.

    Scraping is best-effort: a page that fails to download, parse or
    save is reported on stdout and skipped after a short pause; the
    remaining pages are still processed.

    Args:
        yeshu: Number of pages to fetch, as an int or a numeric string
            (e.g. the raw result of ``input()``).

    Raises:
        ValueError: If ``yeshu`` cannot be converted to an integer.
    """
    # Convert once, before the loop, so an invalid page count fails fast.
    page_count = int(yeshu)

    for page in range(1, page_count + 1):
        try:
            url = 'https://src.sjtu.edu.cn/list/?i=' + str(page)
            print('提取->', str(page) + '页数')

            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(url, timeout=10)
            # Do not parse HTTP error pages as if they were listings.
            response.raise_for_status()

            tree = etree.HTML(response.content.decode('utf-8'))
            texts = tree.xpath('//td[@class=""]/a/text()')

            # Join then split on whitespace: strips padding and drops
            # empty fragments in a single step.
            names = ' '.join(texts).split()
            print(names)

            if names:
                # Open the output once per page rather than once per token;
                # the context manager closes the file for us.
                with open(r'src.txt', 'a+', encoding='utf-8') as f:
                    f.write(''.join(name + ' ' for name in names))
        except Exception as exc:
            # Deliberately broad: any failure on one page must not abort
            # the remaining pages. Report it instead of hiding it, then
            # back off briefly before moving on.
            print('page', page, 'failed:', repr(exc))
            time.sleep(0.5)


if __name__ == '__main__':
    yeshu = input("提取多少页:")
    src_tiqu(yeshu)