"""Scrape anchor tags pointing at /zhengzhuang/<id> from one index page.

The page at ``url`` is downloaded, every ``<a href="/zhengzhuang/<digits>...``
fragment (up to, but not including, its closing ``</a>``) is collected,
echoed to stdout, and appended to ``OUTPUT_PATH`` as UTF-8 text.
"""
import codecs
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree

url = 'https://jbk.jiankang.com/zhengzhuang/pinyin-z/?p=5'

OUTPUT_PATH = '症状.txt'
SEPARATOR = ' '
REQUEST_TIMEOUT = 10.0  # seconds; without a timeout requests may block forever

# Non-greedy ``.*?`` so that several anchors on the same HTML line are
# returned as separate matches instead of one match running to the last
# ``</a>`` on that line. The lookahead keeps ``</a>`` out of the match.
SYMPTOM_LINK_RE = re.compile(r'<a href="/zhengzhuang/[0-9]{1,5}.*?(?=</a>)')


def fetch_page(page_url: str, timeout: float = REQUEST_TIMEOUT) -> str:
    """Download *page_url* and return the decoded response body.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx status (via ``raise_for_status``).
    """
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    # When the server declares no charset, requests falls back to
    # ISO-8859-1 for text/* bodies, which garbles non-Latin pages; use the
    # encoding detected from the body in that case only.
    content_type = response.headers.get('Content-Type', '')
    if 'charset' not in content_type.lower():
        response.encoding = response.apparent_encoding
    return response.text


def extract_symptom_links(html: str) -> list:
    """Return every ``<a href="/zhengzhuang/<id>...`` fragment in *html*.

    Each item starts at ``<a href=`` and ends just before the anchor's
    closing ``</a>``. An empty list is returned when nothing matches.
    """
    return SYMPTOM_LINK_RE.findall(html)


def save_matches(matches, path: str = OUTPUT_PATH) -> None:
    """Append *matches* to *path* (UTF-8), each followed by ``SEPARATOR``."""
    with codecs.open(path, 'a', 'utf8') as f:
        # One batched write instead of one write call per item.
        f.write(''.join(item + SEPARATOR for item in matches))


def main() -> None:
    """Fetch the page, print the matches, and append them to the output file."""
    sym = extract_symptom_links(fetch_page(url))
    print(sym)
    for line in sym:
        print(line)
    save_matches(sym)


if __name__ == '__main__':
    main()