import re
import urllib.request
from urllib.parse import urljoin

from bs4 import BeautifulSoup

# Page whose links are listed, and the base used to resolve relative hrefs.
START_URL = 'http://baike.baidu.com/view/284853.htm'
BASE_URL = 'http://baike.baidu.com/'

# Compiled once; passed to find_all() as the filter for the href attribute.
VIEW_HREF = re.compile('view')


def main():
    """Print every link on the start page whose href matches ``view``.

    Downloads START_URL, parses it with Python's built-in ``html.parser``
    and prints one line per matching tag in the form
    ``<link text> -> <absolute url>``.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(START_URL) as response:
        html = response.read()

    # Use the parser bundled with Python so no extra parser is required.
    soup = BeautifulSoup(html, 'html.parser')

    for link in soup.find_all(href=VIEW_HREF):
        # urljoin resolves root-relative hrefs ("/view/1.htm") without
        # producing a double slash and leaves absolute hrefs untouched,
        # which plain string concatenation with the base did not.
        print(link.text, '->', urljoin(BASE_URL, link['href']))


if __name__ == '__main__':
    main()