# coding=utf-8
import lxml, bs4, re, requests
csvContent = ''  # Not used by the download logic below; kept as a module-level name.

# Index article whose links lead to the individual articles to download.
# (For offline debugging, a saved copy of the page can be parsed instead.)
INDEX_URL = "http://mp.weixin.qq.com/s/u_WmkE5meMWuZ81G5gHhBQ"
# Only links whose href starts with this prefix are followed.
ARTICLE_URL_PREFIX = 'http://mp.weixin.qq.com'
# Series name removed from every article title before it becomes a file name.
SERIES_NAME = "计算机程序的思维逻辑"
# Directory the pages are written to; it must already exist.  The backslashes
# are escaped on purpose: an unescaped trailing backslash would escape the
# closing quote and make the literal a syntax error.
OUTPUT_DIR = 'D:\\Java编程的逻辑\\'
# Seconds to wait on the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 30


def clean_title(raw_title):
    """Turn an article title into the base name of its output file.

    Surrounding whitespace is stripped, the series name, parentheses and
    colons are removed, the result is stripped again, and finally a single
    trailing "/" is dropped if present.

    Args:
        raw_title: Title text as extracted from the article page.

    Returns:
        The cleaned title; may be an empty string.
    """
    title = raw_title.strip().replace(SERIES_NAME, "")
    for unwanted in (")", "(", ":"):
        title = title.replace(unwanted, "")
    title = title.strip()
    if title.endswith("/"):
        title = title[:-1]
    return title


def build_output_path(title):
    """Return the path of the HTML file that stores the article *title*."""
    return OUTPUT_DIR + title + '.html'


def download_article(url):
    """Fetch one article and save its HTML under OUTPUT_DIR.

    Args:
        url: Address of the article page.

    Returns:
        The cleaned title used for the file name, or None when the page has
        no usable title (nothing is written in that case).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    page = bs4.BeautifulSoup(response.text, 'html.parser')
    title_tag = page.find(attrs={'class': 'rich_media_title'})
    if title_tag is None:
        return None
    # get_text() also works when the title element contains nested markup,
    # where .string would be None.
    title = clean_title(title_tag.get_text())
    if not title:
        return None
    with open(build_output_path(title), 'w', encoding='utf-8') as out_file:
        out_file.write(response.text)
    return title


def main():
    """Download every article linked from the index page."""
    index_page = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
    soup = bs4.BeautifulSoup(index_page.text, 'html.parser')
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href yield None; skip them and foreign links.
        if not href or not href.startswith(ARTICLE_URL_PREFIX):
            continue
        title = download_article(href)
        if title is None:
            print('skipped (no usable title): ' + href)
        else:
            print(title)


if __name__ == '__main__':
    main()
# 效果如下 (the result looks like this)