用 Python 爬取网页：抓取微信公众号文章并保存为本地 HTML 文件

# coding=utf-8  
  
import lxml, bs4, re, requests  


csvContent = ''  # not read anywhere in the visible script; kept so the name still exists

# Index article whose <a> links point at the individual articles to download.
INDEX_URL = "http://mp.weixin.qq.com/s/u_WmkE5meMWuZ81G5gHhBQ"
# Only links whose href starts with this prefix are followed.
ARTICLE_PREFIX = 'http://mp.weixin.qq.com'
# Series name that is removed from every title before it becomes a file name.
SERIES_NAME = "计算机程序的思维逻辑"
# Backslashes are written doubled: the original literal ended in \' which
# escaped the closing quote and made the whole script a SyntaxError.
OUTPUT_DIR = 'D:\\Java编程的逻辑\\'
# Seconds to wait for each HTTP request; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def clean_title(raw_title):
    """Turn a raw article title into the base name used for the saved file.

    The series name, the ASCII characters ``)``, ``(`` and ``:`` are removed,
    surrounding whitespace is stripped, and a single trailing ``/`` is dropped.

    Args:
        raw_title: Title text as extracted from the article page.

    Returns:
        The cleaned title; may be an empty string.
    """
    title = raw_title.strip().replace(SERIES_NAME, "")
    for unwanted in (")", "(", ":"):
        title = title.replace(unwanted, "")
    title = title.strip()
    # Only one trailing slash is removed, matching the original behaviour.
    if title.endswith("/"):
        title = title[:-1]
    return title


def main():
    """Download every linked article from the index page into OUTPUT_DIR.

    Each article is saved as ``<cleaned title>.html`` (UTF-8) and its cleaned
    title is printed. Links without an href, pages without a
    ``rich_media_title`` element, and titles that are empty after cleaning
    are skipped instead of crashing the run. OUTPUT_DIR must already exist.
    """
    index_page = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
    index_soup = bs4.BeautifulSoup(index_page.text, 'html.parser')

    for link in index_soup.find_all('a'):
        href = link.get('href')
        # <a> tags without an href return None, which has no startswith().
        if not href or not href.startswith(ARTICLE_PREFIX):
            continue

        article_page = requests.get(href, timeout=REQUEST_TIMEOUT)
        article_soup = bs4.BeautifulSoup(article_page.text, 'html.parser')

        title_tags = article_soup.find_all(attrs={'class': 'rich_media_title'})
        if not title_tags:
            # Not an article page (or the layout changed): nothing to name the file after.
            continue

        # get_text() always returns a str, whereas .string is None when the
        # title element contains nested markup.
        title = clean_title(title_tags[0].get_text())
        if not title:
            continue

        print(title)
        file_name = OUTPUT_DIR + title + '.html'

        with open(file_name, 'w', encoding='utf-8') as out_file:
            out_file.write(article_page.text)


if __name__ == "__main__":
    main()

效果如下

相关阅读:
Java 强、弱、软、虚，你属于哪一种？
为啥阿里巴巴规定要在 @Transactional 指定 rollbackFor？
DataGrip 上手体验，真香！
日志框架选型，Logback 还是 Log4j2？
基于OpenLayers和ArcGIS Server REST的要素信息保存功能
基于Openlayers和Arcgis Server REST实现地图(要素)点击查询功能
使用OpenLayers加载ArcGIS Server矢量图层
ArcGIS Server在线编辑功能
使用OpenLayers显示百度地图
如何分析SAP ABAP的SYSTEM_NO_ROLL运行时错误

原文地址：https://www.cnblogs.com/xiang--liu/p/9710363.html