简单爬取汽车之家新闻（requests模块+bs4）

import requests

# Fetch the Autohome news listing page. The timeout keeps the script from
# hanging forever on a stalled connection, and raise_for_status() aborts on an
# HTTP error instead of silently parsing an error page.
ret = requests.get('https://www.autohome.com.cn/all/#pvareaid=3311230', timeout=10)
ret.raise_for_status()
# Force GB2312 decoding of ret.text (presumably the page's declared charset;
# confirm against the response headers / <meta> tag).
ret.encoding = 'gb2312'

from bs4 import BeautifulSoup

import os
from urllib.parse import urljoin, urlsplit

# Directory that receives the downloaded images. Create it up front so the
# open() call below cannot fail with FileNotFoundError on a first run.
img_dir = 'bs4_img'
os.makedirs(img_dir, exist_ok=True)

# Parse the listing page and walk every <li>; only those that carry a heading,
# a summary paragraph, an image and a link are treated as news entries.
soup = BeautifulSoup(ret.text, 'lxml')
li_list = soup.find_all(name='li')
for li in li_list:
    h3 = li.find(name='h3')
    p_tag = li.find(name='p')
    img_tag = li.find(name='img')
    a_tag = li.find(name='a')
    # find() returns None when the tag is absent; skip incomplete items
    # instead of crashing on None.text / None['src'].
    if any(tag is None for tag in (h3, p_tag, img_tag, a_tag)):
        continue

    # Title
    title = h3.text
    # Summary
    desc = p_tag.text
    # Image address exactly as written in the page (may be protocol-relative)
    img = img_tag.get('src')
    # Article link
    url = a_tag.get('href')
    if not img or not url:
        continue

    # Resolve the image address against the page URL so protocol-relative
    # ('//host/x.jpg'), absolute and path-relative values all work.
    img_url = urljoin(ret.url, img)
    # Use only the path component for the file name so a query string never
    # ends up in it.
    img_name = os.path.basename(urlsplit(img_url).path)

    # Download the image to the local directory. The body is already fully
    # loaded (no stream=True), so write it in one call rather than iterating
    # iter_content(), whose default chunk size is a single byte.
    ret_imgs = requests.get(img_url, timeout=10)
    # Do not save an HTTP error body as if it were an image.
    if ret_imgs.ok and img_name:
        with open(os.path.join(img_dir, img_name), 'wb') as f:
            f.write(ret_imgs.content)

    print('''
    标题：%s
    摘要：%s
    图片：%s
    链接：%s
    ''' % (title, desc, img, url))

相关阅读:
uniapp 环境变量
vue 生命周期
浏览器的工作原理
moment
维基百科
vue 生命周期（二） uniapp
二进制安装K8S kubectl get node 返回No resources found
k8s master节点高可用 nginx+keepalived配置文件
Elasticsearch启动
qt多线程内存崩溃

原文地址：https://www.cnblogs.com/godlover/p/12684079.html