• 线程池爬取汽车之家.py


    import time
    import requests
    #线程池、进程池
    from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
    #多线程:
    from threading import Thread
    #多进程:
    from multiprocessing import Process
    #进程池:
    from multiprocessing import Pool
    from bs4 import BeautifulSoup
    #导入cpu_count查看CPU信息获取本机CPU核数:
    from multiprocessing import cpu_count

    def task(url):
        """Fetch one Autohome article-list page and print its first image URL.

        Prints the final URL of the response, then the ``src`` of the first
        ``<img>`` found inside the ``auto-channel-lazyload-article`` container,
        prefixed with ``https:``.

        Args:
            url: Value substituted into the listing URL template. Despite the
                name, the caller passes a page number, not a full URL.

        Returns:
            None. Results and problems are reported via ``print``.
        """
        page_url = "https://www.autohome.com.cn/all/{}/#liststart".format(url)
        try:
            # Without a timeout, a stalled connection would occupy a pool
            # worker indefinitely.
            response = requests.get(page_url, timeout=10)
        except requests.RequestException as exc:
            print("request failed for {}: {}".format(page_url, exc))
            return

        # Decode the body as GBK instead of the encoding requests guessed.
        response.encoding = "gbk"
        soup = BeautifulSoup(response.text, "html.parser")

        # find() returns None when the container is missing (e.g. an error
        # page or a changed layout); guard before searching inside it.
        container = soup.find(name="div", attrs={"id": "auto-channel-lazyload-article"})
        if container is None:
            print("no article container found on {}".format(response.url))
            return

        print(response.url)

        # Only the first image is reported; find() yields it directly instead
        # of collecting every <img> and breaking out of a loop.
        first_img = container.find(name="img")
        if first_img is None:
            return
        src = first_img.get("src")
        # An <img> without a src attribute would make the concatenation fail.
        if src:
            print("https:" + src)

    if __name__ == '__main__':
        # Rule of thumb: size a process pool to the CPU core count and a
        # thread pool to roughly 2-5x the core count.
        stat = time.time()
        # Thread pool with twice as many workers as CPU cores
        # (e.g. 4 cores -> 8 threads).
        with ThreadPoolExecutor(max_workers=cpu_count() * 2) as t:
            # Keep the futures: submit() captures any exception raised by the
            # task, and it is lost unless the future is inspected.
            futures = [t.submit(task, i) for i in range(1, 110)]
        # Leaving the with-block waits for every submitted task to finish.
        for page, future in enumerate(futures, start=1):
            error = future.exception()
            if error is not None:
                print("page {} failed: {!r}".format(page, error))
        print("耗时:%s" %(time.time() - stat))
  • 相关阅读:
    通过git向github提交项目
    git连接github mac
    char如何储存3个字节或者4个字节
    centOS 7安装jdk
    在移动端语言react中使用video.js
    小程序自定义头部navbar组件
    git常用指令汇总学习
    react表单
    react从入门到熟悉(回顾react)
    react生命周期
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12215518.html
Copyright © 2020-2023  润新知