• 爬qqhentai


    import requests
    from bs4 import BeautifulSoup
    import time
    import re
    import os
    import random

    # Pool of desktop User-Agent strings; one is picked at random per request
    # to make traffic look less uniform. The two MSIE entries in the original
    # were truncated (missing the closing parenthesis) — restored here.
    agentlist = [
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    ]



    def get_nomal_headers():
        """Build a minimal request-header dict with a random User-Agent.

        Returns:
            dict: headers containing only a "User-Agent" key, drawn from
            the module-level ``agentlist`` pool.
        """
        chosen_agent = random.choice(agentlist)
        return {"User-Agent": chosen_agent}
    def get_pages(url="https://zh.qqhentai.com/g/334792/list2/cdnwp/"):
        """Fetch a gallery list page and download every image it references.

        Args:
            url: Gallery list-page URL. Defaults to the album the script was
                originally written for, so existing callers are unaffected.
        """
        headers = get_nomal_headers()
        # timeout prevents the scraper from hanging forever on a stalled server
        r = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(r.text, "html.parser")
        # All thumbnails live inside <section id="image-container">.
        container = soup.find("section", id="image-container")
        if container is None:
            # Layout changed or the page failed to load a gallery — bail out
            # instead of crashing with AttributeError on None.
            print("image-container not found: " + url)
            return
        img_list = container.find_all("img", class_="list-img lazyload")
        for img in img_list:
            # The alt text doubles as the page label; collapse embedded spaces
            # so it is usable as a filename.
            page = "".join(img["alt"].split(" "))
            # lazy-loaded images keep the real URL in data-src, not src
            imgurl = img["data-src"]
            downloadimg(page, imgurl)

    def downloadimg(page, imgurl):
        """Download one image to ``<page>.jpg`` unless it already exists.

        Args:
            page: Basename (no extension) for the saved file.
            imgurl: Direct URL of the image to fetch.
        """
        imgpath = page + ".jpg"
        if os.path.exists(imgpath):
            # Already downloaded on a previous run — skip to stay idempotent.
            print("已存在" + page + ".jpg")
            return
        headers = get_nomal_headers()
        # timeout keeps a dead server from blocking the whole crawl
        r = requests.get(imgurl, headers=headers, timeout=10)
        print("downloading" + page)
        # "with" guarantees the handle is closed; the original opened the file
        # and flushed but never closed it, leaking a descriptor per image.
        with open(imgpath, "wb") as fout:
            fout.write(r.content)
        # Brief pause between downloads to be gentle on the server.
        time.sleep(0.2)

    if __name__ == "__main__":
        # Run the scrape only when executed as a script; the original called
        # get_pages() unconditionally, which fired on mere import.
        get_pages()



  • 相关阅读:
    php解决与处理网站高并发 大流量访问的方法
    mysql事务和锁InnoDB
    从一个死锁看mysql innodb的锁机制
    Git如何删除自己创建的项目
    公众号的坑
    字符串转Unicode码
    字符串转UTF-8码(%开头)
    git介绍和使用
    ng2中文文档地址
    两个数组的排序方法
  • 原文地址:https://www.cnblogs.com/lijifei/p/13946976.html
Copyright © 2020-2023  润新知