• 爬百度图片


    #!/usr/bin/env python
    # _*_ coding: utf-8 _*_
    # @Time : 2022/9/8 14:31
    # @Author : AndyXi
    # @Version:V 0.1
    # @File : 爬img.py
    # @desc : Download Baidu image-search thumbnails for a keyword, showing a progress bar per file

    import json
    from datetime import datetime
    import time
    import requests
    from tqdm import tqdm

    def get_filename() -> str:
        """Return a timestamp string to be used as a file name stem.

        The value is the current local time formatted as
        ``YYYY-MM-DD-HH-MM-SS-ffffff`` (microsecond resolution), so
        successive calls normally produce distinct names.

        Returns:
            The formatted timestamp, without any file extension.
        """
        return datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")


    if __name__ == "__main__":
        # Script entry point: ask for a keyword and a count, query Baidu's image
        # search JSON endpoint once, then stream every returned thumbnail to disk
        # with a per-file progress bar.

        # Imported locally so this block does not depend on the file-level
        # import list being edited.
        import os

        word = input("请输入要爬取的关键字: ")
        page_size = int(input("请输入要爬取的张数: "))

        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
        }

        # Seconds to wait for the server before giving up; without a timeout a
        # stalled connection would hang the script indefinitely.
        timeout = 10

        # open(..., "wb") does not create missing parent directories, so make
        # sure the output directory exists before the first download.
        save_dir = "downlod_img"
        os.makedirs(save_dir, exist_ok=True)

        # The query is passed through ``params=`` so that requests
        # percent-encodes the keyword; interpolating it into the URL by hand
        # breaks as soon as it contains characters such as "&", "#" or "+".
        # Keys and values mirror the original hand-written query string.
        query = {
            "tn": "resultjson_com",
            "logid": "8057700054872665483",
            "ipn": "rj",
            "ct": "201326592",
            "is": "",
            "fp": "result",
            "fr": "",
            "word": word,
            "cg": "star",
            "queryWord": word,
            "cl": "2",
            "lm": "-1",
            "ie": "utf-8",
            "oe": "utf-8",
            "adpicid": "",
            "st": "-1",
            "z": "",
            "ic": "0",
            "hd": "",
            "latest": "",
            "copyright": "",
            "s": "",
            "se": "",
            "tab": "",
            "width": "",
            "height": "",
            "face": "0",
            "istype": "2",
            "qc": "",
            "nc": "1",
            "expermode": "",
            "nojc": "",
            "isAsync": "",
            "pn": "60",
            "rn": page_size,
            "gsm": "3c",
            "1662621074446": "",
        }
        res_img = requests.get(
            "https://image.baidu.com/search/acjson",
            params=query,
            headers=header,
            timeout=timeout,
        )
        # Fail here with a clear HTTP error instead of a confusing JSON
        # decoding error further down.
        res_img.raise_for_status()
        res_dic = json.loads(res_img.text)

        # Stream each image in 1 KiB chunks so that a whole file is never held
        # in memory, updating a progress bar as the bytes arrive.
        for item in res_dic.get("data") or []:
            img_url = item.get("thumbURL", "")
            if not img_url:
                # Entries without a thumbnail URL cannot be downloaded;
                # requests.get("") would raise and abort the whole run.
                continue

            # Generate the name once so the progress bar label and the file
            # written to disk always refer to the same name.
            img_name = get_filename() + ".jpg"

            # The context manager releases the streamed connection even if
            # writing the file fails part-way through.
            with requests.get(
                img_url, headers=header, stream=True, timeout=timeout
            ) as img_data:
                if not img_data.ok:
                    # Do not store an HTTP error page as a .jpg file.
                    continue

                # Image size in bytes when the server reports it; otherwise
                # None, which makes tqdm show a running count without a total.
                length_header = img_data.headers.get("content-length")
                content_size = int(length_header) if length_header else None

                with open(os.path.join(save_dir, img_name), "wb") as f, tqdm(
                    desc=img_name, total=content_size, unit="B", unit_scale=True
                ) as bar:
                    for chunk in img_data.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            bar.update(len(chunk))
  • 相关阅读:
    一种可以实时检测IP地址合法性的EditText输入框
    LVDS 屏幕 M215HGE-L21 在 rk3288 上的适配过程
    轻读一下 Android 应用开发中的 assets 目录
    XML与其在Android下的解析
    Linux Shell脚本实现根据进程名杀死进程
    RSA host key has changed 错误
    Linux下安装jdk8步骤详述
    Windows/Linux javac/java编译运行引入所需的jar包
    No cached version of ..... available for offline mode.
    Java学习之InputStream中read()与read(byte[] b)
  • 原文地址:https://www.cnblogs.com/chinaops/p/16670012.html
Copyright © 2020-2023  润新知