• Python 爬取图片


    一、给定图片的 url,直接下载到本地

    import re
    import requests
    from bs4 import BeautifulSoup
    
    
    def get_gif(url, a):
        """Download the image at ``url`` and save it locally as ``<a>.gif``.

        Args:
            url: Address of the image to download.
            a: Integer used as the file name (formatted with ``%d``).
        """
        response = requests.get(url)
        # Folder on the author's own machine; change it to a path that exists
        # locally.  The literal must be a raw string: in a normal string "\U"
        # starts a Unicode escape, which is a SyntaxError on Python 3.
        with open(r"C:\Users\acm\Desktop\新建文件夹\%d.gif" % a, 'wb') as file:
            file.write(response.content)
    
    
    if __name__ == '__main__':
        # Address of the animated image to fetch; it is stored under the number 1.
        gif_address = 'http://game.gtimg.cn/images/nz/cp/a20201117decbeta/m1-prop1.gif'
        get_gif(url=gif_address, a=1)
    View Code

     二、给定网页链接url,按照规律爬取网页上的所有图片

    import re
    import requests
    from bs4 import BeautifulSoup
    
    
    def get_url(url):
        """Fetch the page at ``url`` and return the image addresses found in it.

        Args:
            url: Address of the web page to scan.

        Returns:
            The list produced by ``findall``: one captured ``src`` value for
            each ``<img src="..." alt="...">`` tag matched in the page text.
        """
        page = requests.get(url)
        # Decode the body as UTF-8 before searching it.
        page.encoding = 'utf-8'
        # The capture group is the image address.  The pattern follows the
        # markup of the target page and has to be adapted for other sites.
        img_pattern = re.compile(r'<img src="(.*?)" alt=".*?">')
        return img_pattern.findall(page.text)
    
    
    def get_photo(url, a):
        """Download the image at ``url`` and save it locally as ``<a>.jpg``.

        Args:
            url: Address of the image to download.
            a: Integer used as the file name (formatted with ``%d``).
        """
        response = requests.get(url)
        # Folder on the author's own machine; change it to a path that exists
        # locally.  The literal must be a raw string: in a normal string "\U"
        # starts a Unicode escape, which is a SyntaxError on Python 3.
        with open(r"C:\Users\acm\Desktop\新建文件夹\%d.jpg" % a, 'wb') as file:
            file.write(response.content)
    
    
    if __name__ == '__main__':
        # Page whose images are downloaded.
        page_url = 'http://www.netbian.com/'
        # Files are numbered 1, 2, 3, ... in the order the links were found.
        for index, photo_url in enumerate(get_url(page_url), start=1):
            get_photo(photo_url, index)
    View Code

     三、加上了请求头(headers)和文件夹/文件操作

    import re
    import os
    import requests
    from bs4 import BeautifulSoup
    
    # Request headers sent with every HTTP request made by this script.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
    }

    if __name__ == '__main__':
        url = 'http://www.netbian.com/weimei/'
        # The headers must be passed by keyword: the second positional
        # parameter of requests.get is ``params``, so passing the dict
        # positionally would put it in the query string instead of sending it
        # as request headers.
        response = requests.get(url, headers=headers)
        response.encoding = 'utf-8'
        # The capture group is the image address; the pattern follows the
        # markup of the target page.
        urls = re.findall('img src="(.*?)" alt=".*?"', response.text)
        print(urls)
        # Create the output directory once, before the download loop.
        dir_name = 'photos'
        os.makedirs(dir_name, exist_ok=True)
        # Images are saved as 1.jpg, 2.jpg, ... in the order they were found.
        for a, i in enumerate(urls, start=1):
            # Send the same headers for the image requests as for the page.
            response = requests.get(i, headers=headers)
            file_name = str(a) + '.jpg'
            with open(os.path.join(dir_name, file_name), 'wb') as file:
                file.write(response.content)
    View Code
  • 相关阅读:
    Python常用第三方库总结
    Python爬虫技术--入门篇--爬虫介绍
    X sql解惑 25 里程碑问题 答案
    X sql解惑 34 咨询顾问收入问题
    从小变大的照片
    获取属性的顺序
    for...in
    判断元素是否存在
    自由的元素名称
    ES6语法糖-简洁属性表示
  • 原文地址:https://www.cnblogs.com/cherish-lin/p/14073901.html
Copyright © 2020-2023  润新知