• Python 爬取百度图片


    import requests
    import re
    from urllib import parse
    import os
    from threading import Thread
    
    def download(i,j,key,url):
        """Fetch one search-result page and save every image it references.

        The page at ``url`` is requested, every ``"objURL"`` value found in the
        response text is decoded with ``decodeurl`` and downloaded to
        ``./<key>/<key>_<n>.jpg``. ``n`` starts at ``j`` and is incremented
        after each image that was written.

        Args:
            i: Result offset of the page. Not used inside this function; kept
                so existing callers keep working.
            j: Number used in the file name of the first saved image.
            key: Search keyword; also used as the output directory name and
                the file-name prefix.
            url: URL of the result page to scan for ``objURL`` entries.

        Raises:
            requests.exceptions.RequestException: If the result page itself
                cannot be fetched (including a timeout). Failures of
                individual images are printed and skipped instead.
        """
        header = {'content-type': 'application/json',
                 "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
                  "Connection":"keep-alive",
                  "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                  "Accept-Language":"zh-CN,zh;q=0.8"
                  }
        # Without a timeout a stalled listing request would block forever.
        response = requests.get(url, headers=header, timeout=10)

        link = re.findall(r'"objURL":"(.*?)"', response.text, re.S)

        target_dir = "./" + key + "/"
        # exist_ok=True removes the check-then-create race of exists()+mkdir().
        os.makedirs(target_dir, exist_ok=True)

        for web in link:
            url = decodeurl(web)
            print(url)

            # Only the network call is guarded; the specific handlers come
            # first so their messages stay unchanged, and the base class
            # catches everything else requests can raise (connection errors,
            # malformed URLs, ...) so one bad link cannot abort the page.
            try:
                # allow_redirects=False: do not follow redirects.
                pic = requests.get(url, timeout=10, headers=header, allow_redirects=False)
            except requests.exceptions.ReadTimeout:
                print(web, "【错误】超时")
                continue
            except requests.exceptions.ChunkedEncodingError:
                print(web, "【错误】远程主机强迫关闭了一个现有的连接")
                continue
            except requests.exceptions.RequestException:
                print(web,"【错误】当前图片无法下载")
                continue

            # NOTE(review): the body is saved whatever the HTTP status is, so
            # error pages or redirect stubs may end up as .jpg files.
            dirfile = target_dir + key + '_' + str(j) + '.jpg'
            # The context manager closes the file even if the write fails.
            with open(dirfile, 'wb') as fp:
                fp.write(pic.content)
            j += 1
    
    
    def decodeurl(url):
        """Decode an obfuscated ``objURL`` string into a plain URL.

        Decoding runs in two passes. First the three multi-character tokens
        standing for ':', '.' and '/' are substituted. Then every remaining
        character is mapped through a one-to-one substitution table;
        characters that are not in the table are left unchanged.

        Args:
            url: The encoded string.

        Returns:
            The decoded string.
        """
        # Token pass comes first: the tokens contain characters (such as
        # '2', 'q', 'e', '3', 'd') that the character pass would rewrite.
        token_pairs = (
            ('_z2C$q', ':'),
            ('_z&e3B', '.'),
            ('AzdH3F', '/'),
        )
        # Position n of `cipher` decodes to position n of `plain`.
        cipher = 'wkv1ju2it3hs4g5rq6fp7eo8dn9cm0bla'
        plain = 'abcdefghijklmnopqrstuvw1234567890'

        decoded = url
        for token, symbol in token_pairs:
            decoded = decoded.replace(token, symbol)
        return decoded.translate(str.maketrans(cipher, plain))
    
    def main(key="树叶标本", total=2000):
        """Crawl image-search result pages for ``key`` and download the images.

        Result pages are requested in steps of 30 results, at offsets
        0, 30, 60, ... below ``total``; each page URL is handed to
        ``download``.

        Args:
            key: Search keyword. It is URL-quoted for the query string and
                passed unchanged to ``download``.
            total: Upper bound (exclusive) on the result offset to request.
        """
        # Must stay in step with the ``rn=30`` parameter in the URL below.
        page_size = 30
        data = parse.quote(str(key))

        for i in range(0, total, page_size):
            # NOTE(review): ``queryWord+=`` is sent empty, exactly as before;
            # confirm whether the endpoint expects ``queryWord=<keyword>``.
            url = "http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord+=&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&word="+data+"&z=&ic=&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=1&fr=&step_word="+data+"&pn="+str(i)+"&rn=30&gsm=3c&1527055161957="
            # File numbering is 1-based: the page at offset i starts at i + 1.
            download(i, i + 1, key, url)
    
    # Start the crawl only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()
  • 相关阅读:
    头像切换封装
    JSON数据的序列化方法
    HTML5 LocalStorage 本地存储
    onhashchange实现下一页与上一页功能,并且实现当前页面刷新时停留在当前页面
    提交页面可输入的数据-----XSS漏洞
    js冒泡排序
    html实现将网页页面分享到微信朋友圈添加缩略图图片的方法
    js常用正则表达式
    C 传递指针给函数
    C 指向指针的指针
  • 原文地址:https://www.cnblogs.com/xypbk/p/9099168.html
Copyright © 2020-2023  润新知