• Python 爬取百度图片


    import requests
    from bs4 import BeautifulSoup
    import re
    import os
    import json
    from urllib import parse
    # Raw "Name: value" header text, one header per line. The script passes it
    # to DownBaiDuImg, which parses it with gen_headers() and stores the result
    # on the instance; none of the class's requests actually send it.
    # NOTE(review): the names below (Content-Type: image/jpeg, ETag, Age,
    # Server, ...) look like *response* headers copied from a browser's network
    # panel rather than request headers -- confirm before wiring them into a
    # request.
    headers='''
    Accept-Ranges: bytes
    Access-Control-Allow-Origin: *
    Age: 570820
    Cache-Control: max-age=2628000
    Connection: keep-alive
    Content-Length: 45163
    Content-Type: image/jpeg
    Date: Sat, 11 May 2019 06:17:00 GMT
    ETag: 3448023fd5dc275ff4088c50d1da7d5f
    Expires: Tue, 04 Jun 2019 01:43:20 GMT
    Last-Modified: Thu, 01 Jan 1970 00:00:00 GMT
    Ohc-Response-Time: 1 0 0 0 0 0
    Server: JSP3/2.0.14
    '''

    class DownBaiDuImg(object):
        """Download Baidu image-search results for a single keyword.

        An instance keeps the URL-quoted keyword in ``kw`` and a running image
        counter in ``num``; the counter is used to build the saved file names
        (``1.jpg``, ``2.jpg``, ...).
        """

        # Request headers sent with the result-list query, as raw
        # "Name: value" text that gen_headers() turns into a dict.
        # NOTE(review): the Cookie line embeds what looks like a personal
        # logged-in session (BDUSS=...). Consider removing it or loading it
        # from configuration instead of committing it to source.
        listheader='''
    Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
    Accept-Encoding: gzip, deflate
    Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
    Cache-Control: max-age=0
    Connection: keep-alive
    Cookie: BDIMGISLOGIN=0; winWH=%5E6_1366x631; BDqhfp=%E6%AF%94%E5%9F%BA%E5%B0%BC%26%26-10-1undefined%26%260%26%261; BAIDUID=ED5602028E2013468035151C8C3C3A53:FG=1; BIDUPSID=ED5602028E2013468035151C8C3C3A53; PSTM=1552569672; BDSFRCVID=ZoFOJeC62GC4q3c9ZolNh5mNHGcamB3TH6aoUWSSBZNRGvSy07o7EG0PqU8g0Kub55HBogKK0mOTHv8F_2uxOjjg8UtVJeC6EG0P3J; H_BDCLCKID_SF=tJAq_D0hfIP3fP36q45Mq4tHen6y0fRZ5mAqoq3nJPD5HITLhPvFM5LDX47x5-oL0J7naIQqaM5RVUOtWxTCQnK92H0f25b43bRTQxKy5KJvfJ_Gjf7IhP-UyN3LWh37bJblMKoaMp78jR093JO4y4Ldj4oxJp8eWJQ2QJ8BJI02MDJP; BDUSS=k5MTWt1V2RvRHRBMVBrUVFMeURRY243ZWRMNDEtMkg1Mm94VnNYcVp5cUh5cmxjQVFBQUFBJCQAAAAAAAAAAAEAAAA64oOWs8y36rPYMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIc9klyHPZJcW; uploadTime=1557547291054; cleanHistoryStatus=0; BDRCVFR[Tp5-T0kH1pb]=mk3SLVN4HKm; delPer=0; PSINO=1; BDRCVFR[CCf63Vmik7b]=mk3SLVN4HKm; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; H_PS_PSSID=1441_28939_28981_21126_28519_28775_28723_28963_28836_28585_26350_22157; indexPageSugList=%5B%22%E6%AF%94%E5%9F%BA%E5%B0%BC%22%2C%22%E7%BE%8E%E5%A5%B3%22%5D
    Host: image.baidu.com
    Upgrade-Insecure-Requests: 1
    User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36
    '''

        def __init__(self, header, kw):
            """Store the parsed headers and the URL-quoted search keyword.

            Args:
                header: Raw "Name: value" header text, one header per line.
                kw: Search keyword; it is percent-encoded before being stored.
            """
            super(DownBaiDuImg, self).__init__()
            # The attribute name keeps its original (misspelled) form so any
            # existing reader of ``heades`` continues to work. It is stored
            # only; no request made by this class sends it.
            self.heades = self.gen_headers(header)
            self.num = 0
            self.kw = parse.quote(kw)

        def gen_headers(self, s):
            """Parse "Name: value" lines into a dict.

            Blank lines and lines without a non-empty name before the first
            colon are skipped. Names and values are stripped of surrounding
            whitespace, so indented header text parses the same as flush-left
            text.

            Args:
                s: Header text with one "Name: value" pair per line.

            Returns:
                dict mapping header name to header value.
            """
            headers = {}
            for line in s.splitlines():
                # Split on the first colon only, so values that themselves
                # contain colons (cookies, dates, URLs) stay intact.
                name, sep, value = line.partition(':')
                name = name.strip()
                if not sep or not name:
                    continue
                headers[name] = value.strip()
            return headers

        def downimg(self, url, name, save_dir='../images/'):
            """Fetch ``url`` and write the response body to ``save_dir/name``.

            Args:
                url: Address of the image to download.
                name: File name to save under.
                save_dir: Target directory; created if it does not exist.

            Returns:
                True when the file was written, False when the request failed
                (including a non-2xx status) or the file could not be written.
            """
            try:
                response = requests.get(url, timeout=2)
                # Do not save an HTTP error page as if it were an image.
                response.raise_for_status()
                os.makedirs(save_dir, exist_ok=True)
                with open(os.path.join(save_dir, name), 'wb') as f:
                    f.write(response.content)
            except (requests.RequestException, OSError):
                # Best effort: the caller reports the failure and moves on.
                return False
            return True

        def _list_url(self, offset, per_page=30):
            """Build the result-list (acjson) URL for one page of results."""
            return (
                'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj'
                '&ct=201326592&is=&fp=result&queryWord=' + self.kw +
                '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest='
                '&copyright=&word=' + self.kw +
                '&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode='
                '&force=&cg=girl&pn=' + str(offset) + '&rn=' + str(per_page)
            )

        @staticmethod
        def _guess_extension(image_url):
            """Return the file extension of the URL's path, or '.jpg' if none."""
            # Look at the path only, so a query string never leaks into the
            # file name. '.jpg' is an assumed default for extension-less URLs.
            return os.path.splitext(parse.urlparse(image_url).path)[-1] or '.jpg'

        def doing(self, page, per_page=30):
            """Query one page of search results and download every image in it.

            Prints one success/failure line per image and advances ``num`` for
            each image attempted.

            NOTE(review): ``pn`` is treated as the result offset and ``rn`` as
            the page size, which matches the multiples of 30 the script passes
            in. The previous code sent ``pn=60&rn=<page>``, so the first call
            asked for zero results and later calls re-read the same offset.

            Args:
                page: Value sent as the ``pn`` query parameter.
                per_page: Value sent as the ``rn`` query parameter.
            """
            listheader = self.gen_headers(self.listheader)
            response = requests.get(
                self._list_url(page, per_page), headers=listheader, timeout=10)
            # Parse once; a payload without a 'data' key yields no downloads.
            entries = json.loads(response.text).get('data') or []
            for entry in entries:
                thumb = entry.get('thumbURL')
                if not thumb:
                    # Entries without a thumbnail (e.g. an empty dict) carry
                    # nothing to download.
                    continue
                hover = entry.get('hoverURL') or thumb
                imgurl = ('http://image.baidu.com/search/down?tn=download&ipn=dwnl'
                          '&word=download&ie=utf8&fr=result&url=' + parse.quote(hover) +
                          '&thumburl=' + parse.quote(thumb))
                self.num += 1
                filename = str(self.num) + self._guess_extension(thumb)
                if self.downimg(imgurl, filename):
                    print('下载成功')
                else:
                    print('下载失败')
    # Script entry: build one downloader for the keyword, then call doing()
    # with 0, 30, 60, 90 and 120 in turn.
    downloader = DownBaiDuImg(headers, '绿色护眼壁纸大全')
    for start in range(0, 150, 30):
        downloader.doing(start)

  • 相关阅读:
    SCILAB简介[z]
    UG OPEN API编程基础 2约定及编程初步
    Office 2003与Office 2010不能共存的解决方案
    UG OPEN API 编程基础 3用户界面接口
    NewtonRaphson method
    UG OPEN API编程基础 13MenuScript应用
    UG OPEN API编程基础 14API、UIStyler及MenuScript联合开发
    UG OPEN API编程基础 4部件文件的相关操作
    UG OPEN API编程基础 1概述
    16 UG Open的MFC应用
  • 原文地址:https://www.cnblogs.com/chengfengchi/p/10849864.html
Copyright © 2020-2023  润新知