• 爬取百度图片中的图片(代码)


    import requests
    import os
    import uuid
    
    
    def get_id_uuid1():
        """Return a fresh version-1 UUID as a 32-character hex string.

        The value is the standard textual UUID with its hyphens removed.
        """
        return str(uuid.uuid1()).replace('-', '')
    
    def test(pages_start,page_stop):
        for i in range(30 * pages_start, 30 * page_stop + 30, 30):
            print(i)
    
    def _build_search_params(keyword, offset):
        """Return the acjson query parameters for the result page starting at offset."""
        return {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ct': 201326592,
            'is': '',
            'fp': 'result',
            'queryWord': keyword,
            'cl': 2,
            'lm': -1,
            'ie': 'utf-8',
            'oe': 'utf-8',
            'adpicid': '',
            'st': -1,
            'z': '',
            'ic': 0,
            'word': keyword,
            's': '',
            'se': '',
            'tab': '',
            'width': '',
            'height': '',
            'face': 0,
            'istype': 2,
            'qc': '',
            'nc': 1,
            'fr': '',
            'pn': offset,
            'rn': 30,
            'gsm': '3',
            # NOTE(review): presumably a cache-busting timestamp copied from a
            # browser request -- not verified.
            '1550217860355': ''
        }

    def getManyPages(keyword, pages_start, page_stop, timeout=5000):
        """Fetch the 'data' list of each search result page for keyword.

        One GET request is sent to the acjson endpoint per page, for pages
        pages_start through page_stop inclusive; page p is requested with
        pn = 30 * p and rn = 30.

        Args:
            keyword: search term, sent as both 'queryWord' and 'word'.
            pages_start: first page index.
            page_stop: last page index (inclusive).
            timeout: passed straight to requests.get, which interprets it in
                seconds. The default keeps the value that used to be
                hard-coded here; callers will usually want something much
                smaller.

        Returns:
            A list with one entry per successfully fetched page, each entry
            being the list found under the response's 'data' key. Pages that
            fail to download or parse, or that carry no 'data' list, are
            reported with print() and left out, so no None entries are
            returned.
        """
        url = 'https://image.baidu.com/search/acjson'
        # The headers never change between pages, so build them once.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
            'Connection': 'keep-alive',
            'content-type': 'application/json'
        }
        urls = []
        for page in range(pages_start, page_stop + 1):
            offset = 30 * page
            try:
                response = requests.get(url, headers=headers,
                                        params=_build_search_params(keyword, offset),
                                        timeout=timeout)
                payload = response.json()
            except (requests.RequestException, ValueError) as e:
                # Network failure or a body that is not valid JSON: report it
                # and move on to the next page.
                print(e)
                continue
            data = payload.get('data') if isinstance(payload, dict) else None
            if isinstance(data, list):
                urls.append(data)
            else:
                # Appending None here would make consumers that iterate each
                # page fail, so the page is skipped instead.
                print('响应中没有图片数据: pn=%s' % offset)
        return urls
    
    
    def getImg(dataList, localPath, timeout=30):
        """Download every thumbnail referenced in dataList into localPath.

        Args:
            dataList: iterable of pages, each page an iterable of dict items
                as returned by getManyPages. Items are read through their
                'thumbURL' key; empty pages, None pages and items without a
                'thumbURL' are tolerated.
            localPath: destination directory. It is created, together with
                any missing parent directories, when it does not exist.
            timeout: seconds to wait on each image request before giving up.

        Each image is saved as '<uuid hex>.jpg' inside localPath. A download
        that fails is reported with print() and skipped so that one bad URL
        does not abort the remaining downloads.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
            # Previously 'keep - alive'; the token must not contain spaces.
            'Connection': 'keep-alive',
            'content-type': 'application/json'
        }
        # makedirs also creates missing parents and accepts an existing directory.
        os.makedirs(localPath, exist_ok=True)
        for page in dataList:
            # A page may be None or empty when a search response had no data.
            for item in page or []:
                thumb_url = item.get('thumbURL') if item else None
                if thumb_url is None:
                    print('图片链接不存在')
                    continue
                print('正在下载:%s' % thumb_url)
                try:
                    ir = requests.get(thumb_url, headers=headers, timeout=timeout)
                    # Do not store an HTTP error page under a .jpg name.
                    ir.raise_for_status()
                except requests.RequestException as e:
                    print(e)
                    continue
                # os.path.join works whether or not localPath ends with a separator.
                target = os.path.join(localPath, '%s.jpg' % get_id_uuid1())
                with open(target, 'wb') as f:
                    f.write(ir.content)
    
    
    if __name__ == '__main__':
        # Script entry point: for each search keyword, print it, fetch result
        # pages 1 through 30, then download the thumbnails into /root/img/.
        for keyword in ('小泽玛利亚',):
            print(keyword)
            getImg(getManyPages(keyword, 1, 30), '/root/img/')
    
  • 相关阅读:
    final发布用户使用报告
    PSP总结报告
    每周例行报告
    王者荣耀交流协会final发布-第3次scrum立会
    每周例行报告
    王者荣耀交流协会互评Beta版本--爱阅app
    Beta发布用户使用报告
    每周例行报告
    Beta冲刺第二周王者荣耀交流协会第六次会议
    每周例行报告
  • 原文地址:https://www.cnblogs.com/sdhzdtwhm/p/10437018.html
Copyright © 2020-2023  润新知