爬取百度图片中的图片（代码）

import requests
import os
import uuid


def get_id_uuid1():
    """Return a fresh version-1 UUID as a 32-character hex string.

    The value is the usual UUID text form with the dashes removed.
    """
    return uuid.uuid1().hex

def test(pages_start, page_stop):
    """Print the result offset of every page in the requested span.

    Offsets advance in steps of 30, starting at ``30 * pages_start`` and
    ending at ``30 * page_stop`` (the stop page is included).
    """
    page_size = 30
    first_offset = page_size * pages_start
    end_offset = page_size * page_stop + page_size
    for offset in range(first_offset, end_offset, page_size):
        print(offset)

def getManyPages(keyword, pages_start, page_stop):
    """Fetch Baidu image-search result pages for ``keyword``.

    One GET request is sent per offset in
    ``range(30 * pages_start, 30 * page_stop + 30, 30)``, so both page
    indices are inclusive and every request asks for 30 results.

    Args:
        keyword: Search term, sent as both ``queryWord`` and ``word``.
        pages_start: Index of the first page to request.
        page_stop: Index of the last page to request (inclusive).

    Returns:
        A list holding the ``data`` value of each page that answered with
        a non-empty ``data`` field. A page that fails (network error or
        undecodable JSON) has its error printed and is skipped; a page
        without ``data`` is skipped silently.
    """
    url = 'https://image.baidu.com/search/acjson'
    # The headers never change between pages, so build them once.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
        'Connection': 'keep-alive',
        'content-type': 'application/json'
    }
    urls = []
    for offset in range(30 * pages_start, 30 * page_stop + 30, 30):
        params = {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ct': 201326592,
            'is': '',
            'fp': 'result',
            'queryWord': keyword,
            'cl': 2,
            'lm': -1,
            'ie': 'utf-8',
            'oe': 'utf-8',
            'adpicid': '',
            'st': -1,
            'z': '',
            'ic': 0,
            'word': keyword,
            's': '',
            'se': '',
            'tab': '',
            'width': '',
            'height': '',
            'face': 0,
            'istype': 2,
            'qc': '',
            'nc': 1,
            'fr': '',
            'pn': offset,
            'rn': 30,
            'gsm': '3',
            # NOTE(review): looks like a captured millisecond timestamp sent
            # as a bare key; kept verbatim -- confirm whether it is needed.
            '1550217860355': ''
        }
        try:
            # requests timeouts are in seconds; the previous value of 5000
            # let one stalled request block the crawl for over an hour.
            response = requests.get(url, headers=headers, params=params, timeout=10)
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: report the failed page and carry on with the rest.
            print(e)
            continue
        data = payload.get('data') if isinstance(payload, dict) else None
        # Never store None/empty pages: consumers iterate every entry.
        if data:
            urls.append(data)
    return urls


def getImg(dataList, localPath):
    """Download the thumbnail of every search result into ``localPath``.

    Args:
        dataList: Iterable of result pages; each page is an iterable of
            dicts. Entries with a ``thumbURL`` value are downloaded, the
            others are reported as missing. ``None`` or empty pages are
            skipped.
        localPath: Target directory. It is created (including missing
            parent directories) when it does not exist; a trailing path
            separator is optional.

    Each image is saved as ``<uuid>.jpg`` using a name from
    ``get_id_uuid1()``. A download that fails (network error or HTTP error
    status) has its error printed and is skipped, so one bad link does not
    abort the whole run.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
        # Was 'keep - alive'; the embedded spaces made the value malformed.
        'Connection': 'keep-alive',
        'content-type': 'application/json'
    }
    # makedirs also creates missing parents, which os.mkdir cannot do.
    os.makedirs(localPath, exist_ok=True)
    for page in dataList:
        if not page:
            continue
        for item in page:
            thumb_url = item.get('thumbURL')
            if thumb_url is None:
                print('图片链接不存在')
                continue
            print('正在下载：%s' % thumb_url)
            try:
                # Without a timeout a stalled server would hang the crawl.
                ir = requests.get(thumb_url, headers=headers, timeout=30)
                # Avoid saving an HTTP error page under a .jpg name.
                ir.raise_for_status()
            except requests.RequestException as e:
                print(e)
                continue
            # os.path.join works with or without a trailing separator.
            target = os.path.join(localPath, '%s.jpg' % get_id_uuid1())
            with open(target, 'wb') as image_file:
                image_file.write(ir.content)


if __name__ == '__main__':
    # Crawl pages 1 through 30 for each keyword and store the thumbnails.
    save_dir = '/root/img/'
    for keyword in ['小泽玛利亚']:
        print(keyword)
        result_pages = getManyPages(keyword, 1, 30)
        getImg(result_pages, save_dir)

相关阅读:
final发布用户使用报告
 PSP总结报告
 每周例行报告
 王者荣耀交流协会final发布-第3次scrum立会
 每周例行报告
 王者荣耀交流协会互评Beta版本--爱阅app
Beta发布用户使用报告
 每周例行报告
 Beta冲刺第二周王者荣耀交流协会第六次会议
 每周例行报告
原文地址：https://www.cnblogs.com/sdhzdtwhm/p/10437018.html