import requests
import os
import uuid
def get_id_uuid1():
    """Return a fresh version-1 UUID as a 32-character hex string.

    The value is the same text ``str(uuid.uuid1())`` would give with the
    dashes removed; ``UUID.hex`` provides that form directly.
    """
    return uuid.uuid1().hex
def test(pages_start, page_stop):
    """Print the result offset of each page from pages_start to page_stop.

    One line is printed per page, page_stop included; the offset of a page
    is 30 times its page number.
    """
    for page in range(pages_start, page_stop + 1):
        print(30 * page)
#def getManyPages(keyword, pages):
def _page_params(keyword, offset):
    """Build the acjson query parameters for the result page starting at offset."""
    return {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': 201326592,
        'is': '',
        'fp': 'result',
        'queryWord': keyword,
        'cl': 2,
        'lm': -1,
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': -1,
        'z': '',
        'ic': 0,
        'word': keyword,
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': 0,
        'istype': 2,
        'qc': '',
        'nc': 1,
        'fr': '',
        'pn': offset,  # index of the first result on this page
        'rn': 30,  # results requested per page; matches the offset step
        'gsm': '3',
        # NOTE(review): presumably a cache-busting timestamp copied from a
        # captured browser request -- confirm whether it is still needed.
        '1550217860355': ''
    }


def getManyPages(keyword, pages_start, page_stop, timeout=10):
    """Fetch Baidu image-search result pages for a keyword.

    One GET request is sent to the ``acjson`` endpoint for every page from
    ``pages_start`` through ``page_stop`` (inclusive). Each page asks for 30
    results starting at offset ``30 * page``.

    Args:
        keyword: Search term, sent as both ``queryWord`` and ``word``.
        pages_start: First page number to request.
        page_stop: Last page number to request (inclusive).
        timeout: Per-request timeout in seconds.

    Returns:
        A list with one entry per successfully fetched page, each entry
        being the ``data`` value of that page's JSON response (presumably a
        list of result dicts -- verify against the live API). Pages that
        fail to download, fail to parse, or carry no ``data`` are reported
        on stdout and skipped, so the result never contains ``None``.
    """
    url = 'https://image.baidu.com/search/acjson'
    # The headers are identical for every page, so build them once.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
        'Connection': 'keep-alive',
        'content-type': 'application/json'
    }
    urls = []
    for offset in range(30 * pages_start, 30 * page_stop + 30, 30):
        try:
            response = requests.get(
                url,
                headers=headers,
                params=_page_params(keyword, offset),
                timeout=timeout,
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: report the failed page and move on to the next.
            print(e)
            continue
        data = payload.get('data') if isinstance(payload, dict) else None
        if data is None:
            print('no data returned for offset %d' % offset)
            continue
        urls.append(data)
    return urls
def getImg(dataList, localPath, timeout=10):
    """Download every thumbnail referenced in dataList into localPath.

    Args:
        dataList: Iterable of result pages. Each page is an iterable of
            dicts, and a dict's ``thumbURL`` value, when present, is the
            image URL to fetch. Empty or ``None`` pages are skipped.
        localPath: Destination directory; it is created, including any
            missing parents, if it does not exist yet.
        timeout: Per-download timeout in seconds.

    Each image is written as ``<uuid1 hex>.jpg`` inside ``localPath``. A
    failed download is printed and skipped so the remaining images are
    still fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
        'Connection': 'keep-alive',
        'content-type': 'application/json'
    }
    # Create the target folder; works for nested paths and existing folders.
    os.makedirs(localPath, exist_ok=True)
    for page in dataList:
        if not page:
            continue
        for item in page:
            thumb_url = item.get('thumbURL') if isinstance(item, dict) else None
            if thumb_url is None:
                print('图片链接不存在')
                continue
            print('正在下载:%s' % thumb_url)
            try:
                ir = requests.get(thumb_url, headers=headers, timeout=timeout)
                # Do not save an HTTP error page under a .jpg name.
                ir.raise_for_status()
            except requests.RequestException as e:
                print(e)
                continue
            # join() works whether or not localPath ends with a separator.
            target = os.path.join(localPath, '%s.jpg' % get_id_uuid1())
            with open(target, 'wb') as f:
                f.write(ir.content)
if __name__ == '__main__':
    # Script entry point: for each keyword, fetch result pages 1 through 30
    # and store the thumbnails under /root/img/.
    keylist = ['小泽玛利亚']
    for keyword in keylist:
        print(keyword)
        getImg(getManyPages(keyword, 1, 30), '/root/img/')