爬虫所用网址:https://www.h128.com/list/animal/0/0/1/4/t/1.html
共13页免费图片,就是网址的最后数字从1变化到13了
总是看别人的,自己不动手真不知道会出现那么多错误,多练才是王道啊
直接上代码了,想直接复制就能运行的 记得先在D盘创建PIC文件夹(D:\PIC),运行后图片直接保存到PIC文件夹内
import requests
import re
import time
def get_one_page(url):
    """Fetch one listing page and return its HTML text.

    Args:
        url: Full URL of the listing page to download.

    Returns:
        The response body as text when the server answers 200,
        otherwise None.
    """
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0"}
    # timeout keeps a stalled connection from hanging the whole crawl forever
    res = requests.get(url, headers=headers, timeout=10)
    # brief pause between requests to stay polite to the server
    time.sleep(1)
    if res.status_code == 200:
        return res.text
    return None
def parse_one_page(text):
    """Extract every <img src="..."> URL from the page HTML and save each
    image into D:\\PIC, named after the last segment of its URL.

    Args:
        text: HTML source of one listing page.

    Bug fixes vs. the original: the regex was missing its closing quote
    ('<img src="(.*?)?' could only match the empty string), and
    'D:PIC' + "\" was an unterminated string literal (SyntaxError).
    """
    # lazy group captures exactly what sits between the src quotes
    pattern = r'<img src="(.*?)"'
    url_list = re.findall(pattern, text)
    for url in url_list:
        res = requests.get(url)
        # file name = last path component of the image URL
        path = 'D:\\PIC\\' + url.split('/')[-1]
        with open(path, 'wb') as f:
            # 'with' closes the file automatically; no explicit close needed
            f.write(res.content)
        print(f'{path}--保存成功。')
def main():
    """Crawl all 13 free listing pages and download every image on them."""
    for page in range(1, 14):
        url = "https://www.h128.com/list/animal/0/0/1/4/t/" + str(page) + ".html"
        text = get_one_page(url)
        # get_one_page returns None on a non-200 response; skip such pages
        # instead of passing None into re.findall and crashing
        if text:
            parse_one_page(text)


if __name__ == "__main__":
    main()
参考链接:https://blog.csdn.net/Six23333/article/details/107965742