# coding: utf8
"""Download the images referenced by a run of yh31.com listing pages.

A small learning exercise: use the requests library to fetch each page,
then use bs4 (BeautifulSoup) to pull out the data we want -- here the
``src`` of the page's <img> tags -- and save every image to the current
directory as a sequentially numbered .jpg file.
"""
import requests
from bs4 import BeautifulSoup

# Running counter used to name saved files. It is incremented *before*
# each save, so the first file written is "2.jpg".
PhotoName = 1
# Every image URL discovered so far, in crawl order.
DATA = []

PAGE_URL = 'http://qq.yh31.com/zjbq/0551964_{}.html'
IMAGE_HOST = 'http://x.yh31.com:85'
FIRST_PAGE = 2
LAST_PAGE = 69
REQUEST_TIMEOUT = 15  # seconds


def save_img(url, name, timeout=REQUEST_TIMEOUT):
    """Download *url* and write its bytes to ``<name>.jpg``.

    Args:
        url: Address of the image to download.
        name: Target file name without extension; '.jpg' is appended.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        OSError: The target file could not be written.
    """
    img = requests.get(url, timeout=timeout)
    # Do not store an HTML error page under a .jpg name.
    img.raise_for_status()
    file_name = name + '.jpg'
    # 'wb' rather than 'ab': appending to a file left over from an earlier
    # run would concatenate two images into one corrupt file.
    with open(file_name, 'wb') as f:
        f.write(img.content)
    print(url, 'is ok!')


def main():
    """Crawl pages FIRST_PAGE..LAST_PAGE and save every image found.

    Appends each image URL to DATA and advances PhotoName once per image.
    Pages or images that cannot be fetched are reported and skipped so a
    single failure does not abort the whole crawl.
    """
    global PhotoName

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        try:
            res = requests.get(PAGE_URL.format(page), timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # Without a response there is nothing to parse; reusing the
            # previous page's response would re-download its images.
            print('page', page, 'failed:', err)
            continue

        # Only <img> tags matching alt="" are collected.
        soup = BeautifulSoup(res.text, 'lxml').find_all('img', alt="")
        mess = '第' + str(page) + '页,爬取图片地址' + str(len(soup)) + '张...'
        print(mess)

        for vl in soup:
            src = vl.get('src')
            if not src:
                # An <img> without a src has nothing to download.
                continue
            img_url = IMAGE_HOST + src
            DATA.append(img_url)
            PhotoName += 1
            try:
                save_img(img_url, str(PhotoName))
            except (requests.RequestException, OSError) as err:
                print(img_url, 'failed:', err)
            # To read the text inside a tag instead, use vl.string.


if __name__ == '__main__':
    main()