...
import os
import sys

import requests
from bs4 import BeautifulSoup


class mzitu():
    """Download every image of one mzitu.com album into a per-album folder.

    Typical use is ``mzitu().html(album_url)``: the album page is fetched,
    a folder named after the album title is created under ``base_dir``, the
    process switches into that folder, and each page's main image is saved
    there.
    """

    # Root folder that receives one sub-folder per album. The original code
    # mixed "D:mzitu", "E:MZITU" and "E:mzitu"; a single location is used now.
    # NOTE(review): "E:mzitu" has no path separator after the drive letter,
    # so on Windows it is relative to the current directory of drive E --
    # confirm whether "E:\\mzitu" was intended.
    base_dir = "E:mzitu"

    # Seconds to wait on a single HTTP request before giving up.
    timeout = 30

    def __init__(self, base_dir=None, timeout=None):
        """Create a downloader.

        Args:
            base_dir: Optional root folder for albums; defaults to the
                class-level ``base_dir``.
            timeout: Optional per-request timeout in seconds; defaults to
                the class-level ``timeout``.
        """
        if base_dir is not None:
            self.base_dir = base_dir
        if timeout is not None:
            self.timeout = timeout
        # Resolve once, up front: mkdir() changes the working directory, so a
        # relative base would otherwise point somewhere else for each
        # subsequent album.
        self.base_dir = os.path.abspath(self.base_dir)

    def html(self, href):
        """Download the whole album whose first page is ``href``.

        Args:
            href: URL of the album; page N is fetched from ``href + '/' + N``.
        """
        page = self.request(href)
        # Parse the document once and reuse it for both lookups.
        soup = BeautifulSoup(page.text, 'lxml')
        title = soup.find('h2', class_='main-title').get_text()
        print(u'开始保存:', title)
        self.mkdir(title)
        # The second-to-last <span> in the page navigation holds the number
        # of pages in the album.
        max_span = soup.find('div', class_='pagenavi').find_all('span')[-2].get_text()
        for page_no in range(1, int(max_span) + 1):
            self.img(href + '/' + str(page_no))

    def img(self, page_url):
        """Fetch one album page and save the image shown on it.

        Args:
            page_url: URL of a single page of the album.
        """
        page = self.request(page_url)
        soup = BeautifulSoup(page.text, 'lxml')
        img_url = soup.find('div', class_='main-image').find('img')['src']
        self.save(img_url)

    def save(self, img_url):
        """Download ``img_url`` into the current working directory.

        The file is named after the two characters that precede the last
        four characters of the URL, plus ``.jpg``.
        NOTE(review): this assumes URLs end in a two-digit index followed by
        a four-character extension such as ``.jpg`` -- confirm.

        Args:
            img_url: Direct URL of the image.
        """
        name = img_url[-6:-4]
        image = self.request(img_url)
        # 'wb', not 'ab': appending to a file left over from an earlier run
        # would concatenate two copies of the image and corrupt it.
        with open(name + '.jpg', 'wb') as f:
            f.write(image.content)

    def request(self, url):
        """GET ``url`` with a browser-like User-Agent and return the response.

        Args:
            url: Address to fetch.

        Returns:
            The ``requests.Response`` for a successful request.

        Raises:
            requests.RequestException: On timeout, connection failure, or a
                4xx/5xx HTTP status.
        """
        headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}
        # Without a timeout requests may block indefinitely on a stalled
        # connection.
        response = requests.get(url, headers=headers, timeout=self.timeout)
        # Fail loudly instead of parsing or saving an HTTP error body.
        response.raise_for_status()
        return response

    def mkdir(self, path):
        """Ensure the album folder exists and switch into it.

        Args:
            path: Folder name (the album title); surrounding whitespace is
                stripped.

        Returns:
            True if the folder was created by this call, False if it already
            existed.
        """
        path = path.strip()
        target = os.path.join(self.base_dir, path)
        created = not os.path.exists(target)
        if created:
            print(u'创建', path, u'文件夹')
            os.makedirs(target)
        else:
            print(u'名字叫做', path, u'的文件夹已经存在了')
        # Switch into the folder in both cases so save() always writes into
        # the album folder, including when it is left over from a prior run.
        os.chdir(target)
        return created


Mzitu = mzitu()

# Only crawl when run as a script; importing the module must not hit the
# network.
if __name__ == '__main__':
    Mzitu.html('http://www.mzitu.com/92251')
...