"""Download the ``img.BDE_Image`` pictures from every page of a Baidu Tieba thread.

Run as a script.  The first page is fetched to read the total page count,
then each page is fetched in turn and every matching image is saved as
``<OUTPUT_DIR><sanitized page title><page>-<index>.jpg``.
"""
import http.client
import os
import re
import urllib.request

# Thread to scrape; individual pages are addressed by appending "&pn=<page>".
THREAD_URL = "https://tieba.baidu.com/p/5407739329?see_lz=1"
# Directory prefix (with trailing slash) that downloaded images are written to.
OUTPUT_DIR = "D:/www/spider/"

# Characters replaced in file names: \ / : * ? " < > |
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')
# Text node whose following sibling <span> holds the total page count.
_PAGE_COUNT_MARKER = re.compile("回复贴")
# Failures that should skip one page/image instead of aborting the whole run.
# urllib.error.URLError is an OSError subclass, so it is covered as well.
_NETWORK_ERRORS = (OSError, http.client.HTTPException)


def validateTitle(title):
    r"""Return *title* with characters unsuitable for a file name replaced.

    Every occurrence of one of ``\ / : * ? " < > |`` is replaced by a single
    underscore; all other characters are kept unchanged.

    Args:
        title: The text to sanitize (for example an HTML page title).

    Returns:
        The sanitized string; an empty string stays empty.
    """
    return _ILLEGAL_FILENAME_CHARS.sub("_", title)


def _fetch_soup(url):
    """Fetch *url* and return its content parsed with the ``lxml`` parser."""
    # Imported here so that the pure helpers of this module (validateTitle)
    # remain importable when the third-party scraping stack is not installed.
    from bs4 import BeautifulSoup

    # The context manager closes the HTTP response once parsing is done.
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response, "lxml")


def _count_pages(soup):
    """Return the total number of pages announced on a parsed thread page.

    Raises:
        ValueError: If the marker text or its sibling ``<span>`` is missing,
            or if the span content is not an integer.
    """
    marker = soup.find(text=_PAGE_COUNT_MARKER)
    span = marker.find_next_sibling("span") if marker is not None else None
    if span is None or span.string is None:
        raise ValueError("could not locate the total page count")
    return int(span.string)


def _save_page_images(page, page_number):
    """Download every ``img.BDE_Image`` of *page* into ``OUTPUT_DIR``.

    A failed download is reported and skipped so the remaining images are
    still attempted.
    """
    title_tag = page.title
    raw_title = title_tag.string if title_tag is not None else None
    # Pages without a usable <title> still get a valid file name.
    prefix = OUTPUT_DIR + validateTitle(raw_title or "untitled") + str(page_number)

    images = page.find_all("img", class_="BDE_Image")
    for index, image in enumerate(images, start=1):
        src = image.get("src")
        if not src:
            # An <img> without a source cannot be downloaded.
            continue
        print(src)
        file_name = "{}-{}.jpg".format(prefix, index)
        print(file_name)
        try:
            urllib.request.urlretrieve(src, file_name)
        except _NETWORK_ERRORS as exc:
            print(exc)


def main():
    """Read the page count of the thread, then download the images of each page."""
    try:
        total_pages = _count_pages(_fetch_soup(THREAD_URL))
        os.makedirs(OUTPUT_DIR, exist_ok=True)
    except _NETWORK_ERRORS + (ValueError,) as exc:
        # Without a page count or an output directory nothing can be saved.
        print(exc)
        return

    for page_number in range(1, total_pages + 1):
        try:
            page = _fetch_soup("{}&pn={}".format(THREAD_URL, page_number))
        except _NETWORK_ERRORS as exc:
            # Report the failure and carry on with the next page.
            print(exc)
            continue
        _save_page_images(page, page_number)


if __name__ == "__main__":
    main()