说明:无意滋生事端,仅学习分享,如有侵权,立即删除
import json

import requests


class QiushibaikeSpider:
    """Download text entries from qiushibaike.com listing pages and save them.

    Each listing page is fetched with a mobile user-agent, the response body
    is parsed as JSON, and the ``["data"]["content"]`` string of each entry is
    collected. All collected strings are then appended to a single text file.
    """

    def __init__(self, page_count=12, items_per_page=25, timeout=10):
        """Configure the spider.

        Args:
            page_count: Number of listing pages to request (pages 1..N).
            items_per_page: Maximum number of entries taken from each page.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.url = 'https://www.qiushibaike.com/text/?page={}'
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Mobile Safari/537.36',
        }
        self.page_count = page_count
        self.items_per_page = items_per_page
        self.timeout = timeout

    def get_url_list(self):
        """Return the listing-page URLs for pages 1 through ``page_count``."""
        return [self.url.format(page) for page in range(1, self.page_count + 1)]

    def get_response_content_lists(self, url_lists):
        """Fetch every URL and extract the entry texts from each response.

        Args:
            url_lists: Iterable of page URLs to request.

        Returns:
            A list with one inner list per URL; each inner list holds the
            ``["data"]["content"]`` value of up to ``items_per_page`` entries.

        Raises:
            requests.RequestException: On network failure or timeout.
            json.JSONDecodeError: If a response body is not valid JSON.
            KeyError: If an entry lacks the ``data`` / ``content`` keys.
        """
        response_content_lists = []
        for url in url_lists:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            entries = json.loads(response.content.decode())
            # Slice instead of indexing a fixed range so a page that returns
            # fewer entries than expected does not raise IndexError.
            response_content_lists.append(
                [entry["data"]["content"] for entry in entries[:self.items_per_page]]
            )
        return response_content_lists

    def save_file(self, ret):
        """Append every collected entry to a text file and report success.

        The file name embeds the number of entries actually written.

        Args:
            ret: List of lists of strings, as returned by
                ``get_response_content_lists``.
        """
        jokes = [joke for page in ret for joke in page]
        if jokes:
            filename = '糗事百科的{}个段子.txt'.format(len(jokes))
            # Open once for the whole batch rather than once per entry.
            with open(filename, 'a+', encoding="utf8") as f:
                # NOTE(review): the separators are single spaces as found in
                # the source; they may have been newlines before the file's
                # whitespace was collapsed -- confirm against the original.
                f.writelines("*" * 20 + " " + joke + ' ' * 5 for joke in jokes)
        print("保存成功,请查看")

    def run(self):
        """Build the URL list, download all pages, and save the results."""
        # Build the list of page URLs.
        url_lists = self.get_url_list()
        print(len(url_lists))
        # Download and extract the entry texts.
        response_content_lists = self.get_response_content_lists(url_lists)
        # Write everything to the output file.
        self.save_file(response_content_lists)


if __name__ == '__main__':
    obj = QiushibaikeSpider()
    obj.run()