• Spider—糗事百科段子爬取


    说明:无意滋生事端,仅学习分享,如有侵权,立即删除

    import requests
    import json
    
    
    class QiushibaikeSpider():
        """Download text jokes from qiushibaike.com and append them to a text file.

        The workflow is: build the page URLs, fetch every page and pull the joke
        text out of the decoded JSON, then write all jokes to one UTF-8 file in
        the current working directory.
        """

        def __init__(self, page_count=12, page_size=25, timeout=10):
            """Set up the URL template, request headers and crawl limits.

            Args:
                page_count: Number of pages to crawl, starting from page 1.
                page_size: Maximum number of jokes taken from each page.
                timeout: Seconds to wait for each HTTP request.
            """
            self.url = 'https://www.qiushibaike.com/text/?page={}'
            self.headers = {
                'user-agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Mobile Safari/537.36',
            }
            self.page_count = page_count
            self.page_size = page_size
            self.timeout = timeout

        def get_url_list(self):
            """Return the page URLs for pages 1..page_count."""
            return [self.url.format(page) for page in range(1, self.page_count + 1)]

        def get_response_content_lists(self, url_lists):
            """Fetch every URL and return one list of joke texts per page.

            Each response body is decoded and parsed as JSON; the joke text is
            read from ``entry["data"]["content"]`` of each entry.
            NOTE(review): the URL template looks like an HTML listing page --
            confirm the endpoint really answers with a JSON array.

            Args:
                url_lists: Iterable of page URLs to request.

            Returns:
                A list with one inner list of joke strings per URL.
            """
            response_content_lists = []
            for url in url_lists:
                # A timeout keeps one stalled connection from hanging the crawl.
                response = requests.get(url, headers=self.headers, timeout=self.timeout)
                entries = json.loads(response.content.decode())
                # Slicing (instead of indexing 0..page_size-1) tolerates pages
                # that carry fewer entries than expected.
                page_content = [
                    entry["data"]["content"] for entry in entries[:self.page_size]
                ]
                response_content_lists.append(page_content)
            return response_content_lists

        def save_file(self, ret):
            """Append every joke in ``ret`` to a text file named after their count.

            Args:
                ret: List of pages, each page being a list of joke strings.
            """
            jokes = [joke for page in ret for joke in page]
            if jokes:
                # Name the file after the real number of jokes written.
                file_name = '糗事百科的{}个段子.txt'.format(len(jokes))
                # Open once for the whole batch rather than once per joke.
                with open(file_name, 'a', encoding="utf8") as f:
                    for joke in jokes:
                        # Separator line, the joke, then five newlines as spacing.
                        f.write("*" * 20 + "\n" + joke + "\n" * 5)
            print("保存成功,请查看")

        def run(self):
            """Run the full crawl: build URLs, fetch the jokes, save them."""
            # Build the list of page URLs.
            url_lists = self.get_url_list()
            print(len(url_lists))
            # Fetch the joke texts for every page.
            response_content_lists = self.get_response_content_lists(url_lists)
            # Write everything to the output file.
            self.save_file(response_content_lists)
    
    # Script entry point: run the crawl only when executed directly.
    if __name__ == '__main__':
        spider = QiushibaikeSpider()
        spider.run()
  • 相关阅读:
    TS流批量下载----------Xamarin开发系列
    TS视频流下载合成
    B站缓存视频音频合并
    ActiveX录音插件
    FZU-2191 完美的数字 简单数论
    COCI2011/2012 破解密文 KMP
    COCI2010/2011 踢踏舞 线段树区间子段和
    [USACO03FALL] 受欢迎的牛-G tarjan缩点
    洛谷 P1967 货车运输 最大生成树+倍增LCA
    洛谷 P1613 跑路 倍增+最短路
  • 原文地址:https://www.cnblogs.com/nuochengze/p/12767351.html
Copyright © 2020-2023  润新知