• Python 3.7 爬取《墨菲定律》并保存为本地 txt 文件


    #!/usr/local/bin/python3.7
    # -*- coding: utf-8 -*-
    # @Time: 2019/07/15
    # @Function 获取在线文本内容
    
    import requests
    from bs4 import BeautifulSoup
    import re
    import codecs
    
    # Index page of the book; the chapter links are scraped from it.
    url = 'https://www.shuhaige.com/7518/'
    # Request headers sent with every page download.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
        'Origin': 'https://www.shuhaige.com',
        'Host': 'www.shuhaige.com'
    }
    # Proxy server settings. The keys must be the bare URL scheme ('http',
    # 'https') with no trailing colon, otherwise requests will not match them
    # against the request URL.
    # NOTE(review): nothing in the visible code passes this mapping to
    # requests.get, so it currently has no effect.
    proxies = {
        'http': 'http://121.232.146.184',
        'https': 'https://144.255.48.197'
    }
    
    
    def getContent(timeout=10):
        """Download every chapter linked from the index page and save each one.

        Fetches the module-level ``url``, takes every ``<a>`` inside the first
        ``<dl>`` element as a chapter link, downloads each linked page and
        passes its first ``div .content`` match to ``saveToTxt`` together with
        the link text as the title.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up,
                so a stalled connection cannot hang the script forever.

        Raises:
            requests.HTTPError: If a page answers with an error status.
            IndexError: If the index page has no ``<dl>`` element or a chapter
                page has no ``div .content`` match.
        """
        index_response = requests.get(url, headers=header, timeout=timeout)
        index_response.raise_for_status()
        index_page = BeautifulSoup(index_response.text, 'html.parser')
        chapter_links = index_page.select('dl')[0].select('a')
        for link in chapter_links:
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be downloaded; skip it.
                continue
            itemUrl = f'https://www.shuhaige.com{href}'
            chapter_response = requests.get(itemUrl, headers=header, timeout=timeout)
            chapter_response.raise_for_status()
            chapter_page = BeautifulSoup(chapter_response.text, 'html.parser')
            # .string is None when the anchor wraps nested markup; fall back to
            # the concatenated text so the file still gets a usable name.
            title = link.string or link.get_text()
            saveToTxt(chapter_page.select('div .content')[0], title)
    
    
    # Write one chapter to a text file
    def saveToTxt(comments, title):
        """Strip markup from the chapter pieces and write them to a UTF-8 file.

        Each element of ``comments`` is converted to a string, every ``<br/>``
        is removed and any ``<p>...</p>`` span within a line is dropped. The
        cleaned pieces are concatenated and written to
        ``MoFeiDingLv/<title>.txt`` relative to the current directory.

        Args:
            comments: Iterable of items whose string form is the chapter HTML
                (the caller passes a BeautifulSoup element, which iterates over
                its children).
            title: Chapter title, used as the file name. Characters that are
                not valid in file names are replaced with ``_``.
        """
        import os  # local import so this block does not depend on a file-level change

        cleaned_parts = []
        for item in comments:
            text = str(item).replace('<br/>', '')
            cleaned_parts.append(re.sub(r'<p>.*</p>', '', text))

        # A '/' (or another reserved character) in the title would otherwise be
        # interpreted as part of the path and make the open() call fail.
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', str(title))
        # The output directory is not guaranteed to exist on a fresh checkout.
        os.makedirs('MoFeiDingLv', exist_ok=True)
        with codecs.open(f'MoFeiDingLv/{safe_title}.txt', 'w', encoding='utf-8') as file:
            file.write(''.join(cleaned_parts))
    
        print(f'{title}写入文件成功!')
    
    
    # Only start scraping when executed as a script, so importing this module
    # (for reuse or testing) does not trigger network requests.
    if __name__ == '__main__':
        getContent()
    

      书本内容来自 书海阁《墨菲定律》

  • 相关阅读:
    火狐下button标签子元素无法点击
    js里面的this指向
    (转载)http协议的Request Payload 和 Form Data 的区别
    (转载)http压缩 Content-Encoding: gzip
    函数的length属性
    Expires
    Etag 和 If-None-Match
    Mac下升级node到最新版本
    高级函数之函数绑定
    Java数据结构和算法day01 稀疏数组与队列
  • 原文地址:https://www.cnblogs.com/Man-Dream-Necessary/p/11193415.html
Copyright © 2020-2023  润新知