• Python 解析RSS xml文本


    用 Python 解析博客园 RSS 订阅的 XML 文本

    源码

    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    # Author:Jruing
    # FileName:RSS
    # DateTime:2020/5/29 13:59
    # SoftWare: PyCharm
    
    from xml.dom.minidom import parseString
    import requests
    
    
    class RSS():
        """Download an Atom-style feed and extract basic metadata of its entries.

        The parser looks for ``entry`` elements carrying ``id``, ``title``,
        ``published``, ``updated`` and ``author``/``name`` children.
        """

        def __init__(self, rss_url):
            """Remember the feed URL that :meth:`get_context` will download."""
            self.rss_url = rss_url

        def get_context(self):
            """Download the feed at ``self.rss_url`` and parse it.

            Returns:
                Whatever :meth:`parse_context` returns for the downloaded body.
            """
            # A timeout keeps an unresponsive server from blocking forever.
            response = requests.get(self.rss_url, timeout=10)
            # Pass the raw bytes so the XML parser determines the encoding
            # itself instead of depending on the charset guessed for ``.text``.
            return self.parse_context(response.content)

        @staticmethod
        def _first_text(node, tag, default=""):
            """Return the text of the first ``tag`` element below ``node``.

            ``default`` is returned when the element is missing or holds no text.
            """
            elements = node.getElementsByTagName(tag)
            if not elements:
                return default
            parts = [
                child.data
                for child in elements[0].childNodes
                if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
            ]
            return "".join(parts) if parts else default

        def _entry_to_dict(self, entry):
            """Convert one ``entry`` element into a plain dict of its metadata."""
            art_author = ""
            # When several authors are listed the last one with a name wins.
            for author in entry.getElementsByTagName('author'):
                art_author = self._first_text(author, 'name', art_author)
            return {"art_url": self._first_text(entry, 'id'),
                    "art_title": self._first_text(entry, 'title'),
                    "art_publish": self._first_text(entry, 'published'),
                    "art_update": self._first_text(entry, 'updated'),
                    "art_author": art_author}

        def _collect_entries(self, root):
            """Return the metadata dicts of every ``entry`` below ``root``."""
            return [self._entry_to_dict(entry)
                    for entry in root.getElementsByTagName('entry')]

        def parse_entries(self, response):
            """Parse the feed and return one metadata dict per entry.

            Args:
                response: The feed document as ``str`` or ``bytes``.

            Returns:
                A list of dicts with the keys ``art_url``, ``art_title``,
                ``art_publish``, ``art_update`` and ``art_author``; empty when
                the feed has no ``entry`` elements.
            """
            return self._collect_entries(parseString(response).documentElement)

        def parse_context(self, response):
            """Parse the feed, print what was found and return the first entry.

            Prints the name of the first ``author`` element in the document (if
            it has one), then the metadata dict of every entry.

            Args:
                response: The feed document as ``str`` or ``bytes``.

            Returns:
                The metadata dict of the first entry, or ``None`` when the feed
                has no entries. Use :meth:`parse_entries` to get all of them.
            """
            # Build the DOM and work from its root element.
            root = parseString(response).documentElement
            # Look up the blog owner's name via the author tag.
            authors = root.getElementsByTagName('author')
            if authors:
                owner = self._first_text(authors[0], 'name', None)
                if owner is not None:
                    print(owner)
            # Extract the metadata of every article, not only the first one.
            entries = self._collect_entries(root)
            for data in entries:
                print(data)
            return entries[0] if entries else None
    
    
    # Script entry point: fetch and parse the author's cnblogs feed.
    if __name__ == "__main__":
        RSS("http://feed.cnblogs.com/blog/u/565725/rss/").get_context()
    
  • 相关阅读:
    [HAOI2018]苹果树
    [TJOI2013]拯救小矮人
    [SDOI2016]硬币游戏
    一辈子都学不会的有上下界的网络流
    [AHOI2014/JSOI2014]支线剧情
    [JSOI2009]球队收益
    hdu-1856 More is better---带权并查集
    hdu-1325 Is It A Tree?---并查集
    hdu-1272 小希的迷宫---并查集或者DFS
    hdu1213-How Many Tables---基础并查集
  • 原文地址:https://www.cnblogs.com/jruing/p/12988472.html
Copyright © 2020-2023  润新知