• xml dom minidom


    一. xml相关术语:

    1.Document(文档): 对应一个xml文件

    2.Declaration(声明):

    <?xml version="1.0" encoding="utf-8"?>

    version指定了版本,encoding指定了文件编码

    3.Comment(注释),同html中的注释

    <!--just a comment about book_store-->

    4.Element(元素):指的是从( 且包括) 开始标签直到
    ( 且包括) 结束标签的部分,如<book_store></book_store>

    <book_store name="newhua" website="https://www.amazon.cn/b?node=1876097071">
        <book1>
            <name>hamlet</name>
            <author>William Shakespeare</author>
        </book1>
    </book_store>

    5.Tag(标签): 用于表示元素的起始与结束,如book1,name,author等

    6.Attribute(属性),如上面的name,website

    7.Text(文本),如hamlet

    二.解析xml

    1.解析xml,有三种方法:

    from xml.dom.minidom import parse, parseString

    # 1) Parse by file name.
    dom1 = parse('test.xml')

    # 2) Parse an already-opened file object. The with-block closes the
    #    handle as soon as parsing is finished; binary mode leaves decoding
    #    to the XML parser, which follows the encoding named in the
    #    document's own declaration.
    with open('test.xml', 'rb') as data:
        dom2 = parse(data)

    # 3) Parse XML held in a string.
    note = """
    <note>
    <to>Peter</to>
    <from>Jane</from>
    <heading>Reminder</heading>
    <body>Don't be late for the meeting</body>
    </note>
    """
    dom3 = parseString(note)

    2.得到根元素

    # Parse the document, then fetch its root element.
    doc = parse('test.xml')  # parse the XML document by file name
    root = doc.documentElement  # root element of the parsed document

    三.创建xml

    from xml.dom.minidom import Document
    doc = Document()  # create an empty document
    
    
    from xml.dom.minidom import getDOMImplementation
    impl = getDOMImplementation()
    # Create a document and add the root element book_store in one call.
    doc = impl.createDocument(None,"book_store",None)
    print(doc.documentElement.tagName)  # prints: book_store
    
    # doc2 is the same as the empty document obtained from doc = Document()
    doc2 = impl.createDocument(None,None,None)

    四.类及层次结构

    可以发现:Element,Text,Comment,Attribute的创建工作全部由Document完成,然后通过appendChild或insertBefore方法将新的对象插入到Document中。

    五.具体操作

    1.解析xml文件

    movies.xml

    <collection shelf="New Arrivals">
    <movie title="Enemy Behind">
       <type>War, Thriller</type>
       <format>DVD</format>
       <year>2003</year>
       <rating>PG</rating>
       <stars>10</stars>
       <description>Talk about a US-Japan war</description>
    </movie>
    <movie title="Transformers">
       <type>Anime, Science Fiction</type>
       <format>DVD</format>
       <year>1989</year>
       <rating>R</rating>
       <stars>8</stars>
       <description>A scientific fiction</description>
    </movie>
       <movie title="Trigun">
       <type>Anime, Action</type>
       <format>DVD</format>
       <episodes>4</episodes>
       <rating>PG</rating>
       <stars>10</stars>
       <description>Vash the Stampede!</description>
    </movie>
    <movie title="Ishtar">
       <type>Comedy</type>
       <format>VHS</format>
       <rating>PG</rating>
       <stars>2</stars>
       <description>Viewable boredom</description>
    </movie>
    </collection>

    代码:

    import xml.dom.minidom
    from xml.dom.minidom import parse


    def _child_text(node, tag_name):
        """Return the text of the first ``tag_name`` element under ``node``.

        Returns an empty string when no such element exists or when it has
        no child node, so a record with a missing field does not abort the
        whole listing.
        """
        matches = node.getElementsByTagName(tag_name)
        if not matches or matches[0].firstChild is None:
            return ''
        return matches[0].firstChild.data


    # Open the XML document with the minidom parser.
    tree = xml.dom.minidom.parse('movies.xml')
    # print(type(tree))  # <class 'xml.dom.minidom.Document'>
    collection = tree.documentElement  # root element of the document
    if collection.hasAttribute('shelf'):
        print("root element attribute:", collection.getAttribute("shelf"))

    movie_list = collection.getElementsByTagName('movie')
    for movie in movie_list:
        print('******Movie*****')
        if movie.hasAttribute("title"):
            print('Title', movie.getAttribute('title'))
        # Iterating over the field names avoids binding module-level
        # variables called ``type`` and ``format``, which would shadow the
        # builtins of the same name.
        for field in ('type', 'format', 'rating', 'stars', 'description'):
            print(field + ": ", _child_text(movie, field))

    2.写xml文件

    效果:

    <?xml version="1.0" encoding="utf-8"?>
    <!--just a comment about book_store-->
    <book_store name="amazon" website="https://www.amazon.cn/b?node=1876097071">
        <book1>
            <name>hamlet</name>
            <author>William Shakespeare</author>
        </book1>
    </book_store>

    代码:

    from xml.dom.minidom import Document

    doc = Document()

    # Add a comment node ahead of the root element.
    comment = doc.createComment('just a comment about book_store')
    doc.appendChild(comment)

    # Alternative way to obtain an empty document:
    # from xml.dom.minidom import getDOMImplementation
    # impl = getDOMImplementation()
    # doc = impl.createDocument(None, None, None)

    book_store = doc.createElement('book_store')  # create the root element
    book_store.setAttribute('name', 'amazon')  # set attributes
    book_store.setAttribute('website', 'https://www.amazon.cn/b?node=1876097071')
    doc.appendChild(book_store)  # attach the root element to the document

    book1 = doc.createElement('book1')  # create element book1
    book1_name = doc.createElement('name')
    book1_name_value = doc.createTextNode('hamlet')  # create a text node
    book1_name.appendChild(book1_name_value)
    book1_author = doc.createElement('author')
    book1_author_value = doc.createTextNode('William Shakespeare')
    book1_author.appendChild(book1_author_value)
    book1.appendChild(book1_name)
    book1.appendChild(book1_author)
    book_store.appendChild(book1)

    # With an encoding, toprettyxml() returns bytes; decode them for printing.
    print(doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8').decode('utf-8'))

    # Alternative: write the bytes from toprettyxml() with a file opened in 'wb' mode.
    # with open('book_store.xml', 'wb') as f:
    #     data = doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8')
    #     f.write(data)

    # writexml() writes str, so the file is opened in text mode. The file is
    # opened as UTF-8 so its bytes agree with the encoding named in the XML
    # declaration. ``addindent`` (the per-level indentation) is what
    # produces nested indentation; ``indent`` is only the starting prefix
    # and would indent every line by the same single tab.
    with open('test_store.xml', 'w', encoding='utf-8') as f:
        doc.writexml(f, addindent='\t', newl='\n', encoding='utf-8')

     3.简单封装

    import xml.dom.minidom


    class MyXMLGenerator:
        """Thin convenience wrapper around a minidom ``Document``.

        Nodes are created through the wrapped document and the finished
        tree is written to ``xml_name`` by :meth:`genXML`.
        """

        def __init__(self, xml_name):
            """Remember the output path and start with an empty document."""
            self.xml_name = xml_name
            self.doc = xml.dom.minidom.Document()

        def createComment(self, value):
            """Create a comment node with text ``value`` and append it to the document."""
            c = self.doc.createComment(value)
            self.doc.appendChild(c)

        def setNodeAttribute(self, node, attname, value):
            """Set attribute ``attname`` of ``node`` to ``value``."""
            node.setAttribute(attname, value)

        def createElement(self, tagName):
            """Create and return a new, not yet attached element named ``tagName``."""
            return self.doc.createElement(tagName)

        def appendChild(self, node, parent_node=None):
            """Append ``node`` to ``parent_node``, or to the document when no parent is given."""
            if parent_node is not None:
                parent_node.appendChild(node)
            else:
                self.doc.appendChild(node)

        def setNodeValue(self, node, value):
            """Append a text node holding ``value`` to ``node``."""
            text_node = self.doc.createTextNode(value)
            node.appendChild(text_node)

        def genXML(self):
            """Write the document to ``self.xml_name`` as tab-indented UTF-8 XML."""
            # With an encoding, toprettyxml() returns bytes, hence 'wb'.
            data = self.doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8')
            with open(self.xml_name, 'wb') as f:
                f.write(data)

     参考:

    https://docs.python.org/3/library/xml.dom.minidom.html

    https://docs.python.org/3/library/xml.dom.html

  • 相关阅读:
    BZOJ1187 [HNOI2007]神奇游乐园(插头dp)
    BZOJ4926 皮皮妖的递推
    BZOJ3684 大朋友和多叉树(多项式相关计算)
    BZOJ4574 [Zjoi2016]线段树
    杜教筛进阶+洲阁筛讲解+SPOJ divcnt3
    从几场模拟考试看一类分块算法
    bzoj3142 luogu3228 HNOI2013 数列
    luogu3244 bzoj4011 HNOI2015 落忆枫音
    codeforces 286E Ladies' Shop
    BZOJ4825 单旋
  • 原文地址:https://www.cnblogs.com/hupeng1234/p/6684667.html
Copyright © 2020-2023  润新知