一. xml相关术语:
1.Document(文档): 对应一个xml文件
2.Declaration(声明):
<?xml version="1.0" encoding="utf-8"?>
version指定了版本,encoding指定了文件编码
3.Comment(注释),同html中的注释
<!--just a comment about book_store-->
4.Element(元素):指的是从开始标签(含)直到结束标签(含)的部分,如<book_store></book_store>
<book_store name="newhua" website="https://www.amazon.cn/b?node=1876097071"> <book1> <name>hamlet</name> <author>William Shakespeare</author> </book1> </book_store>
5.Tag(标签): 用于表示元素的起始与结束,如book1,name,author等
6.Attribute(属性),如上面的name,website
7.Text(文本),如hamlet
二.解析xml
1.得到Document对象,有三种方法:
# Three ways to obtain a minidom Document object.
from xml.dom.minidom import parse, parseString

# 1) Parse an XML file by its file name.
dom1 = parse('test.xml')

# 2) Parse an already-open file object.
#    The file is opened in binary mode so the XML parser itself decodes the
#    bytes (honouring the encoding declaration) instead of the locale's
#    default text encoding, and the `with` block guarantees it is closed.
with open('test.xml', 'rb') as data:
    dom2 = parse(data)

# 3) Parse XML held in a string.
note = """
<note>
    <to>Peter</to>
    <from>Jane</from>
    <heading>Reminder</heading>
    <body>Don't be late for the meeting</body>
</note>
"""
dom3 = parseString(note)
2.得到根元素
# Parse the XML file by name; the result is a Document object.
doc = parse('test.xml')
# The root element of the document is exposed as `documentElement`.
root = doc.documentElement
三.创建xml
# Two ways to create a new XML document with minidom.
from xml.dom.minidom import Document
from xml.dom.minidom import getDOMImplementation

# Option 1: instantiate an empty Document directly.
doc = Document()

# Option 2: go through the DOM implementation object.
impl = getDOMImplementation()

# Create a document and add the root node "book_store" in one call.
doc = impl.createDocument(None, "book_store", None)
print(doc.documentElement.tagName)  # book_store

# Passing None as the root name gives a document without a root element,
# the same as doc = Document().
doc2 = impl.createDocument(None, None, None)
四.类及层次结构
可以发现:Element,Text,Comment,Attribute的创建工作全部由Document完成,然后通过appendChild或insertBefore方法将新的对象插入到Document中。
五.具体操作
1.解析xml文件
movies.xml
<collection shelf="New Arrivals"> <movie title="Enemy Behind"> <type>War, Thriller</type> <format>DVD</format> <year>2003</year> <rating>PG</rating> <stars>10</stars> <description>Talk about a US-Japan war</description> </movie> <movie title="Transformers"> <type>Anime, Science Fiction</type> <format>DVD</format> <year>1989</year> <rating>R</rating> <stars>8</stars> <description>A schientific fiction</description> </movie> <movie title="Trigun"> <type>Anime, Action</type> <format>DVD</format> <episodes>4</episodes> <rating>PG</rating> <stars>10</stars> <description>Vash the Stampede!</description> </movie> <movie title="Ishtar"> <type>Comedy</type> <format>VHS</format> <rating>PG</rating> <stars>2</stars> <description>Viewable boredom</description> </movie> </collection>
代码:
import xml.dom.minidom
from xml.dom.minidom import parse


def _child_text(element, tag_name):
    """Return the text of the first ``tag_name`` descendant of ``element``.

    Raises IndexError when no such descendant exists, and AttributeError
    when that descendant has no child node.
    """
    return element.getElementsByTagName(tag_name)[0].firstChild.data


# Open the XML document with the minidom parser.
tree = xml.dom.minidom.parse('movies.xml')
# print(type(tree))  # <class 'xml.dom.minidom.Document'>

# Root element of the document.
collection = tree.documentElement
if collection.hasAttribute('shelf'):
    print("root element attribute:", collection.getAttribute("shelf"))

# Every <movie> element below the root.
movie_list = collection.getElementsByTagName('movie')

for movie in movie_list:
    print('******Movie*****')
    if movie.hasAttribute("title"):
        title = movie.getAttribute('title')
        print('Title', title)

    # Names are prefixed with `movie_` so the builtins `type` and `format`
    # are not shadowed.
    movie_type = _child_text(movie, 'type')
    movie_format = _child_text(movie, 'format')
    rating = _child_text(movie, 'rating')
    stars = _child_text(movie, 'stars')
    description = _child_text(movie, 'description')

    print("type: ", movie_type)
    print("format: ", movie_format)
    print("rating: ", rating)
    print("stars: ", stars)
    print('description: ', description)
2.写xml文件
效果:
<?xml version="1.0" encoding="utf-8"?> <!--just a comment about book_store--> <book_store name="amazon" website="https://www.amazon.cn/b?node=1876097071"> <book1> <name>hamlet</name> <author>William Shakespeare</author> </book1> </book_store>
代码:
# Build a small <book_store> document in memory, print it, and save it.
from xml.dom.minidom import Document

doc = Document()

# A comment node placed before the root element.
comment = doc.createComment('just a comment about book_store')
doc.appendChild(comment)

# Alternative way to obtain an empty document:
# from xml.dom.minidom import getDOMImplementation
# impl = getDOMImplementation()
# doc = impl.createDocument(None, None, None)

# Root element and its attributes.
book_store = doc.createElement('book_store')
book_store.setAttribute('name', 'amazon')
book_store.setAttribute('website', 'https://www.amazon.cn/b?node=1876097071')
doc.appendChild(book_store)

# <book1> with <name> and <author> children; the text of each child is a
# separate text node appended to it.
book1 = doc.createElement('book1')

book1_name = doc.createElement('name')
book1_name_value = doc.createTextNode('hamlet')
book1_name.appendChild(book1_name_value)

book1_author = doc.createElement('author')
book1_author_value = doc.createTextNode('William Shakespeare')
book1_author.appendChild(book1_author_value)

book1.appendChild(book1_name)
book1.appendChild(book1_author)
book_store.appendChild(book1)

# With an encoding given, toprettyxml() returns bytes, so decode for printing.
print(doc.toprettyxml(indent=' ', newl=' ', encoding='utf-8').decode('utf-8'))

# Alternative: write the bytes produced by toprettyxml() in binary mode.
# with open('book_store.xml', 'wb') as f:
#     data = doc.toprettyxml(indent=' ', newl=' ', encoding='utf-8')
#     f.write(data)

# writexml() emits str, so the file is opened in text mode. The file encoding
# is set explicitly so the bytes on disk match the encoding declared in the
# XML header. `addindent` (the per-level indentation) is used rather than
# `indent` (the indentation of the current node) so the output matches the
# toprettyxml() call above.
with open('test_store.xml', 'w', encoding='utf-8') as f:
    doc.writexml(f, addindent=' ', newl=' ', encoding='utf-8')
3.简单封装
class MyXMLGenerator:
    """Small convenience wrapper around a minidom ``Document``.

    The instance owns one document (``self.doc``) and remembers the path
    (``self.xml_name``) that ``genXML`` writes the serialized document to.
    """

    def __init__(self, xml_name):
        """Remember the output path and start with an empty document."""
        self.doc = xml.dom.minidom.Document()
        self.xml_name = xml_name

    def createComment(self, value):
        """Create a comment node with text ``value`` and append it to the document."""
        self.doc.appendChild(self.doc.createComment(value))

    def setNodeAttribute(self, node, attname, value):
        """Set attribute ``attname`` of ``node`` to ``value``."""
        node.setAttribute(attname, value)

    def createElement(self, tagName):
        """Return a new, unattached element named ``tagName``."""
        return self.doc.createElement(tagName)

    def appendChild(self, node, parent_node=None):
        """Append ``node`` to ``parent_node``, or to the document when no parent is given."""
        target = self.doc if parent_node is None else parent_node
        target.appendChild(node)

    def setNodeValue(self, node, value):
        """Append a text node holding ``value`` to ``node``."""
        node.appendChild(self.doc.createTextNode(value))

    def genXML(self):
        """Serialize the document as UTF-8 bytes and write it to ``self.xml_name``."""
        # toprettyxml() returns bytes when an encoding is given, hence 'wb'.
        with open(self.xml_name, 'wb') as out:
            out.write(self.doc.toprettyxml(indent=' ', newl=' ', encoding='utf-8'))
参考: