使用python解析xml
Python 自带了几种 XML 解析方式:xml.parsers.expat、xml.dom、xml.etree.ElementTree、xml.sax 等。
这里介绍xml.dom和xml.etree.ElementTree
示例文件:
<?xml version="1.0" encoding="utf-8"?>
<parent id="top">
    <!--中文注释-->
    <child1 name="paul">Text goes here</child1>
    <child2 name="fred">More text</child2>
    <child3 name="小明" age="12">
        中文说明
    </child3>
</parent>
ElementTree解析
#!/usr/bin/python
# -*- coding: utf-8 -*-
# python file
# Filename: xtree.py
# Parse with ElementTree. Note: XML comments are lost with this approach.
import xml.etree.ElementTree as xparser
import os


def FindNodeAndSetAttr(xml, nodeName, attrMap):
    """Find the first ``nodeName`` element and overwrite its attributes.

    The current value of every attribute named in ``attrMap`` is printed
    first, then each ``key--value`` pair is printed and written to the
    element.

    Args:
        xml: Object offering ``find(path)``, e.g. the ``ElementTree``
            returned by ``xml.etree.ElementTree.parse``.
        nodeName: Tag name / path handed to ``xml.find``.
        attrMap: Mapping of attribute name to the new attribute value.

    Raises:
        LookupError: ``xml.find`` found no element for ``nodeName``.
        KeyError: An attribute in ``attrMap`` is not already present on
            the element.
    """
    ele = xml.find(nodeName)
    if ele is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on ``None.attrib``.
        raise LookupError("node %r not found" % (nodeName,))
    atts = ele.attrib
    # Show the existing values; indexing doubles as a check that every
    # attribute to be changed already exists.
    for k in attrMap:
        print(atts[k])
    for k, v in attrMap.items():
        print("%s--%s" % (k, v))
        ele.set(k, v)


def main():
    """Parse ./example.xml, update <child3>, and write the result to aa.xml."""
    print("current dir is %s" % (os.getcwd()))
    params = {'name': '小明', 'age': '12'}
    xfile = "./example.xml"
    xml1 = xparser.parse(xfile)
    FindNodeAndSetAttr(xml1, 'child3', params)
    # Write as UTF-8 and include the XML declaration.
    xml1.write('aa.xml', encoding='UTF-8', xml_declaration=True)


if __name__ == "__main__":
    main()
dom解析
#!/usr/bin/python
# -*- coding: utf-8 -*-
# python file
# Filename: xdom.py
# Parsing with minidom keeps XML comments, but the encoding attribute of the
# XML declaration is lost by toxml() and has to be written back by hand.
import xml.dom.minidom as xparser
import os
import shutil
import sys


def FindNodeAndSetAttr(xml, nodeName, attrMap):
    """Find the first ``nodeName`` element and overwrite its attributes.

    Args:
        xml: DOM object offering ``getElementsByTagName``, e.g. the document
            returned by ``xml.dom.minidom.parse``.
        nodeName: Tag name of the element to modify; the first match is used.
        attrMap: Mapping of attribute name to the new attribute value.

    Raises:
        IndexError: No element named ``nodeName`` exists.
        AssertionError: An attribute in ``attrMap`` is not already present
            on the element.
    """
    eles = xml.getElementsByTagName(nodeName)
    if not eles:
        # Same exception type as indexing an empty result, but with a message.
        raise IndexError("no <%s> element found" % (nodeName,))
    ele = eles[0]
    assert ele.nodeName == nodeName
    # Only existing attributes may be changed.
    for k in attrMap:
        assert ele.hasAttribute(k), "attribute %r missing on <%s>" % (k, nodeName)
    for k, v in attrMap.items():
        print("%s--%s" % (k, v))
        ele.setAttribute(k, v)


def WriteAsUTF8File(xmlparser, fileName):
    """Serialize a DOM document to ``fileName`` as UTF-8 with a BOM.

    ``toxml()`` without an encoding emits ``<?xml version="1.0" ?>``; that
    declaration is replaced by one that names UTF-8, followed by a newline.

    Args:
        xmlparser: DOM document offering ``toxml()``.
        fileName: Path of the file to create or overwrite.
    """
    bare_decl = '<?xml version="1.0" ?>'
    xmlstr = xmlparser.toxml()
    assert xmlstr.startswith(bare_decl), "unexpected XML declaration"
    # Limit the replacement to the leading declaration so identical text
    # further down (e.g. inside a comment or CDATA) is left untouched.
    xmlstr = xmlstr.replace(
        bare_decl, '<?xml version="1.0" encoding="UTF-8" ?>\n', 1)
    # UTF-8 byte order mark followed by the encoded document.
    utf8bytes = b'\xEF\xBB\xBF' + xmlstr.encode('utf-8')
    # The context manager closes the file even if write() fails.
    with open(fileName, 'wb') as fout:
        fout.write(utf8bytes)


def myrmtree(dirtree):
    """Delete a directory tree, printing (not raising) any OS error."""
    try:
        shutil.rmtree(dirtree)
    except OSError as e:
        # WindowsError exists only on Windows (as an alias of OSError in
        # Python 3); OSError works on every platform.
        print(e)


def main():
    """Back up ./example.xml, update <child3>, and write bb.xml.

    The input file is moved to ``./example.xml_bak`` and parsed from there.
    If the backup already exists the file counts as processed and the
    script exits with status -1.
    """
    print("current dir is %s" % (os.getcwd()))
    params = {'name': '小明', 'age': '12'}
    cfg = "./example.xml"
    cfg_bak = cfg + "_bak"
    print("=====%s=======" % (cfg))
    if os.path.exists(cfg_bak):
        print("%s has been processed" % (cfg))
        # sys.exit is always available; the bare exit() helper comes from
        # the optional site module.
        sys.exit(-1)
    shutil.move(cfg, cfg_bak)
    xml1 = xparser.parse(cfg_bak)
    FindNodeAndSetAttr(xml1, 'child3', params)
    WriteAsUTF8File(xml1, 'bb.xml')


if __name__ == "__main__":
    main()
输出
xtree.py 的输出:
current dir is I:\新建文件夹 (2)
12
小明
age--12
name--小明

xdom.py 的输出:
current dir is I:\新建文件夹 (2)
=====./example.xml=======
age--12
name--小明