• python 解析XML文件


    比较高效的 Python 解析 XML 文件的方法(对比 ElementTree 整体解析与 iterparse 增量解析)

    参考 http://codingpy.com/article/parsing-xml-using-python/

    try:
        import xml.etree.cElementTree as ET
    except ImportError:
        import xml.etree.ElementTree as ET
    import time
    
    
    def parse_poi_by_elementTree(filepath):
        """Count POIs by loading the whole XML document into an ElementTree.

        Prints the elapsed wall-clock time before returning.

        Args:
            filepath: Path of the XML file to parse.

        Returns:
            A ``(pois_element_num, vde_poi)`` tuple.  ``pois_element_num`` is
            the raw ``Num`` attribute of the first ``Pois`` element (a string,
            or ``None`` if that element has no ``Num`` attribute), or ``0``
            when the document contains no ``Pois`` element.  ``vde_poi`` is
            the number of ``Poi`` elements found in the document.
        """
        t0 = time.time()

        tree = ET.ElementTree(file=filepath)

        # The builtin next() works on Python 2.6+ and 3.x, unlike the
        # Python-2-only iterator.next() method; its default also avoids a
        # StopIteration escaping when there is no 'Pois' element.
        pois_elem = next(tree.iter(tag='Pois'), None)
        pois_element_num = pois_elem.get('Num') if pois_elem is not None else 0

        # Count lazily instead of materialising every element in a list.
        vde_poi = sum(1 for _ in tree.iter(tag='Poi'))

        cost_time = time.time() - t0
        # A single-argument print(...) is valid on both Python 2 and 3.
        print('parse_poi_by_elementTree time cost is %s' % cost_time)
        return pois_element_num, vde_poi
    
    
    def parse_poi_by_iterparse(filepath):
        """Count POIs by streaming the XML file with ``ET.iterparse``.

        Each element is cleared after it has been inspected.  Prints the
        elapsed wall-clock time before returning.

        Args:
            filepath: Path of the XML file to parse.

        Returns:
            A ``(pois_element_num, vde_poi)`` tuple.  ``pois_element_num`` is
            the ``Num`` attribute of the ``Pois`` element converted to ``int``
            (``0`` when no ``Pois`` element or no ``Num`` attribute is seen;
            the last ``Pois`` element wins if there are several).  ``vde_poi``
            is the number of ``Poi`` elements found in the document.

        Raises:
            ValueError: If a ``Num`` attribute is present but not an integer.
        """
        t0 = time.time()

        pois_element_num = 0
        vde_poi = 0
        # With no ``events`` argument iterparse reports only 'end' events, so
        # every element is fully parsed by the time it is inspected here.
        for _event, elem in ET.iterparse(filepath):
            if elem.tag == 'Poi':
                vde_poi += 1
            elif elem.tag == 'Pois':
                # Default to 0 so a missing attribute does not end up as
                # int(None), which raises TypeError.
                pois_element_num = int(elem.get('Num', 0))

            elem.clear()

        cost_time = time.time() - t0
        # A single-argument print(...) is valid on both Python 2 and 3.
        print('parse_poi_by_iterparse time cost is %s' % cost_time)
        return pois_element_num, vde_poi
    
    
    from statistic import StatisticItem, XML_STREET, XML_POI
    import os
    
    
    def parse_street_xml_by_ET(street_file):
        """Build the street statistic for one street XML file.

        The file is streamed with ``ET.iterparse`` and each element is cleared
        after it has been inspected.

        Args:
            street_file: Path of the street XML file.

        Returns:
            ``StatisticItem(XML_STREET, [vde_streets, street_num])`` where
            ``vde_streets`` is the number of ``Street`` elements found and
            ``street_num`` is the ``Num`` attribute of the ``Streets`` element
            as an ``int`` (``0`` when the element or the attribute is absent).
            When ``street_file`` does not exist both values are ``0``.

        Raises:
            ValueError: If a ``Num`` attribute is present but not an integer.
        """
        if not os.path.exists(street_file):
            return StatisticItem(XML_STREET, [0, 0])

        street_num = 0   # count declared by the 'Num' attribute
        vde_streets = 0  # actual count

        for _event, elem in ET.iterparse(street_file):
            if elem.tag == 'Street':
                vde_streets += 1
            elif elem.tag == 'Streets':
                # Default to 0 so a missing attribute does not end up as
                # int(None), which raises TypeError.
                street_num = int(elem.get('Num', 0))

            elem.clear()
        return StatisticItem(XML_STREET, [vde_streets, street_num])
    
    
    def parse_poi_xml_by_ET(poi_file):
        """Build the POI statistic for one POI XML file.

        The file is streamed with ``ET.iterparse`` and each element is cleared
        after it has been inspected.

        Args:
            poi_file: Path of the POI XML file.

        Returns:
            ``StatisticItem(XML_POI, [vde_pois, poi_num])`` where ``vde_pois``
            is the number of ``Poi`` elements found and ``poi_num`` is the
            ``Num`` attribute of the ``Pois`` element as an ``int`` (``0``
            when the element or the attribute is absent).  When ``poi_file``
            does not exist both values are ``0``.

        Raises:
            ValueError: If a ``Num`` attribute is present but not an integer.
        """
        if not os.path.exists(poi_file):
            return StatisticItem(XML_POI, [0, 0])

        poi_num = 0   # count declared by the 'Num' attribute
        vde_pois = 0  # actual count

        for _event, elem in ET.iterparse(poi_file):
            if elem.tag == 'Poi':
                vde_pois += 1
            elif elem.tag == 'Pois':
                # Default to 0 so a missing attribute does not end up as
                # int(None), which raises TypeError.
                poi_num = int(elem.get('Num', 0))

            elem.clear()
        return StatisticItem(XML_POI, [vde_pois, poi_num])
    
    
    if __name__ == '__main__':
        import sys

        # Runs both POI parsers on the same file so their printed timings and
        # results can be compared.  Pass a path as the first command-line
        # argument to use a different file than the sample below.
        # Another sample file noted by the author:
        #   C:UsersshchshanDesktopvdeState_14120002POI_1414000018.xml
        # NOTE(review): the sample paths contain no separators; presumably the
        # backslashes were lost somewhere along the way -- confirm.
        default_poi_file = r'C:UsersshchshanDesktopvdeState_14120001POI_1414000001.xml'
        poi_file = sys.argv[1] if len(sys.argv) > 1 else default_poi_file

        # A single-argument print(...) is valid on both Python 2 and 3.
        print(parse_poi_by_elementTree(poi_file))
        print(parse_poi_by_iterparse(poi_file))
  • 相关阅读:
    c#实现冒泡、快速、选择和插入排序算法
    数据库>SQL Server>循环游标读取例子
    《道德经》程序员版第二章
    跨域访问JQuery+.NET实现的一种思路,以及极简单Demo
    《道德经》程序员版第三章
    ASP.NET初级>传智播客.net>第十一季asp.net中级 文字总结(未完)
    <div>里包含<p>标签疑问
    互联网音乐赚钱模式
    JQuery调用WebService,以及JS把单斜杠转换成双斜杠
    《道德经》程序员版第一章
  • 原文地址:https://www.cnblogs.com/dasheng-maritime/p/7491171.html
Copyright © 2020-2023  润新知