• python 解析XML文件


    使用 Python 较高效地解析 XML 文件的方法(ElementTree 整体解析与 iterparse 流式解析)

    参考 http://codingpy.com/article/parsing-xml-using-python/

    try:
        import xml.etree.cElementTree as ET
    except ImportError:
        import xml.etree.ElementTree as ET
    import time
    
    
    def parse_poi_by_elementTree(filepath):
        """Count ``Poi`` elements by loading the whole XML document into memory.

        The elapsed wall-clock time is printed to stdout.

        Args:
            filepath: Path (or file object) of the XML document to parse.

        Returns:
            A ``(declared, actual)`` tuple. ``declared`` is the raw ``Num``
            attribute of the first ``Pois`` element, exactly as stored in the
            file (a string, or ``None`` when the attribute is absent); it is
            ``0`` when the document has no ``Pois`` element. ``actual`` is the
            number of ``Poi`` elements found.
        """
        t0 = time.time()

        tree = ET.ElementTree(file=filepath)

        # The built-in next() works on Python 2.6+ and 3 (the iterator's
        # .next() method is Python 2 only); the default avoids a StopIteration
        # when the document has no 'Pois' element.
        pois_elem = next(tree.iter(tag='Pois'), None)
        # Compare with None explicitly: an Element without children is falsy.
        pois_element_num = pois_elem.get('Num') if pois_elem is not None else 0
        # Count lazily instead of materialising a list of every match.
        vde_poi = sum(1 for _ in tree.iter(tag='Poi'))

        cost_time = time.time() - t0
        print('parse_poi_by_elementTree time cost is %s' % cost_time)
        return pois_element_num, vde_poi
    
    
    def parse_poi_by_iterparse(filepath):
        """Count ``Poi`` elements by streaming the XML file with ``iterparse``.

        The elapsed wall-clock time is printed to stdout.

        Args:
            filepath: Path (or file object) of the XML document to parse.

        Returns:
            A ``(declared, actual)`` tuple. ``declared`` is the ``Num``
            attribute of the ``Pois`` element converted to ``int`` (``0`` when
            no ``Pois`` element is seen); ``actual`` is the number of ``Poi``
            elements found.

        Raises:
            TypeError: If a ``Pois`` element has no ``Num`` attribute.
            ValueError: If the ``Num`` attribute is not an integer literal.
        """
        t0 = time.time()

        pois_element_num = 0
        vde_poi = 0
        # iterparse reports only 'end' events by default, so every element is
        # complete when it is seen here.
        for _, elem in ET.iterparse(filepath):
            if elem.tag == 'Poi':
                vde_poi += 1
            elif elem.tag == "Pois":
                pois_element_num = int(elem.get('Num'))

            # Empty the element once it has been inspected (children, text
            # and attributes are dropped).
            elem.clear()

        cost_time = time.time() - t0
        # print() with a single argument behaves the same on Python 2 and 3.
        print('parse_poi_by_iterparse time cost is %s' % cost_time)
        return pois_element_num, vde_poi
    
    
    from statistic import StatisticItem, XML_STREET, XML_POI
    import os
    
    
    def parse_street_xml_by_ET(street_file):
        """Stream a street XML file and report actual vs. declared street counts.

        Args:
            street_file: Path of the street XML file.

        Returns:
            A ``StatisticItem`` tagged ``XML_STREET`` whose payload is
            ``[number of Street elements, Num attribute of Streets as int]``.
            The payload is ``[0, 0]`` when the file does not exist.
        """
        declared_total = 0
        actual_total = 0  # Street elements actually encountered

        if os.path.exists(street_file):
            for _event, node in ET.iterparse(street_file):
                tag = node.tag
                if tag == 'Street':
                    actual_total += 1
                elif tag == "Streets":
                    declared_total = int(node.get('Num'))
                # Empty the element once it has been inspected.
                node.clear()

        return StatisticItem(XML_STREET, [actual_total, declared_total])
    
    
    def parse_poi_xml_by_ET(poi_file):
        """Stream a POI XML file and report actual vs. declared POI counts.

        Args:
            poi_file: Path of the POI XML file.

        Returns:
            A ``StatisticItem`` tagged ``XML_POI`` whose payload is
            ``[number of Poi elements, Num attribute of Pois as int]``.
            The payload is ``[0, 0]`` when the file does not exist.
        """
        declared_total = 0
        actual_total = 0  # Poi elements actually encountered

        if os.path.exists(poi_file):
            for _event, node in ET.iterparse(poi_file):
                tag = node.tag
                if tag == 'Poi':
                    actual_total += 1
                elif tag == "Pois":
                    declared_total = int(node.get('Num'))
                # Empty the element once it has been inspected.
                node.clear()

        return StatisticItem(XML_POI, [actual_total, declared_total])
    
    
    if __name__ == '__main__':
        # Run both POI parsers on the same sample file and print each result.
        # NOTE(review): the path has no separators; the backslashes were
        # presumably lost when this snippet was published. Confirm the real
        # location before running.
        # Alternative sample: C:UsersshchshanDesktopvdeState_14120002POI_1414000018.xml
        sample_poi_file = r'C:UsersshchshanDesktopvdeState_14120001POI_1414000001.xml'
        # print() with a single argument behaves the same on Python 2 and 3.
        print(parse_poi_by_elementTree(sample_poi_file))
        print(parse_poi_by_iterparse(sample_poi_file))
  • 相关阅读:
    【华为云技术分享】ArcFace简介
    [学]PHP爬虫框架phpspider
    zend studio 自动注释、备注和常用有用快捷键
    mysql 为什么加了排序字段后不会自动按ID升序排序?
    【PHP转义字符】单引号双引号以及转义字符【原创】
    Javascript小结(四)----包装对象
    JavaScript小结(三)----字符串操作
    JavaScript小结(二)-----Date()函数
    Javascript小结(一)----prototype对象
    PHP底层原理分析和底层扩展编写
  • 原文地址:https://www.cnblogs.com/dasheng-maritime/p/7491171.html
Copyright © 2020-2023  润新知