<?xml version="1.0" encoding="utf-8" ?> <!--this is a test about xml.--> <collection shelf="New Arrivals"> <movie title="Enemy Behind"> <type>War, Thriller</type> <format>DVD</format> <year>2003</year> <rating>PG</rating> <stars>10</stars> <description>Talk about a US-Japan war</description> </movie> <movie title="Transformers"> <type>Anime, Science Fiction</type> <format>DVD</format> <year>1989</year> <rating>R</rating> <stars>8</stars> <description>A schientific fiction</description> </movie> <movie title="Trigun"> <type>Anime, Action</type> <format>DVD</format> <episodes>4</episodes> <rating>PG</rating> <stars>10</stars> <description>Vash the Stampede!</description> </movie> <movie title="Ishtar"> <type>Comedy</type> <format>VHS</format> <rating>PG</rating> <stars>2</stars> <description>Viewable boredom</description> </movie> </collection>
# Exercise: count how many movies in the movie XML file have the type "War, Thriller".
import sys
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
def count_movies_by_type(tree, wanted_type):
    """Count the <movie> elements whose <type> child text equals wanted_type.

    The tag of every visited <movie> element is printed as the tree is
    walked, so the script's output is a trace followed by the final count.

    Args:
        tree: a parsed ElementTree (or Element) to search.
        wanted_type: exact text the <type> child must contain.

    Returns:
        The number of matching <movie> elements.
    """
    matches = 0
    for movie in tree.iter(tag='movie'):  # walk every <movie> node in the tree
        print(movie.tag)
        # Look <type> up by name instead of assuming it is the first child:
        # indexing [0] raises IndexError on an empty <movie> and silently
        # reads the wrong element if the children are reordered.
        if movie.findtext('type') == wanted_type:
            matches += 1
    return matches


if __name__ == '__main__':
    # Raw string: "\m" is not a valid escape sequence, so the plain literal
    # only worked by accident and triggers a warning on current Python 3.
    tree = ET.parse(r"e:\movie.xml")
    count = count_movies_by_type(tree, 'War, Thriller')
    print(count)
# The code below parses the document while it is being read, which saves
# memory compared with loading the whole tree first.
def count_type_elements(source, wanted_type):
    """Stream-parse source and count <type> elements whose text equals wanted_type.

    Args:
        source: a filename or a binary file object containing XML.
        wanted_type: exact text a <type> element must contain to be counted.

    Returns:
        The number of matching <type> elements.
    """
    matches = 0
    # Ask for "end" events explicitly (this is also iterparse's default):
    # an element's text is only guaranteed to be complete once its closing
    # tag has been seen.
    for _event, elem in ET.iterparse(source, events=('end',)):
        # Tag is <type> and its text is the wanted value -> count it.
        if elem.tag == 'type' and elem.text == wanted_type:
            matches += 1
        # Clear the element once it has been processed; without this the
        # whole tree would accumulate in memory and streaming would save
        # nothing.
        elem.clear()
    return matches


if __name__ == '__main__':
    # Raw string: "\m" is not a valid escape sequence, so the plain literal
    # only worked by accident and triggers a warning on current Python 3.
    count = count_type_elements(r"e:\movie.xml", 'War, Thriller')
    print(count)