"""Print the text of every ``BlueTable`` row found in a series of HTML pages.

For each page number 1 through 210, the file ``<number>.html`` is opened
relative to the current working directory and, for every ``<tr>`` located
under a ``<div class="BlueTable">``, the list of its text nodes is printed.
"""
import codecs
import sys

from lxml import etree


def parser(p):
    """Print the text nodes of each ``BlueTable`` table row in ``<p>.html``.

    Args:
        p: Page identifier; ``str(p) + '.html'`` is the path that is opened.

    One list is printed per matching ``<tr>`` element. Errors raised by
    ``open`` (e.g. a missing file) or by the lxml parser are not caught here.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(str(p) + '.html', 'r') as page:
        tree = etree.HTML(page.read())

    rows = tree.xpath("//div[@class='BlueTable']//tr")
    for row in rows:
        # Serialise the row and re-parse it as its own document so that the
        # absolute "//text()" query only returns text from this fragment.
        row_html = etree.tostring(row, method="html")
        print(etree.HTML(row_html).xpath("//text()"))


# Process every page file, 1.html through 210.html inclusive.
for page_number in range(1, 211):
    parser(page_number)