一、安装(win7 64)
1、安装lxml,pip install lxml
2、如果安装出错,下载lxml-3.5.0b1.win-amd64-py2.7.exe:
地址:http://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml
3、示例代码:
from lxml import etree
def city_list(url):
    '''
    Collect the URL and name of every city entry link on the 58 site.

    :param url: address of the page that holds the ``<dl id="clist">`` element
    :return: de-duplicated list of ``(href, city_name)`` tuples in arbitrary
             order (it is built from a set), or ``None`` when the page has
             no ``clist`` element
    '''
    root = utils.get_root(url)
    matches = root.xpath(r'.//dl[@id="clist"]')
    if not matches:
        return None

    # Serialize only the first matching <dl> and scan its markup for anchors.
    r_data = etree.tostring(matches[0], encoding='utf-8')

    # The whitespace classes need their backslashes: without them the pattern
    # only matches literal "s"/"S" characters and never captures a real link.
    # re.S is kept so the serialized markup is matched as one whole string.
    pattern = re.compile(r'\s*<a href="(\S+?)"[\s\S]*?>(\S+?)</a>\s*', re.S)

    # findall always returns a list (never None), so no None check is needed;
    # the set round-trip drops duplicate (href, name) pairs.
    return list(set(re.findall(pattern, r_data)))
def get_root(url):
    '''
    Download a page and parse it into an lxml HTML tree.

    Errors raised while fetching or parsing propagate to the caller unchanged.

    :param url: address to fetch; ``None`` skips the request entirely
    :return: the root node produced by ``etree.HTML``, or ``None`` when
             ``url`` is ``None``
    '''
    # Local import keeps the block self-contained; closing() is used because
    # the urllib2 response object is not itself a context manager.
    from contextlib import closing

    if url is None:
        return None

    request = urllib2.Request(url)
    # Always release the connection, even if read() raises.
    with closing(urllib2.urlopen(request)) as response:
        html = response.read()
    return etree.HTML(html)
多利用 XPath 函数,例如 string(.)、name()、count() 等。
参考:http://www.cnblogs.com/cxd4321/archive/2007/09/24/903917.html