"""Fetch an AJAX endpoint and print image, title and link for each album item.

The endpoint is expected to return a JSON object whose ``html`` field holds an
HTML fragment. That fragment is parsed with lxml and queried with XPath for
every ``<div class="discoverAlbum_item">`` element.
"""
import urllib.request
import json
from lxml import etree

url = 'http://www.ximalaya.com/dq/8.ajax'
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
}


def _first(values, default=''):
    """Return the first XPath result, or *default* when nothing matched."""
    return values[0] if values else default


req = urllib.request.Request(url, headers=headers)
# The context manager closes the connection even if reading or decoding
# fails; the timeout keeps a stalled server from hanging the script forever.
with urllib.request.urlopen(req, timeout=10) as response:
    jsonobj = json.loads(response.read().decode('utf-8'))

html = jsonobj['html']
xml = etree.HTML(html)
nodeList = xml.xpath('//div[@class="discoverAlbum_item"]')
for node in nodeList:
    # Each query returns a list; an item missing the attribute yields an
    # empty list, so fall back to '' instead of raising IndexError.
    img = _first(node.xpath('.//img/@src'))
    title = _first(node.xpath('.//img/@alt'))
    href = _first(node.xpath('./a/@href'))
    # One print with end=' ' emits "img title href " -- the same byte stream
    # as three separate prints that each end with a space.
    print(img, title, href, end=' ')
以上代码采用 XPath 解析接口返回的 HTML。