import requests
from lxml import etree


def main():
    """Parse the local ``test.html`` and print the image sources in the "song" div.

    The file is read with lxml's HTML parser rather than the default XML
    parser, so ordinary (non-well-formed) HTML such as unclosed ``<img>`` or
    ``<br>`` tags does not abort parsing with ``XMLSyntaxError``.
    """
    # Build the element tree from the local file. The encoding is pinned to
    # UTF-8, which is what the default XML parser would have assumed for a
    # file without an XML declaration.
    parser = etree.HTMLParser(encoding="utf-8")
    tree = etree.parse('test.html', parser)

    # XPath reference examples (kept commented out; enable one at a time):
    # r = tree.xpath('/html/body/div')         # a leading '/' starts at the root node
    # r1 = tree.xpath('/html//div')            # '//' spans any number of levels
    # r2 = tree.xpath('//div')                 # '//' spans any number of levels
    # r = tree.xpath('//div[@class="song"]')   # filter by attribute value
    # r = tree.xpath('//div[@class="song"]/p[3]')  # positional indexes start at 1
    # r = tree.xpath('//div[@class="tang"]//li[5]/a/text()')[0]  # direct text of <a>
    # r = tree.xpath('//div[@class="tang"]//li[7]//text()')[0]   # text at any depth

    # '/@src' selects the attribute value itself, so the result is a list of
    # the src strings of every <img> directly under <div class="song">.
    r = tree.xpath('//div[@class="song"]/img/@src')
    print(r)


if __name__ == "__main__":
    main()