1. 安装:pip install pyquery
2. 导入:from pyquery import PyQuery as pq
3. 使用:
# Example: fetch a page with requests and query it with pyquery.
import requests
from pyquery import PyQuery as pq

url = ''       # placeholder: fill in the target URL before running
headers = {}   # placeholder: optional request headers

# Alternative: use the decoded text instead of the raw bytes.
# resp_text = requests.get(url=url, headers=headers).text
resp_text = requests.get(url=url, headers=headers).content
doc_resp = pq(resp_text)

a = doc_resp("#wrap")                # select by id
b = doc_resp(".wrap")                # select by class; for a class value containing spaces, replace each space with "."
c = doc_resp(".wrap").attr('href')   # read the href attribute
e = doc_resp(".wrap").text()         # read the text content
f = doc_resp(".wrap").parent()       # parent element
g = doc_resp(".wrap").siblings()     # sibling elements
h = doc_resp(".wrap").html()         # inner HTML
i = doc_resp(".wrap").remove('ul')   # remove the matching child tags

# Iterate over the matched elements.
result_items = doc_resp(".wrap").items()
for result_item in result_items:
    # Print the current item (the original printed the generator itself).
    print(result_item)

# Selecting tags with pseudo-classes.
html = ''      # placeholder: fill in the HTML to parse
doc = pq(html)

its = doc("link:first-child")
print('第一个标签:%s' % its)

its = doc("link:last-child")
print('最后一个标签:%s' % its)

its = doc("link:nth-child(2)")
print('第二个标签:%s' % its)

its = doc("link:gt(0)")  # index is zero-based
print("获取0以后的标签:%s" % its)

its = doc("link:nth-child(2n-1)")
print("获取奇数标签:%s" % its)

its = doc("link:contains('hello')")
print("获取文本包含hello的标签:%s" % its)