• 使用pyquery解析知乎发现【CSS选择器】


    使用pyquery解析

     import json

     import requests
     from pyquery import PyQuery as pq

     # Fetch the Zhihu "explore" page, pick out every feed item with CSS
     # selectors, and append one JSON object per item to explore.json.

     url = 'https://www.zhihu.com/explore'
     headers = {
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
     }

     # A timeout keeps the script from hanging forever on a stalled connection;
     # raise_for_status() surfaces HTTP errors instead of parsing an error page.
     response = requests.get(url, headers=headers, timeout=10)
     response.raise_for_status()
     doc = pq(response.text)

     # ensure_ascii=False writes non-ASCII characters as-is, so the file is
     # opened with an explicit UTF-8 encoding rather than the platform default.
     # The file is opened once, outside the loop, instead of once per item.
     with open("explore.json", "a", encoding="utf-8") as f:
         for item in doc('.explore-tab .feed-item').items():
             question = item.find('h2').text()
             author = item.find('.author-link-line').text()

             # The inner HTML of .content is parsed again so that .text()
             # returns it without markup. Skip the re-parse when the node is
             # missing or empty.
             content_html = item.find('.content').html()
             answer = pq(content_html).text() if content_html else ""

             q = item.find('.bio').text()

             explore = {
                 "question": question,
                 "author": author,
                 "answer": answer,
                 "q": q,
             }

             # One JSON document per line.
             f.write(json.dumps(explore, ensure_ascii=False) + "\n")

     attr()方法可获取属性

     import json

     import requests
     from pyquery import PyQuery as pq

     # Same scrape as the text() example, but the "q" field is read from the
     # title attribute of the .bio element via attr() instead of its text.

     url = 'https://www.zhihu.com/explore'
     headers = {
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
     }

     # A timeout keeps the script from hanging forever on a stalled connection;
     # raise_for_status() surfaces HTTP errors instead of parsing an error page.
     response = requests.get(url, headers=headers, timeout=10)
     response.raise_for_status()
     doc = pq(response.text)

     # ensure_ascii=False writes non-ASCII characters as-is, so the file is
     # opened with an explicit UTF-8 encoding rather than the platform default.
     # The file is opened once, outside the loop, instead of once per item.
     with open("explore.json", "a", encoding="utf-8") as f:
         for item in doc('.explore-tab .feed-item').items():
             question = item.find('h2').text()

             # Calling the item with a selector is used here in place of
             # item.find('.author-link-line').
             author = item('.author-link-line').text()

             # The inner HTML of .content is parsed again so that .text()
             # returns it without markup. Skip the re-parse when the node is
             # missing or empty.
             content_html = item.find('.content').html()
             answer = pq(content_html).text() if content_html else ""

             # attr('title') reads the title attribute rather than the
             # element's text (the earlier example used .text() here).
             q = item.find('.bio').attr('title')

             explore = {
                 "question": question,
                 "author": author,
                 "answer": answer,
                 "q": q,
             }

             # One JSON document per line.
             f.write(json.dumps(explore, ensure_ascii=False) + "\n")
  • 相关阅读:
    链路追踪
    Zuul网关
    MyBatis批量插入
    自定义组件使用v-model
    正则表达式入门
    博客园主题1【备份】
    关于我
    input输入框内容规范正则总结
    实例003:完全平方数
    python基础day5dict
  • 原文地址:https://www.cnblogs.com/wanglinjie/p/9249280.html
Copyright © 2020-2023  润新知