• Python(17):获取 AcFun 弹幕、评论和视频信息


    每天一点 Linux 命令:新建文件夹(mkdir 目录名)

    一、使用 Python 获取 AcFun 所有番剧的信息、评论和弹幕

     #! /usr/bin/env python
     # -*- coding=utf-8 -*-
     # Crawl AcFun bangumi pages and print their metadata, comments and danmaku.
     import re
     import requests
     import sys
     import json

     # Python 2 only: switch the implicit str<->unicode codec to UTF-8 so the
     # Chinese labels printed below can be concatenated with scraped text.
     # Python 3 has neither a builtin reload() nor sys.setdefaultencoding(), so
     # the hack is skipped there instead of raising NameError.
     if sys.version_info[0] == 2:
         reload(sys)  # noqa: F821 - needed before setdefaultencoding is callable
         sys.setdefaultencoding("utf-8")

     # NOTE(review): not referenced by any code visible in this listing.
     num = 1

     # Browser-like User-Agent sent with the requests, to avoid anti-crawler traps.
     head = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'}
    def dm(ht):
        """Print the danmaku (bullet comments) of one video.

        Requests pages 1-4 of the danmaku endpoint, 500 entries per page, and
        prints the ``'m'`` field of every entry found in the third element of
        the JSON response.  Paging stops early when a page is not full or when
        the response does not have the expected shape.

        Args:
            ht: identifier string appended to the danmaku endpoint URL.
        """
        page_size = 500
        for page in range(1, 5):
            # Build the URL directly; the original regex substitution
            # ('pageNo=d+', with re.S passed as ``count``) never matched, so the
            # page number was never actually changed.
            url = ('http://danmu.aixifan.com/V2/' + ht
                   + '?pageSize=%d&pageNo=%d' % (page_size, page))
            print(url)
            resp = requests.get(url, headers=head)
            data = json.loads(resp.text)
            try:
                entries = data[2]
                for entry in entries:
                    print(entry['m'])
            except (IndexError, KeyError, TypeError):
                # Unexpected response shape: give up on this video, as before.
                break
            if len(entries) < page_size:
                # A short page means there is nothing left to fetch.
                break
    def PL(ht):
        """Fetch every comment page of one bangumi.

        Prints the request URL and the total page count reported by the first
        response, then downloads and JSON-decodes each page in turn.

        Args:
            ht: bangumi id as a string (used as the ``bangumiId`` parameter).

        Raises:
            KeyError: if the first response lacks ``data.totalPage``.
        """
        # First comment URL; its response carries the total number of pages.
        url = 'http://www.acfun.tv/comment/bangumi/web/list?bangumiId=' + ht
        print(url)
        first = json.loads(requests.get(url, headers=head).content)
        pag = first['data']['totalPage']
        print(pag)
        for page in range(1, pag + 1):
            # Build the page URL directly; the original 'pageNo=d+' substitution
            # never matched, so page 1 was fetched on every iteration.
            page_url = '%s&pageNo=%d' % (url, page)
            payload = requests.get(page_url, headers=head).content
            # NOTE(review): the decoded page is not consumed yet (the original
            # discarded it as well); decoding still validates the response.
            json.loads(payload)
    def geturl(pages=7, limit=None):
        """Walk the bangumi index and call ``info`` for each listed show.

        Args:
            pages: number of index pages to request, starting at page 1
                (42 entries are requested per page).
            limit: optional maximum number of shows to process; ``None`` means
                no limit.  Use ``limit=1`` for a quick single-show run.

        Raises:
            KeyError: if an index response lacks ``data.list`` or an entry
                lacks ``id``.
        """
        template = ('http://www.acfun.tv/bangumi/bangumi/page'
                    '?pageSize=42&isWeb=1&pageNo=%d&sort=1')
        handled = 0
        for page in range(1, pages + 1):
            if limit is not None and handled >= limit:
                return
            # Format the page number in; the original 'pageNo=d+' substitution
            # never matched, so only page 1 could ever be requested.
            nurl = template % page
            print(nurl)
            listing = json.loads(requests.get(nurl, headers=head).content)
            # Iterate over what the server returned instead of indices 1..41,
            # which skipped the first entry and assumed a full page.
            for entry in listing['data']['list']:
                if limit is not None and handled >= limit:
                    return
                info(str(entry['id']))
                handled += 1
    def info(ht):
        """Print the details of one bangumi, then its danmaku and comments.

        Scrapes the show page for title, update status, description and the
        episode count, asks two JSON/text endpoints for the comment total and
        the favourite count, then visits every episode page.

        Args:
            ht: bangumi id as a string.

        Raises:
            IndexError: if one of the expected HTML fragments is not found.
            AttributeError: if the comment-count response has no ``[...]`` part.
        """
        url = "http://www.acfun.tv/v/ab" + ht
        sc = "http://www.acfun.tv/bangumi/stow/isStowed?bangumiId=" + ht           # favourite count
        pl = "http://www.acfun.tv/bangumi/count/bangumi_view.aspx?bangumiId="+ht   # comment count
        # Send the same User-Agent as every other request in this script.
        html = requests.get(url, headers=head)
        htpl = requests.get(pl, headers=head)

        title = re.findall('h3 class="title">(.*?)</h3><span', html.text, re.S)[0]
        print('名称:' + title)
        up = re.findall('</h3><span class="last">(.*?)</span>', html.text, re.S)[0]
        print('更新:' + up)
        # Brackets must be escaped: the unescaped '[(.*?)]' is a character class
        # without any group, so .group(1) could never succeed.
        pp = re.search(r'\[(.*?)\]', htpl.text, re.S).group(1)
        print('评论总数:' + pp)
        stow = json.loads(requests.get(sc, headers=head).content)
        print('收藏总数:' + str(stow['data']['stowCount']))
        jianjie = re.findall('pan class="desc">(.*?)</span>', html.text, re.S)[0]
        print('简介:' + jianjie)

        page = int(re.findall('" data-count="(.*?)" data-index="', html.text, re.S)[0])
        for i in range(1, page + 1):  # one page per episode
            # Episode URL; built directly because the original '_d+' substitution
            # never matched and left every iteration on episode 1.
            nurl = '%s_%d' % (url, i)
            print(nurl)
            print(str(i) + '话弹幕:')
            episode = requests.get(nurl, headers=head)
            # Video id of this episode, which is what the danmaku endpoint needs.
            vid = re.findall('data-vid="(.*?)" data-sid', episode.text, re.S)[0]
            # Re-enabled: the call was commented out although its header line is
            # printed and the id is fetched solely for it.
            dm(vid)
            print(str(i) + '话评论:')
            # NOTE(review): comments are requested by bangumi id, so this repeats
            # the same request for every episode - confirm whether intended.
            PL(ht)
    # Script entry point: start the crawl from the bangumi index.
    if "__main__" == __name__:
        geturl()



  • 相关阅读:
    IIS是如何处理ASP.NET请求的
    c# Socket通讯中关于粘包,半包的处理,加分割符
    windows2008(64位)下iis7.5中的url伪静态化重写(urlrewrite)
    C#微信公众号/订阅号开发 接口源码
    C#线程池多线程Socket通讯 服务器端和客户端示例
    百度地图JS调用示例
    c# 图片转二进制/字符串 二进制/字符串反转成图片
    电商项目面试总结
    96. Unique Binary Search Trees
    92.Reverse Linked List II
  • 原文地址:https://www.cnblogs.com/lovychen/p/5152281.html
Copyright © 2020-2023  润新知