# 腾讯视频 (Tencent Video)
import requests
import xlwt
import re
# Scrape a Tencent Video film listing and export each film's title, score,
# cast and link to the workbook saved as '80年代喜剧电影.xls'.
file = xlwt.Workbook(encoding='utf-8')
sheet = file.add_sheet('汇总表')
for column, header in enumerate(('影片名', '评分', '主演', '影片链接')):
    sheet.write(0, column, header)

# Step 1: home page -> link of the navigation entry tagged data-key="电影".
response = requests.get('https://v.qq.com/')
# Decode explicitly as UTF-8 so the Chinese key can be matched as written.
# The pattern used to contain the UTF-8 bytes of "电影" mis-decoded as
# Latin-1, which only matches when the page text is mis-decoded the same way.
response.encoding = 'utf-8'
result = re.findall('<a href="(.*?)" data-key="电影" ', response.text)
detail = 'https://v.qq.com' + result[0]

# Step 2: follow the filter link tagged data-value="itype=100004".
response = requests.get(detail)
result = re.findall('<a href="(.*?)" data-value="itype=100004"', response.text)
detail = 'https://v.qq.com' + result[0]

# Step 3: follow the year filter link tagged data-value="100039"
# (this href is used as-is, without the site prefix).
response = requests.get(detail)
result = re.findall(
    '<a href="(.*?)" class="filter_item " data-key="year" data-value="100039"',
    response.text,
)
detail = result[0]

# Step 4: pull a paging link apart so result pages can be requested by offset.
response = requests.get(detail)
result = re.findall('<a href="(.*?)amp;(.*?)&itype=100004"', response.text)

# The page URL prefix is loop-invariant, so build it once.
# NOTE(review): character 12 of the first group is presumably the '?' query
# separator and the first 7 characters of the second group presumably the
# 'offset=' parameter name -- confirm against the live markup.
before_amp, after_amp = result[0]
page_prefix = detail + before_amp[12] + 'itype=100004&' + after_amp[0:7]

title_row = 1  # next free row for the title / score / link columns
actor_row = 1  # next free row for the cast column

# Ten result pages: offsets 0, 30, ..., 270.
for offset in range(0, 300, 30):
    page = requests.get(page_prefix + str(offset)).text
    links = re.findall('<strong class="figure_title"><a href="(.*?)"', page)

    # Cast column: one cell per "主演:" section found on the page.
    for cast_html in re.findall('''主演:(.*?)</div>''', page, re.S):
        names = re.findall('title="(.*?)"', cast_html, re.S)
        print(names)
        # Each name is followed by a single space, written as one plain string.
        sheet.write(actor_row, 2, ''.join(name + ' ' for name in names))
        actor_row += 1
        # NOTE(review): row 194 is deliberately skipped in the cast column,
        # presumably to realign with a film that has no cast section -- confirm.
        if actor_row == 194:
            actor_row += 1

    titles = re.findall('alt="(.*?)" r-imgerr="v">', page)
    score_major = re.findall('<em class="score_l">(.*?)</em>', page)
    score_minor = re.findall('<em class="score_s">(.*?)</em>', page)
    # The score is rendered in two <em> parts; glue them back together.
    scores = [major + minor for major, minor in zip(score_major, score_minor)]

    # zip() stops at the shortest list, so a page with a missing score or link
    # no longer aborts the whole run with IndexError before the file is saved.
    for title, score, link in zip(titles, scores, links):
        # Pad with the ideographic space (U+3000) so CJK columns line up.
        print('{:\u3000<18}{:\u3000>10}{:\u3000>50}'.format(title, score, link))
        sheet.write(title_row, 0, title)
        sheet.write(title_row, 1, score)
        sheet.write(title_row, 3, link)
        title_row += 1

file.save('80年代喜剧电影.xls')
# 爱奇艺 (iQiyi)
import requests
import xlwt
import re
# Scrape result pages 1-7 of an iQiyi listing (URL filter "1980_1989") and
# export each entry's name, score and link to '爱奇艺80年代喜剧电影.xls'.

# Workbook's first positional parameter is the text encoding, not a title, so
# pass the encoding explicitly instead of the former title string.
file = xlwt.Workbook(encoding='utf-8')
sheet1 = file.add_sheet('汇总表')
for column, header in enumerate(('video_name', 'score', 'link')):
    sheet1.write(0, column, header)

row = 1  # next free data row; row 0 holds the headers
for page_no in range(1, 8):
    url = (
        'https://list.iqiyi.com/www/1/8-----------1980_1989--11-'
        + str(page_no)
        + '-1-iqiyi--.html'
    )
    html = requests.get(url).text

    # The captured hrefs get an explicit 'https:' scheme prepended.
    links = [
        'https:' + href
        for href in re.findall(
            'class="qy-mod-link" href="(.*?)" target="_blank"', html
        )
    ]
    scores = re.findall('<span class="text-score">(.*?)</span>', html)
    names = re.findall('<img alt="(.*?)"', html)

    # zip() stops at the shortest list, so a page with a missing score or name
    # no longer aborts the run with IndexError before the file is saved.
    for name, score, link in zip(names, scores, links):
        print(name, ' ', score, ' ', link)
        sheet1.write(row, 0, name)
        sheet1.write(row, 1, score)
        sheet1.write(row, 2, link)
        row += 1

file.save('爱奇艺80年代喜剧电影.xls')