#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Scraper for Baidu Zhidao's hot-search keyword ranking.

Fetches the hotword JSON API (top 1-50 entries) and writes the ranking
into an Excel sheet, saved as D:\\Python\\test1.xls (the directory is
created if missing; an existing file with the same name is overwritten).
"""
import urllib.request
import urllib
import re
import json
import xlwt
import os

# Number of entries to request; the API only accepts 1-50.
CNT = 50
URL = 'https://zhidao.baidu.com/question/api/hotword?rn={}&t=1535421904906'.format(CNT)
USER_AGENT = ('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) '
              'Gecko/20091201 Firefox/3.5.6')


def fetch_hotwords(url, user_agent):
    """Download the hotword API response and return the parsed 'data' list.

    Raises urllib.error.URLError on network failure and KeyError if the
    JSON payload has no 'data' key.
    """
    req = urllib.request.Request(url, headers={'User-Agent': user_agent})
    # Close the HTTP response deterministically (original leaked it).
    with urllib.request.urlopen(req) as response:
        content = response.read().decode('utf-8')
    return json.loads(content)['data']


def build_workbook(data_list):
    """Build and return an xlwt workbook: a header row plus one row per entry."""
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    headers = ['排名', '新闻名称', '搜索人数', '变化数量', '新的新闻', '热度上升']
    for col, title in enumerate(headers):
        sheet.write(0, col, title)
    for row, data in enumerate(data_list, start=1):
        print(data)
        sheet.write(row, 0, row)
        sheet.write(row, 1, data['keyword'])
        sheet.write(row, 2, data['searches'])
        sheet.write(row, 3, data['changeRate'])
        # 'isNew' comes back as 0/1; map to Chinese no/yes for the sheet,
        # passing any other value through unchanged (matches original fallback).
        is_new = {0: '否', 1: '是'}.get(data['isNew'], data['isNew'])
        sheet.write(row, 4, str(is_new))
        # Colour the trend cell. NOTE(review): xlwt palette has 2=red, 3=green,
        # so 'fall' renders green and 'rise' red — kept as in the original,
        # but this looks swapped; confirm intent.
        style = xlwt.XFStyle()
        font = xlwt.Font()
        style.font = font
        trend = data['trend']
        if trend == 'fall':
            font.colour_index = 3
            trend = '下降'
        elif trend == 'rise':
            font.colour_index = 2
            trend = '上升'
        sheet.write(row, 5, trend, style)
    return workbook


def save_workbook(workbook, directory, filename):
    """Save *workbook* as <directory>/<filename>.xls, creating the directory
    if needed and overwriting any existing file of the same name."""
    if not os.path.isdir(directory):
        os.makedirs(directory)
    # os.path.join replaces the original `path + '\'` concatenation,
    # which was an unterminated string literal (syntax error).
    workbook.save(os.path.join(directory, filename + '.xls'))


# Module-level execution, as in the original script.
print(URL)
data_list = fetch_hotwords(URL, USER_AGENT)
workbook = build_workbook(data_list)
save_workbook(workbook, r'D:\Python', 'test1')
print('创建excel文件完成!')
# Baidu's results page always shows a hot-search ranking; the code above scrapes that ranking.