"""Scrape flash-news entries from jin10.com and export them to an Excel sheet."""
import re

import requests
import xlwt

NEWS_URL = 'https://www.jin10.com/'
# requests waits indefinitely unless a timeout is given explicitly.
REQUEST_TIMEOUT = 10

# Captures (time, content) for each flash-news entry in the page markup.
_FLASH_NEWS_RE = re.compile(
    r'<div class="jin-flash_time">(.*?)</div></div>'
    r'<div class="jin-flash_b"><h4>(.*?)</h4></div></div>'
)
# Matches inline HTML tags (e.g. <b>, </b>) left inside a captured field.
_TAG_RE = re.compile(r'<[^>]+>')


def _strip_tags(text):
    """Return *text* with HTML tags removed and outer whitespace trimmed."""
    return _TAG_RE.sub('', text).strip()


def Get_news(url=NEWS_URL, timeout=REQUEST_TIMEOUT):
    """Download the news page and extract its flash-news entries.

    Args:
        url: Page to fetch; defaults to the jin10 home page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(time, content)`` string tuples in page order. Inline
        HTML tags are removed from both fields, and entries whose content
        is empty after cleaning are dropped so they do not end up as blank
        spreadsheet cells.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page into zero rows.
    response.raise_for_status()
    # Let requests guess the charset from the body rather than the headers.
    response.encoding = response.apparent_encoding

    flash_news = []
    for time_text, content in _FLASH_NEWS_RE.findall(response.text):
        content = _strip_tags(content)
        if not content:
            continue
        flash_news.append((_strip_tags(time_text), content))
    return flash_news


def excel_write(flash_news, filename='jin10.xls'):
    """Write flash-news entries to an ``.xls`` workbook.

    Row 0 holds the column titles; each entry then fills one row, with the
    time in column 0 and the news content in column 1.

    Args:
        flash_news: Iterable of ``(time, content)`` pairs, as returned by
            ``Get_news``.
        filename: Path of the Excel file to save.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('jin_new')

    # Header row.
    for col, title in enumerate(('时间', '新闻内容')):
        sheet.write(0, col, title)

    # Data rows start at row 1, directly below the header.
    for row, entry in enumerate(flash_news, start=1):
        for col, value in enumerate(entry[:2]):
            sheet.write(row, col, value)

    workbook.save(filename)


if __name__ == '__main__':
    excel_write(Get_news())
这个版本还不是很完美:新闻内容里面还带有 <b></b> 标签,空白的单元格也还没有处理,最关键的一点是还没有做到实时监控。