#! /usr/bin/env python
# -*- coding=utf-8 -*-
"""Scrape movie entries from the Douban movie home page into a text file.

Each entry found in the page markup is appended to ``dianying.txt`` as a
numbered, space-separated record (title, actors, region, release year,
duration, rating).
"""

import json  # noqa: F401  (unused here; kept in case other code relies on it)
import re

import requests

# Text file that scraped records are appended to.
OUTPUT_PATH = 'dianying.txt'

# Seconds to wait for the HTTP response before giving up.
REQUEST_TIMEOUT = 10

# Running record number written in front of every movie; it keeps growing
# across calls to write__ within one process.
count = 0

# One match per movie: everything between "data-tit" and "data-enough".
_ENTRY_RE = re.compile('data-tit(.*?)data-enough', re.S)

# The entry text starts right after "data-tit", so the title attribute
# appears as: le="<title>"
_TITLE_RE = re.compile('le="(.*?)"', re.S)

# Remaining attributes, keyed by the name used in the record dict.
_FIELD_PATTERNS = (
    ('year', re.compile('data-release="(.*?)" data-rate', re.S)),
    ('Rating', re.compile('data-rate="(.*?)" data-star', re.S)),
    ('duration', re.compile('data-duration="(.*?)" data-region', re.S)),
    ('region', re.compile('data-region="(.*?)" data-director', re.S)),
    ('actor', re.compile('data-actors="(.*?)" data-intro', re.S)),
)

# Layout of one output record; identical to the original field order and
# separators (single spaces, full-width colons, no trailing newline).
_RECORD_TEMPLATE = (
    u'{number} '
    u'电影名:{title} '
    u'主演:{actor} '
    u'电影地区:{region} '
    u'上映年份:{year} '
    u'电影时长:{duration} '
    u'评分:{Rating} '
)


def _parse_entry(chunk):
    """Extract the movie fields from one entry of page markup.

    Args:
        chunk: Text captured between ``data-tit`` and ``data-enough``.

    Returns:
        A dict with the keys ``title``, ``year``, ``Rating``, ``duration``,
        ``region`` and ``actor``, or ``None`` when no title can be found.
        Any other attribute that is missing is stored as an empty string.
    """
    title_match = _TITLE_RE.search(chunk)
    if title_match is None:
        # Without a title the chunk is not a usable movie entry.
        return None

    info = {'title': title_match.group(1)}
    for key, pattern in _FIELD_PATTERNS:
        match = pattern.search(chunk)
        info[key] = match.group(1) if match else ''
    return info


def write__(htm):
    """Append every movie entry found in *htm* to the output file.

    Args:
        htm: Response-like object whose ``text`` attribute holds the page
            HTML as a string.

    Side effects:
        Opens ``OUTPUT_PATH`` in append mode (UTF-8), writes one record per
        parsed entry, and advances the module-level ``count``.
    """
    global count

    entries = _ENTRY_RE.findall(htm.text)

    # Open per call so the handle is always released, even when a write
    # fails, and so the function can safely be called more than once.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as out:
        for chunk in entries:
            info = _parse_entry(chunk)
            if info is None:
                continue
            count += 1
            out.write(_RECORD_TEMPLATE.format(number=count, **info))


def getremen():
    """Download the Douban movie home page and store its movie entries."""
    url = 'http://movie.douban.com/'
    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) '
                       'Gecko/20100101 Firefox/56.0'),
    }
    html = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Decode the body as UTF-8 rather than relying on the detected charset.
    html.encoding = 'utf-8'
    write__(html)


if __name__ == "__main__":
    getremen()