下面是一段简短的代码,展示如何把爬取到的内容写入 Redis:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fetch the Douban movie home page and push every movie entry found in it
onto a Redis list, one JSON string per movie.

Written to run unchanged on Python 2 and Python 3.
"""
import json
import re
import sys

import redis
import requests

# Seconds to wait for the HTTP request before giving up instead of hanging.
REQUEST_TIMEOUT = 10

# One movie entry is the text between a "data-tit..." attribute and the next
# "data-enough" attribute; the remaining attributes are parsed from that text.
_ENTRY_RE = re.compile(r'data-tit(.*?)data-enough', re.S)

# (output key, pattern) pairs; group 1 of each pattern is the stored value.
# The title pattern starts at 'le="' because _ENTRY_RE consumes "data-tit".
_FIELD_PATTERNS = (
    ('title', re.compile(r'le="(.*?)"', re.S)),
    ('year', re.compile(r'data-release="(.*?)" data', re.S)),
    ('Rating', re.compile(r'data-rate="(.*?)" data-star', re.S)),
    ('time', re.compile(r'data-duration="(.*?)" data-re', re.S)),
    ('reg', re.compile(r'data-region="(.*?)" data-dir', re.S)),
    ('act', re.compile(r'data-actors="(.*?)" data-in', re.S)),
)


def _parse_movies(page_text):
    """Return a list with one dict per movie entry found in *page_text*.

    Each dict has the keys listed in ``_FIELD_PATTERNS``. A field whose
    attribute is absent from the entry is stored as ``None`` rather than
    aborting the whole run.
    """
    movies = []
    for entry in _ENTRY_RE.findall(page_text):
        info = {}
        for key, pattern in _FIELD_PATTERNS:
            match = pattern.search(entry)
            info[key] = match.group(1) if match else None
        movies.append(info)
    return movies


class RedisTT(object):
    """Small wrapper that pushes strings onto a single Redis list."""

    def __init__(self, keyName='DouBan', host='localhost', port=6379):
        """Create the Redis client.

        Args:
            keyName: name of the Redis list that receives the items.
            host: Redis server host name.
            port: Redis server TCP port (an integer).
        """
        self.keyName = keyName
        self.host = host
        self.port = port
        # The attribute is called "re" for compatibility with existing users
        # of this class; it is the Redis client, not the regex module.
        self.re = redis.Redis(host=self.host, port=self.port)

    def insertRedis(self, jsonStr):
        """Store *jsonStr* in Redis by LPUSH-ing it onto ``self.keyName``."""
        self.re.lpush(self.keyName, jsonStr)


class Douban(object):
    """Scraper for the movie entries on the Douban movie home page."""

    def __init__(self, store=None):
        """Remember where parsed movies are written.

        Args:
            store: object with an ``insertRedis(jsonStr)`` method. When
                omitted, a default ``RedisTT`` is created on first use.
        """
        self._store = store

    def write(self, htm):
        """Parse the movies out of *htm* and push each one to the store.

        Args:
            htm: response object whose ``text`` attribute holds the page
                source.
        """
        movies = _parse_movies(htm.text)
        if not movies:
            return
        # Build the store once and reuse it for every movie instead of
        # creating a new Redis client per item.
        if self._store is None:
            self._store = RedisTT()
        for info in movies:
            self._store.insertRedis(json.dumps(info))

    def getremen(self):
        """Download the Douban movie home page and store its movies.

        Raises:
            requests.RequestException: if the request times out, fails, or
                the server answers with an HTTP error status.
        """
        url = 'http://movie.douban.com/'
        html = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Fail loudly on an error page instead of silently storing nothing.
        html.raise_for_status()
        html.encoding = 'utf-8'
        self.write(html)


if __name__ == "__main__":
    Douban().getremen()
结果如下: