对某一视频共享网站电影名进行爬取
# Author:Winter Liu
"""Crawl movie titles from the paginated listing of a video-sharing site.

Run as a script, this downloads the first listing page and every numbered
page after it, extracts the title strings from each page with a regular
expression, and prints the titles of each page, the total number of titles,
the combined list and the elapsed wall-clock time in seconds.
"""
import re
import time
import urllib.error
import urllib.request

BASE_URL = 'https://yanghuanyu.com/dy'
LAST_PAGE = 30

# A title is four consecutive bracketed fields whose second field is exactly
# four digits.
# NOTE(review): the pattern previously read r'[.+][dddd][.+][.+]', i.e. four
# one-character classes that can only match junk such as '.d..'. It looks
# like this same pattern with its backslashes stripped, so the escapes are
# restored here -- confirm against the live page markup.
TITLE_PATTERN = re.compile(r'\[.+\]\[\d\d\d\d\]\[.+\]\[.+\]')


def page_urls(base_url=BASE_URL, last_page=LAST_PAGE):
    """Return the listing URLs in crawl order.

    The first page is served at *base_url* itself; pages 2 through
    *last_page* (inclusive) are served at ``<base_url>/page/<n>``.
    """
    urls = [base_url]
    urls.extend('{}/page/{}'.format(base_url, n)
                for n in range(2, last_page + 1))
    return urls


def extract_titles(html):
    """Return the distinct titles found in *html*, in first-seen order."""
    # dict.fromkeys drops duplicates while keeping a stable order, so two
    # runs over the same page print the same list.
    return list(dict.fromkeys(TITLE_PATTERN.findall(html)))


def fetch_html(url):
    """Download *url* and return the response body decoded as UTF-8."""
    # The context manager closes the connection even when read() fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def crawl(urls, fetch=fetch_html):
    """Fetch every URL in *urls* and return all extracted titles.

    Titles are de-duplicated within a page but not across pages. A page
    whose request fails with ``urllib.error.URLError`` (which includes HTTP
    error statuses) is reported and skipped so that the titles gathered
    from the other pages are not lost.

    *fetch* is the callable used to turn a URL into page text; it exists so
    the crawl loop can be exercised without network access.
    """
    result = []
    for url in urls:
        try:
            html = fetch(url)
        except urllib.error.URLError as err:
            print('skipped {}: {}'.format(url, err))
            continue
        # To inspect the raw markup while adjusting TITLE_PATTERN, write
        # `html` to a file at this point.
        print(url)
        titles = extract_titles(html)
        print(titles)
        result.extend(titles)
    return result


def main():
    """Crawl the whole listing and print the results and the elapsed time."""
    start_time = time.time()
    result = crawl(page_urls())
    print(len(result))
    print(result)
    print(time.time() - start_time)


if __name__ == '__main__':
    main()