# mzitu - scrapes the Maoyan movie board (maoyan.com/board/4) and appends each entry to movie.json
'''
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2
'''
# -*- coding=utf-8 -*-
import requests
import lxml
import json
from lxml import etree
def getOnePage(n):
    """Fetch one page of the Maoyan board and return its HTML text.

    Args:
        n: Zero-based page index; the site is queried with ``offset=n*10``.

    Returns:
        The response body decoded as text (``Response.text``).

    Raises:
        requests.exceptions.RequestException: if the connection fails or the
            server does not answer within the timeout.
    """
    url = f'http://maoyan.com/board/4?offset={n*10}'
    # A browser-like User-Agent is sent with every request.
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2'}
    # Without an explicit timeout requests may wait forever on a stalled server.
    r = requests.get(url, headers=header, timeout=10)
    # Echo the response object (shows the HTTP status) as a progress indicator.
    print(r)
    return r.text
# Startup banner. The line breaks are written as "\n" escapes because a
# single-quoted string literal cannot span several physical lines.
print('世界,你好!\nhello world!\n')
# Module-level dict exposed as the global ``item``.
item = {}
# Running entry counter; run() updates it through ``global id``. The name
# shadows the builtin id() but is kept because run() depends on it.
id = 0
def parse(text):
    """Yield one dict per movie found in the HTML of a board page.

    Args:
        text: HTML source of a board page.

    Yields:
        dict: ``{'名称': <title attribute>, 'time': <release-time text>}``.
        A new dict is created for every movie, so results can be collected
        (e.g. ``list(parse(text))``) without all entries aliasing one object.
    """
    html = etree.HTML(text)
    info = '//div[@class="board-item-content"]/div[@class="movie-item-info"]'
    names = html.xpath(info + '/p[@class="name"]/a/@title')
    stars = html.xpath(info + '/p[@class="star"]/text()')
    releasetimes = html.xpath(info + '/p[@class="releasetime"]/text()')
    # The star text is not emitted, but it stays in zip() so the number of
    # yielded entries is still bounded by the shortest of the three lists.
    for name, _star, releasetime in zip(names, stars, releasetimes):
        yield {'名称': name, 'time': releasetime}
def save2file(data):
    """Append *data* to ``movie.json`` as one JSON text followed by ``,`` and a newline.

    Args:
        data: A JSON-serializable object (run() passes one movie dict).

    The file is opened in append mode with UTF-8 encoding, and non-ASCII
    characters are written as-is (``ensure_ascii=False``).
    """
    # The separator is written with a "\n" escape: a single-quoted literal
    # cannot contain a raw line break.
    line = json.dumps(data, ensure_ascii=False) + ',\n'
    with open('movie.json', 'a', encoding='utf-8') as f:
        f.write(line)
def run(pages=10):
    """Fetch board pages, then print and save every parsed entry.

    Args:
        pages: Number of consecutive pages to fetch, starting at page 0.
            Defaults to 10, the count that was previously hard-coded.

    Each entry is printed together with its running number and then handed
    to save2file(). The running number is kept in the module global ``id``.
    """
    # Declared once at function scope instead of on every loop iteration.
    global id
    for n in range(pages):
        # parse() is a generator, so entries are saved as they are produced.
        for item in parse(getOnePage(n)):
            id += 1
            print(id, item)
            save2file(item)
# Start scraping only when the file is executed as a script, not on import.
if __name__ == '__main__':
    run()