• 猫眼前100


    #mzitu
    '''
    User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2
    '''
    # -*- coding=utf-8 -*-
    import requests
    import lxml
    import json
    from lxml import etree

    def getOnePage(n, timeout=10):
        """Fetch one page of the Maoyan board listing and return its HTML text.

        Args:
            n: Zero-based page index; the request uses ``offset = n * 10``.
            timeout: Seconds to wait for the server before ``requests`` gives
                up with a timeout error instead of blocking indefinitely.

        Returns:
            The response body as text (``r.text``).
        """
        url = f'http://maoyan.com/board/4?offset={n*10}'
        # Send a desktop-browser User-Agent string with the request.
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2'}
        r = requests.get(url, headers=header, timeout=timeout)
        # Printing the response object shows its status code on the console.
        print(r)
        return r.text
    #global html=''
    # Greeting printed once when the module is executed or imported.
    print('世界,你好! hello world! ')

    # Module-level dict available as a shared movie record.
    # NOTE(review): reusing one dict for every record makes all yielded
    # records alias each other; a fresh dict per record is safer.
    item = {}
    # Running count of movies processed; run() increments it via `global id`.
    # NOTE(review): this name shadows the builtin id().
    id = 0
    def parse(text):
        """Yield one dict per movie found in a board page.

        Args:
            text: HTML source of a board page.

        Yields:
            A new dict for each movie with the keys ``'名称'`` (the ``title``
            attribute of the name link) and ``'time'`` (the text of the
            ``releasetime`` paragraph). A fresh dict is created per movie so
            that collected results do not alias one another.
        """
        html = etree.HTML(text)
        info = '//div[@class="board-item-content"]/div[@class="movie-item-info"]'
        names = html.xpath(info + '/p[@class="name"]/a/@title')
        stars = html.xpath(info + '/p[@class="star"]/text()')
        releasetimes = html.xpath(info + '/p[@class="releasetime"]/text()')
        # The star text is not emitted, but it stays in the zip so that the
        # pairing still stops at the shortest of the three lists.
        for name, _star, releasetime in zip(names, stars, releasetimes):
            yield {'名称': name, 'time': releasetime}


    def save2file(data):
        """Append *data* to ``movie.json`` as JSON text followed by ``', '``.

        Args:
            data: A JSON-serialisable object (e.g. a movie dict).

        Non-ASCII characters are written as-is (``ensure_ascii=False``) and
        the file is opened in append mode with UTF-8 encoding.

        NOTE(review): because every record is followed by ``', '`` the file
        as a whole is a comma-separated sequence of objects, not a single
        valid JSON document.
        """
        # Serialise before opening the file so a failing dumps() leaves the
        # file untouched.
        serialized = json.dumps(data, ensure_ascii=False) + ', '
        with open('movie.json', 'a', encoding='utf-8') as f:
            f.write(serialized)

    def run(pages=10):
        """Fetch, parse, print and save the movies from the first *pages* pages.

        Args:
            pages: Number of board pages to process, starting at page 0.

        For every movie yielded by ``parse`` the module-level counter ``id``
        is incremented, the counter and the record are printed, and the
        record is appended to the output file via ``save2file``.
        """
        # Declared once at function scope; the counter lives at module level.
        global id
        for n in range(pages):
            text = getOnePage(n)
            for movie in parse(text):
                id += 1
                print(id, movie)
                save2file(movie)

    # Start the crawl only when the file is executed as a script.
    if __name__ == '__main__':
        run()

  • 相关阅读:
    在Android studio中,测试输出数组中最大子数组的和
    我所理解的软件开发模式
    java实现随机输出300题四则运算
    Demo(3月28日)
    关于构建之法中小飞问题的个人看法
    对搭档代码的一些意见
    项目复审
    安卓UI测试(基于android studio环境 espresso框架)
    读构建之法后的一些个人感受
    思考题
  • 原文地址:https://www.cnblogs.com/pscc/p/9774919.html
Copyright © 2020-2023  润新知