• 第十三节 电影天堂项目实战


     1 from lxml import etree
     2 import requests
     3 
     4 
     # Site root; the hrefs scraped from listing pages are appended to it.
     baseurl = 'https://www.dytt8.net'

     # Headers passed to every requests.get call in this script.
     headers = {}
     headers['User-Agent'] = (
         'Mozilla/5.0 (Windows NT 10.0; WOW64) '
         'AppleWebKit/537.36 (KHTML, like Gecko) '
         'Chrome/63.0.3239.132 Safari/537.36'
     )
     headers['Referer'] = 'https://www.dytt8.net/html/gndy/dyzz/index.html'
    def agent(ur):
        """Download the page at ``ur`` and return it as a parsed lxml HTML tree.

        Args:
            ur: URL of the page to fetch.

        Returns:
            The root element built by ``etree.HTML`` from the response text.

        Raises:
            requests.RequestException: on connection failure, timeout, or an
                HTTP error status (4xx/5xx).
        """
        # Without a timeout a stalled connection would block the crawl forever.
        resp = requests.get(ur, headers=headers, timeout=10)
        # Fail loudly rather than parsing an error page as if it were a listing.
        resp.raise_for_status()
        return etree.HTML(resp.text)
    17 
    def movie_url_list(html):
        """Return the ``href`` of every ``<a>`` nested in a ``table.tbspan``.

        Args:
            html: Parsed page exposing an ``xpath`` method (as lxml elements do).

        Returns:
            Whatever ``html.xpath`` yields for the link query, unmodified.
        """
        link_query = "//table[@class='tbspan']//a/@href"
        hrefs = html.xpath(link_query)
        return hrefs
    21 
    def parse_info(info, rule):
        """Delete every occurrence of ``rule`` from ``info`` and trim whitespace.

        Args:
            info: The raw text line.
            rule: The substring (here, a field label) to remove.

        Returns:
            ``info`` without ``rule`` and without leading/trailing whitespace.
        """
        without_label = info.replace(rule, '')
        return without_label.strip()
    24 
    # (label as it appears on the detail page, key used in the result dict).
    # NOTE(review): the labels contain two plain spaces here; confirm that the
    # live pages do not use full-width spaces instead.
    _MOVIE_FIELDS = (
        ('◎片  名', 'pianming'),
        ('◎年  代', 'niandai'),
        ('◎产  地', 'chandi'),
        ('◎类  别', 'leixing'),
        ('◎上映日期', 'shangyingshijian'),
        ('◎豆瓣评分', 'doubanpingfen'),
        ('◎片  长', 'pianchang'),
        ('◎标  签', 'biaoqian'),
    )


    def _collect_movie_fields(text_nodes):
        """Build the movie dict from the text nodes that start with a known label."""
        movie = {}
        for info in text_nodes:
            for label, key in _MOVIE_FIELDS:
                if info.startswith(label):
                    movie[key] = parse_info(info, label)
                    # One line carries one field; stop so the stripped value is
                    # never re-tested against the remaining labels.
                    break
        return movie


    def xiangqingye(url):
        """Fetch a movie detail page and extract its labelled metadata.

        Args:
            url: Absolute URL of the detail page.

        Returns:
            A dict containing any of the keys ``pianming``, ``niandai``,
            ``chandi``, ``leixing``, ``shangyingshijian``, ``doubanpingfen``,
            ``pianchang`` and ``biaoqian`` whose label was found inside
            ``div#Zoom``. Labels that are absent simply produce no key.

        Raises:
            requests.RequestException: on connection failure, timeout, or an
                HTTP error status (4xx/5xx).
        """
        # Without a timeout a stalled connection would block the crawl forever.
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()
        # Drop undecodable bytes instead of aborting the whole page on one of them.
        text = resp.content.decode('gbk', errors='ignore')
        html = etree.HTML(text)
        return _collect_movie_fields(html.xpath('//div[@id="Zoom"]//text()'))
    57 
    def alldata(start_page=1, end_page=2):
        """Crawl listing pages and return the metadata of every movie linked.

        Args:
            start_page: Number of the first listing page to crawl.
            end_page: Page number at which to stop (exclusive). The defaults
                crawl page 1 only.

        Returns:
            A list with one dict per detail page, in the order the links were
            found. It is empty when the page range is empty.
        """
        # Local import so this function brings its own dependency into scope.
        from urllib.parse import urljoin

        list_prefix = 'https://www.dytt8.net/html/gndy/dyzz/list_23_'
        list_suffix = '.html'
        movies = []
        for page in range(start_page, end_page):
            listing = agent(list_prefix + str(page) + list_suffix)
            for href in movie_url_list(listing):
                # urljoin gives the same result as concatenation for
                # root-relative hrefs and also copes with absolute or
                # slash-less ones.
                movies.append(xiangqingye(urljoin(baseurl, href)))
        return movies
    if __name__ == '__main__':
        # Script entry point: run the crawl and print the collected records.
        results = alldata()
        print(results)
  • 相关阅读:
    移动web开发框架研究
    Android客户端缓存机制(文字缓存和多媒体文件缓存)
    edittext实现自动查询,刷新listview
    Android属性动画 nineoldandroids
    android 框架LoonAndroid,码农偷懒专用
    待看
    aidl
    国内外DNS服务器地址列表
    js 防止重复点击
    js 模块化
  • 原文地址:https://www.cnblogs.com/kogmaw/p/12506974.html
Copyright © 2020-2023  润新知