• 豆瓣top250.py


    # Scrape the title and rating of the entries on the Douban Top 250 list.
    #
    # Author's note: this worked for one run and then the client got banned;
    # the next step being prepared is crawling the title and grade of every
    # movie on Douban.
    import random
    import sys

    import requests
    from bs4 import BeautifulSoup

    # The header name has to be spelled 'User-Agent'; a 'user_agent' key is sent
    # as an unrelated header and the server still sees the default
    # python-requests agent string.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36'
    }

    proxy_list = [
        'http://117.177.250.151:8081',
        'http://111.85.219.250:3129',
        'http://122.70.183.138:8118',
    ]
    proxy_ip = random.choice(proxy_list)  # pick one proxy at random for this run
    # requests picks the proxy by the scheme of the *target* URL. The list page
    # is fetched over https, so the https scheme must be mapped as well or the
    # proxy is silently bypassed.
    proxies = {'http': proxy_ip, 'https': proxy_ip}


    def get_items_from(page):
        """Print the title and grade of the first ``page`` entries of the list.

        One ``{'title': ..., 'grade': ...}`` dict is printed per entry, in list
        order. Every entry found on a fetched page is used, and the ``start``
        offset advances by the number of entries that page contained, so each
        page is requested only once instead of once per entry.

        Args:
            page: Number of list entries to print, counted from the top.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (for example when the client has been blocked).
            requests.RequestException: On connection, proxy or timeout errors.
        """
        start = 0
        while start < page:
            list_view = 'https://movie.douban.com/top250?start={}&filter='.format(start)
            wb_data = requests.get(list_view, headers=headers, proxies=proxies, timeout=10)
            # Fail loudly on 4xx/5xx instead of trying to parse an error page.
            wb_data.raise_for_status()
            soup = BeautifulSoup(wb_data.text, 'lxml')

            # An entry may carry several span.title elements; only the first
            # one inside each div.hd is kept so titles line up with ratings.
            first_titles = (hd.select_one('a > span.title') for hd in soup.select('div.hd'))
            titles = [title for title in first_titles if title is not None]
            grades = soup.select('div.star > span.rating_num')
            entries = list(zip(titles, grades))

            if not entries:
                # Past the end of the list, or the markup changed: stop rather
                # than requesting the same empty page forever.
                print('no entries found at start={}; stopping'.format(start), file=sys.stderr)
                break

            # Do not print more than the caller asked for on the last page.
            for title, grade in entries[:page - start]:
                data = {
                    'title': title.text,
                    'grade': grade.text,
                }
                print(data)

            start += len(entries)


    if __name__ == '__main__':
        get_items_from(250)
  • 相关阅读:
    django之上传
    django的ORM操作
    Python中的分页管理
    MySQL作业
    socket操作
    python的os模块
    django-debug-toolbar的配置及使用
    logging模板及配置说明
    使用StrictRedis连接操作有序集合
    学习总结
  • 原文地址:https://www.cnblogs.com/dws-love-jfl-1314/p/6047964.html
Copyright © 2020-2023  润新知