• 爬取图片(一)


    源码:

     1 import requests
     2 from lxml import etree
     3 from urllib import request
     4 import os
     5 
     6 # 获取页面图集链接
     def get_url_list(page, headers, timeout=10):
         """Return the gallery links found on one listing page.

         Requests ``http://www.meizitu.com/a/more_<page>.html``, decodes the
         body as GBK and, for every ``<li>`` under
         ``<ul class="wp-list clearfix">``, takes the first
         ``./div/div/a/@href`` value.

         Args:
             page: Value substituted into the listing URL.
             headers: HTTP headers passed to ``requests.get``.
             timeout: Seconds passed to ``requests.get`` as its ``timeout``.

         Returns:
             A list of href strings in document order. List items that
             contain no matching link are skipped.
         """
         listing_url = 'http://www.meizitu.com/a/more_{}.html'.format(page)
         # Without a timeout a stalled connection would block the crawl forever.
         response = requests.get(listing_url, headers=headers, timeout=timeout)
         response.encoding = 'gbk'
         html_ele = etree.HTML(response.text)
         page_list = []
         for ele in html_ele.xpath('//ul[@class="wp-list clearfix"]/li'):
             hrefs = ele.xpath('./div/div/a/@href')
             # An <li> without the expected link must not abort the whole
             # page with an IndexError; just move on to the next item.
             if hrefs:
                 page_list.append(hrefs[0])
         return page_list
    20 
    21 # 保存图片
    def get_pictures(url, headers, timeout=10):
        """Download the images of one gallery page into ``妹子图/<title>/``.

        The gallery title is read from ``//div[@class="metaRight"]/h2/a`` and
        used as the directory name; image addresses come from
        ``//div[@id="picture"]/p/img/@src``. Files that already exist on disk
        are not downloaded again. The path of every newly saved file is
        printed.

        Args:
            url: Address of the gallery page.
            headers: HTTP headers passed to ``requests.get`` for the page
                request.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``.

        Returns:
            None. A page without a usable title is skipped with a message
            instead of raising, and a failed image download is reported and
            skipped so the remaining images are still attempted.
        """
        response = requests.get(url, headers=headers, timeout=timeout)
        response.encoding = 'gbk'
        html_ele = etree.HTML(response.text)

        title_nodes = html_ele.xpath('//div[@class="metaRight"]/h2/a')
        title = title_nodes[0].text if title_nodes else None
        if not title:
            # No title means no directory name; skip this gallery rather than
            # crash the whole crawl on an IndexError/TypeError.
            print('skipped (no title found): {}'.format(url))
            return

        dir_name = '妹子图/' + title
        os.makedirs(dir_name, exist_ok=True)

        for img_url in html_ele.xpath('//div[@id="picture"]/p/img/@src'):
            filename = dir_name + '/' + img_url.split('/')[-1]
            if os.path.exists(filename):
                continue
            try:
                request.urlretrieve(img_url, filename)
            except Exception as exc:
                # Downloads stay best-effort, but the failure is reported and
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                print('failed: {} ({})'.format(img_url, exc))
                continue
            print(filename)
    40 
    41 
    if __name__ == '__main__':
        # Browser-like User-Agent sent with every page request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        }
        # Listing pages 1 through 72 are crawled in order.
        first_page, last_page = 1, 72
        for page_no in range(first_page, last_page + 1):
            # Fetch the gallery links of this listing page, then save each
            # gallery's images before moving on to the next page.
            for gallery_url in get_url_list(page_no, headers):
                get_pictures(gallery_url, headers)
  • 相关阅读:
    APP-Android:APK
    软件-版本控制:VCS(版本控制系统)
    协议-网络-安全协议-SSH(安全外壳协议):百科
    un-解决方案-BIM:百科
    MySQL:常用语句
    rsync+inotify-tools文件实时同步
    Java实现 Leetcode 169 求众数
    Java实现 Leetcode 169 求众数
    Java实现 LeetCode 137 只出现一次的数字
    Java实现 LeetCode 137 只出现一次的数字
  • 原文地址:https://www.cnblogs.com/zhxd-python/p/9501299.html
Copyright © 2020-2023  润新知