• [Python]简单的爬取图片


     1 import os
     2 import io
     3 import sys
     4 import re
     5 import urllib.request
     6 
     # Re-wrap stdout so the Chinese progress messages are emitted as GB2312
     # (the encoding expected by a Simplified-Chinese Windows console).
     #   * errors='replace': GB2312 cannot represent every character, so
     #     substitute un-encodable ones instead of raising UnicodeEncodeError.
     #   * line_buffering=True: a freshly created wrapper is block-buffered,
     #     which would hold progress lines back until the buffer fills or the
     #     program exits.
     sys.stdout = io.TextIOWrapper(
         sys.stdout.buffer,
         encoding='gb2312',
         errors='replace',
         line_buffering=True,
     )
    17 
    # Request headers that make the scraper look like a desktop Chrome browser.
    # NOTE: 'Accept-Encoding' advertises gzip/deflate support, so a server is
    # allowed to answer with a compressed body; whoever sends these headers
    # must be prepared to decompress the response.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4,en-GB;q=0.2',
    }
    40 
    41 
    def download_page(url):
        """Fetch *url* and return the response body as raw bytes.

        The request is sent with the module-level ``headers``.  Because those
        headers advertise gzip/deflate support, a body that arrives with a
        matching ``Content-Encoding`` is decompressed before it is returned.
        No text decoding is performed, so the result is suitable both for
        HTML pages and for binary files such as images.

        Raises whatever ``urllib.request.urlopen`` raises for network or HTTP
        errors (``urllib.error.URLError`` and its subclasses).
        """
        import gzip
        import zlib

        request = urllib.request.Request(url, headers=headers)
        # Open the Request object (not the bare URL) so the headers are really
        # sent, and close the connection as soon as the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read()
            encoding = (response.headers.get('Content-Encoding') or '').lower()

        if encoding == 'gzip':
            data = gzip.decompress(data)
        elif encoding == 'deflate':
            try:
                data = zlib.decompress(data)
            except zlib.error:
                # Some servers send a raw deflate stream without the zlib
                # header; a negative wbits value tells zlib to expect that.
                data = zlib.decompress(data, -zlib.MAX_WBITS)
        return data
    49 
    50 
    def get_image(html):
        """Download every ``.jpg`` image referenced by an absolute URL in *html*.

        *html* is the page source, either as ``bytes`` (what ``download_page``
        returns) or as ``str``.  Each URL found is fetched with
        ``download_page`` and written to the current directory as ``1.jpg``,
        ``2.jpg``, ... in the order the URLs appear.  Returns ``None``.
        """
        if isinstance(html, bytes):
            # URLs are plain ASCII, so bytes outside ASCII (e.g. GBK page text)
            # can be dropped without affecting the matches.
            text = html.decode('ascii', errors='ignore')
        else:
            text = html if isinstance(html, str) else str(html)

        # The URL body is "anything but whitespace, quotes or angle brackets",
        # matched lazily so each match stops at its own ".jpg"; the dot is
        # escaped so it only matches a literal ".".
        pattern = re.compile(r'''https?://[^\s"'<>]+?\.jpg''')

        for num, img in enumerate(pattern.findall(text), start=1):
            # Announce the image that is about to be fetched (1-based).
            print(u'正在下载第%s张图片' % num)
            image = download_page(img)
            with open('%s.jpg' % num, 'wb') as fp:
                fp.write(image)
        return None
    64 
    # Page whose linked .jpg images are downloaded when the script is run.
    url = 'http://pic.yesky.com/180/99839180_2.shtml'

    if __name__ == '__main__':
        # Only touch the network when executed directly, not on import.
        html = download_page(url)
        get_image(html)
  • 相关阅读:
    Linux(Ubuntu)总结(三)
    Linux(Ubuntu):搭建Docker
    JavaScript数字格式化方法:千分位,百分数
    解决mysqld --install:Install/Remove of the Service Denied!
    【转】Oracle基础结构认知——oracle内存结构 礼记八目 2017-12-15 20:31:27
    【转】Oracle基础结构认知—初识oracle 礼记八目 2017-12-12 21:19:30
    Linux 常用命令
    oracle AWR详解
    VMware 虚拟化技术 创建虚拟机
    MySQL数据库各个版本的区别
  • 原文地址:https://www.cnblogs.com/Start12/p/9342092.html
Copyright © 2020-2023  润新知