• python爬虫2——下载文件(中华网图片库下载)


    # -*- coding: utf-8 -*-
    import requests
    import re
    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')
    
    if __name__ == '__main__':
        # Scrape one photo-topic page and save every image it references into
        # lovelyAnimals/ as 100.jpg, 101.jpg, ... (numbered in page order).
        import os  # local import: only this script section needs it

        site_root = "http://photostock.china.com.cn"
        out_dir = "lovelyAnimals"
        request_timeout = 30  # seconds; without a timeout a stalled server hangs the script

        url = 'http://photostock.china.com.cn/Web_CHN/SpecialTopicPhoto.aspx?Id=296'

        # open() never creates missing directories, so make sure the target exists
        # before the first write.
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        html = requests.get(url, timeout=request_timeout)
        # Fail loudly on an HTTP error instead of silently parsing an error page.
        html.raise_for_status()

        # Capture each <img> src, minus its first two characters.
        # NOTE(review): the two dots are unescaped, so they match ANY two
        # characters; presumably they are meant to drop a leading ".." from
        # relative paths such as "../Upload/x.jpg" -- confirm against the page.
        img_src = re.findall('<img alt=.*?src="..(.*?)".*?/>', html.text, re.S)
        imgUrl = [site_root + each_src for each_src in img_src]

        for picName, each in enumerate(imgUrl, 100):
            response = requests.get(each, timeout=request_timeout)
            if response.status_code != 200:
                # Do not save an HTML error body under a .jpg name; report and
                # move on so one bad link does not abort the whole batch.
                sys.stderr.write("skip %s (HTTP %d)\n" % (each, response.status_code))
                continue
            with open(os.path.join(out_dir, str(picName) + ".jpg"), "wb") as code:
                code.write(response.content)
    
    '''
    下载文件的3种方法
    (1): 使用urllib.urlretrieve方法,可在callbackfunc函数中显示下载进度
    import urllib
    def callbackfunc(blocknum, blocksize, totalsize):
        # 回调函数
        # @blocknum:
        #     已经下载的数据块
    
        # @blocksize:
        #     数据块的大小
    
        # @totalsize:
        #     远程文件的大小
        percent = 100.0 * blocknum * blocksize / totalsize
        if percent > 100:
            percent = 100
        print "%.2f%%"% percent
    url = 'http://www.sina.com.cn'
    local = 'lovelyAnimals/sina.html'
    urllib.urlretrieve(url, local, callbackfunc)
    
    (2):使用urllib2.urlopen
    import urllib2
    url = 'http://www.sina.com.cn'
    f = urllib2.urlopen(url)
    data = f.read()
    with open("lovelyAnimals/sina.html", "wb") as code:
        code.write(data)
    
    (3):使用requests模块
    import requests
    url = 'http://www.sina.com.cn'
    html = requests.get(url)
    with open("lovelyAnimals/sina.html", "wb") as code:
        code.write(html.content)
    '''
  • 相关阅读:
    CString与char *互转总结
    string 与char* char[]之间的转换
    VC++下使用SQLite数据库
    VC连接数据库方式
    C/C++中判断某一文件或目录是否存在
    漂亮的CSS按钮样式集以及在线生成工具
    PhpStorm 4.0 & 5.0 部署本地Web应用 (转)
    PHP的serialize序列化数据与JSON格式化数据
    c/c++中产生随机数
    [STL系列]开篇简单介绍
  • 原文地址:https://www.cnblogs.com/everSeeker/p/5014843.html
Copyright © 2020-2023  润新知