• 1.4.1python下载网页(每天一更)


    # -*- coding: utf-8 -*-
    
    '''
    Created on 2019年4月27日
    
    @author: lenovo
    '''
    
    # import urllib3
    # def download(url):
    #     return urllib3.connection_from_url(url)
    # 
    # print(download('http://now.qq.com'))
    
    
    
    
    
    # 在 Python 3 中,urllib2 已被 urllib.request 所取代
    
    # import urllib.request
    # def download(url):
    #     return urllib.request.urlopen(url).read()
    # 
    # print(download('https://baijiahao.baidu.com/s?id=1632775818269407606&wfr=spider&for=pc'))
    
    
    # import urllib.request
    # def download(url):
    #     print("Downloading:" + url)
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #     return html
    # 
    # print(download("htp://www.baidu.co"))
    
    
    # import urllib.request
    # def download(url, num_retries=2):
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #         if num_retries > 0 :
    #             if hasattr(e, "code") and 500 <= e.code < 600 :
    #                 return download(url, num_retries-1)
    #     return html
    #     
    # # print(download("http://httpstat.us/500"))
    # print(download("http://www.meetup.com/"))
    
    import urllib.request
    def download(url, user_agent="wswp", num_retries=2):
        """Download the resource at *url* and return its raw body.

        Args:
            url: Address to fetch.
            user_agent: Value sent in the ``User-agent`` request header.
            num_retries: How many additional attempts to make when the server
                answers with a 5xx status. ``0`` disables retrying.

        Returns:
            The response body as returned by ``read()``, or ``None`` when the
            request failed and no retry succeeded.
        """
        print("Downloading: " , url)
        headers = {'User-agent': user_agent}
        request = urllib.request.Request(url, headers=headers)
        try:
            # Use the response as a context manager so the underlying
            # connection is closed as soon as the body has been read.
            with urllib.request.urlopen(request) as response:
                html = response.read()
        except urllib.request.URLError as e:
            print('Download error:' , e.reason)
            html = None
            # Only HTTP errors carry a ``code``; retry solely on server-side
            # (5xx) failures, and only while retries remain. The guard is
            # ``> 0`` so that ``num_retries`` counts actual retries.
            if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
                return download(url, user_agent, num_retries - 1)
        return html
    
    # Demo: fetch the Meetup home page and print whatever download() returns
    # (the raw body, or None if the request failed).
    page = download("http://www.meetup.com/")
    print(page)
  • 相关阅读:
    一些数论公式
    一位ACMer过来人的心得
    hdu 2069 Coin Change (母函数)
    关于 A^x = A^(x % Phi(C) + Phi(C)) (mod C) 的若干证明
    upper_bound()与lower_bound()使用方法
    POJ 计算几何入门题目推荐
    图像识别C++读取bmp位图入门
    Centos7 上安装FastDFS
    java的IO包类分层结构
    ==和equels
  • 原文地址:https://www.cnblogs.com/xww115/p/10822196.html
Copyright © 2020-2023  润新知