• Python urllib2 调试


    #!/usr/bin/env python
    # coding=utf-8
    __author__ = 'zhaoyingnan'
    import urllib
    import urllib2
    import chardet
    
    
    class HtmlLoader:
        """Fetch a page with urllib2 using browser-like request headers."""

        def urlLoad(self, sUrl, isPost=False, isDebug=False):
            """Request ``sUrl`` and return the response body.

            Args:
                sUrl: URL to request.
                isPost: when exactly ``True``, an (empty) url-encoded form body
                    is attached to the request, which makes urllib2 send a
                    POST; otherwise no body is attached.
                isDebug: when exactly ``True``, the opener is built with HTTP
                    and HTTPS handlers created with ``debuglevel=1``.

            Returns:
                The response body when the status code is 200. It is decoded
                from GBK to ``unicode`` when chardet reports an encoding whose
                name starts with "GB"; otherwise the raw bytes are returned
                unchanged. ``None`` is returned when the status code is not 200
                or when any exception is raised (the exception is printed).
            """
            try:
                dictHeaders = {
                    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/51.0.2704.79 Chrome/51.0.2704.79 Safari/537.36',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Accept-Charset': 'utf-8',
                    'Referer': 'https://www.baidu.com/?tn=92765401_hao_pg',
                    'Accept-Language': 'zh-CN,zh;q=0.8',
                }

                if isPost is True:
                    # No form fields are defined, so the POST body is the
                    # empty string produced by encoding an empty dict.
                    sData = urllib.urlencode({})
                    Request = urllib2.Request(url=sUrl, data=sData, headers=dictHeaders)
                else:
                    Request = urllib2.Request(url=sUrl, headers=dictHeaders)

                # Tip: Request.get_method() reports the HTTP method that will be used.

                # Proxy: an empty mapping configures no proxies.
                listHandlers = [urllib2.ProxyHandler({})]

                # Debugging: only create the debug handlers when requested.
                if isDebug is True:
                    listHandlers.append(urllib2.HTTPHandler(debuglevel=1))
                    listHandlers.append(urllib2.HTTPSHandler(debuglevel=1))

                Opener = urllib2.build_opener(*listHandlers)
                # Kept from the original: this also makes the opener the default
                # one used by later urllib2.urlopen() calls.
                urllib2.install_opener(Opener)

                Response = Opener.open(Request)
                try:
                    if Response.getcode() != 200:
                        return None
                    sContent = Response.read()
                finally:
                    # Always release the connection, even if read() fails.
                    Response.close()

                # chardet reports None as the encoding when it cannot guess
                # (e.g. an empty body); guard against that so a successfully
                # fetched body is not lost to an AttributeError.
                sEncoding = chardet.detect(sContent).get('encoding')
                if sEncoding and sEncoding.startswith("GB"):
                    sContent = sContent.decode("GBK")
                return sContent
            except Exception as e:
                # Best-effort loader: report the failure and signal it with None.
                print(e)
                return None
    
    
    if (__name__ == '__main__'):
        sUrl = 'http://www.cnblogs.com/yingnan/p/5659966.html'
        demo = HtmlLoader()
        sContent = demo.urlLoad(sUrl=sUrl, isPost=False, isDebug=True)
        print sContent
    

      

  • 相关阅读:
    Js变量类型
    前端面试题HTML
    AP聚类
    锚点
    html
    Active Learning主动学习
    z-index
    position
    学习笔记_卷积神经网络
    学习笔记_深度学习的数学基础
  • 原文地址:https://www.cnblogs.com/yingnan/p/5884293.html
Copyright © 2020-2023  润新知