• python2.x urllib2和urllib的使用


    1.最简单用法

      urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,...)

     1 import urllib2
     2 import urllib
     3 
     4 
     5 response = urllib2.urlopen("http://www.baidu.com")
     6 
     7 print 'getcode():',response.getcode()
     8 print 'geturl():',response.geturl()
     9 print 'url:',response.url
    10 print 'headers:
    ',response.headers
    11 print 'msg:',response.msg
    12 
    13 #-------------------------------------out--------------------------------------
    14 getcode(): 200
    15 geturl(): http://www.baidu.com
    16 url: http://www.baidu.com
    17 headers:
    18 Date: Thu, 29 Dec 2016 06:28:36 GMT
    19 Content-Type: text/html; charset=utf-8
    20 Transfer-Encoding: chunked
    21 Connection: Close
    22 Vary: Accept-Encoding
    23 Set-Cookie: BAIDUID=9A1E663B4C3AB33D11266F0D865A1F59:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
    24 Set-Cookie: BIDUPSID=9A1E663B4C3AB33D11266F0D865A1F59; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
    25 Set-Cookie: PSTM=1482992916; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
    26 Set-Cookie: BDSVRTM=0; path=/
    27 Set-Cookie: BD_HOME=0; path=/
    28 Set-Cookie: H_PS_PSSID=21858_1464_21112_17001_21553_20930; path=/; domain=.baidu.com
    29 P3P: CP=" OTI DSP COR IVA OUR IND COM "
    30 Cache-Control: private
    31 Cxy_all: baidu+0ba0b09e0fa305471b5e3b42c352570f
    32 Expires: Thu, 29 Dec 2016 06:27:54 GMT
    33 X-Powered-By: HPHP
    34 Server: BWS/1.1
    35 X-UA-Compatible: IE=Edge,chrome=1
    36 BDPAGETYPE: 1
    37 BDQID: 0x889c1bcd00004be7
    38 BDUSERID: 0
    39 
    40 msg: OK
    View Code

     获取html内容

    1 print response.read()     #returns the whole page as one str
    2 print response.readline() #returns one line per call
    3 print response.readlines() #returns all lines as a list
    View Code

    2.  构造Request 设置headers

     1 def set_headers():
     2     #Build a Request with custom headers, then append one more header
     3     #__init__(self, url, data=None, headers={},origin_req_host=None, unverifiable=False)
     4     import urllib2
     5     headers = {'User-Agent':'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
     6     request = urllib2.Request("http://localhost:5000/urllib2testget",headers=headers)
     7 
     8     response = urllib2.urlopen(request)
     9     print request.headers
    10     #Append one more header to the same request
    11     request.add_header("addheader","nice")
    12     response = urllib2.urlopen(request)
    13     print request.headers
    14 
    15 set_headers()
    16 
    17 #--------------------------------输出:
    18 
    19 {'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
    20 {"a": "1", "": "2"}
    21 ------------------------------------------------
    22 {'Addheader': 'nice', 'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
    23 {"a": "1", "": "2"}
    View Code

    3.发送get请求,发送post请求

     1 def get_post():
     2     #get方式
     3     import urllib2
     4     import urllib
     5     headers = {'User-Agent':'liu bi'}
     6     values = {"username":"diaosir_get","password":"diao123_get"}
     7     data = urllib.urlencode(values)
     8     print '---------------------get:'
     9     url = "http://localhost:5000/urllib2testget"
    10     get_url=url+"?"+data
    11     request = urllib2.Request(get_url,headers=headers)
    12     response = urllib2.urlopen(request)
    13     print json.loads(response.read())
    14     print '---------------------post:'
    15     url = "http://localhost:5000/urllib2testpost"
    16     request = urllib2.Request(url,data,headers=headers)
    17     response = urllib2.urlopen(request)
    18     print json.loads(response.read())
    19 
    20 get_post()
    21 
    22 #---------------------------------------------------------输出:
    23 ---------------------get:
    24 {u'username': u'diaosir_get', u'password': u'diao123_get'}
    25 ---------------------post:
    26 {u'username': u'diaosir_get', u'password': u'diao123_get'}
    post&get

    4.代理模式设置

    def set_proxies():
        #Demonstrate urllib2 proxy setup (Python 2):
        #1. create a ProxyHandler
        #2. build an opener from it
        #3. install the opener globally [optional]
        #4. use the opener to fetch the URL
        enable_proxy = True
        proxy_handler = urllib2.ProxyHandler({"http":'http://120.24.73.165:3128'})
        null_proxy_handler = urllib2.ProxyHandler({})
        if enable_proxy:
            opener = urllib2.build_opener(proxy_handler)#opener that routes HTTP through the proxy
        else:
            opener = urllib2.build_opener(null_proxy_handler)
        request = urllib2.Request('http://www.baidu.com')
        print '---------------------不使用代理'
        #urlopen uses the default opener (no proxy) because install_opener()
        #is never called in this function
        response = urllib2.urlopen(request)
        print response.getcode(),request.host
        print '---------------------使用代理'
        response = opener.open(request)
        print response.getcode(),request.host
    
    #----------------------------------------------------------输出
    ---------------------不使用代理
    200 www.baidu.com
    ---------------------使用代理
    200 120.24.73.165:3128
    View Code

    5.debug模式, 代码中urllib2.build_opener中的httpsHandler需要去掉,

     1 def debug_set():
     2     #代理,调试
     3     import  urllib2,urllib
     4     proxy_handler = urllib2.ProxyHandler({"http":'http://192.168.1.108:89'})
     5 
     6     #debuglog的使用
     7     httpHandler = urllib2.HTTPHandler(debuglevel=1)
     8     opener = urllib2.build_opener(httpHandler, httpsHandler,)
     9     urllib2.install_opener(opener) 
    10     request = urllib2.Request('http://127.0.0.1:5000/urllib2testget?a=2&b=3',headers={'User-Agent':'liubi00'})
    11     response = opener.open(request)
    12     print response.getcode(),response.read()
    13 
    14 
    15 
    16 
    17 #-------------------------------------------输出:
    18 send: 'GET /urllib2testget?a=2&b=3 HTTP/1.1
    Accept-Encoding: identity
    Host: 127.0.0.1:5000
    Connection: close
    User-Agent: liubi00
    
    '
    19 reply: 'HTTP/1.0 200 OK
    '
    20 header: Content-Type: text/html; charset=utf-8
    21 header: Content-Length: 20
    22 header: Server: Werkzeug/0.11.11 Python/2.7.12
    23 header: Date: Fri, 30 Dec 2016 15:12:40 GMT
    24 200 {"a": "2", "b": "3"}
    View Code

    6.获取cookie存到cookie.txt

    import cookielib
    import  urllib2
    
    def get_cookie():
        #Fetch a page and save the cookies it sets into cookie.txt (Python 2).
        filename = 'cookie.txt'
        #Create a MozillaCookieJar instance to collect cookies and later save them to file
        cookie = cookielib.MozillaCookieJar(filename)
        #Build a cookie handler with urllib2's HTTPCookieProcessor
        handler = urllib2.HTTPCookieProcessor(cookie)
        #Build an opener from the handler
        opener = urllib2.build_opener(handler,)
        request = urllib2.Request('http://www.baidu.com')
        request.add_header('User-Agent','fuckyou')
        response = opener.open(request)
        #Save the cookies to the file, keeping session and expired cookies too
        cookie.save(ignore_discard=True, ignore_expires=True)
        print response.getcode()
    
    get_cookie()
    
    #----------------------------------------------输出:
    200
    View Code

    7.通过cookie请求,更多查看http://www.cnblogs.com/sysu-blackbear/p/3629770.html

     1 import cookielib
     2 import urllib2
     3 def use_cookie():
     4     #Load cookies from cookie.txt and send a request that carries them
     5     cookie_file = 'cookie.txt'
     6     #Create a MozillaCookieJar instance
     7     cookie = cookielib.MozillaCookieJar(cookie_file)
     8     #Read the cookies from the file into the jar
     9     cookie.load( ignore_discard=True, ignore_expires=True)
    10     #Build the request
    11     req = urllib2.Request("http://www.baidu.com")
    12     #Create an opener via urllib2.build_opener so requests send the cookies
    13     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    14     response = opener.open(req)
    15     print response.read()
    View Code

    8.异常处理

     1 def deal_errors():
     2     #Exception handling with urllib2
     3     import urllib2
     4     #HTTPError: the server answered with an error status code
     5     req = urllib2.Request('http://blog.csdn.net/cqcre')
     6     try:
     7         urllib2.urlopen(req)
     8     except urllib2.HTTPError, e:
     9         print e.code
    10         print e.reason
    11 
    12     #URLError: the request never reached a server (e.g. unknown host)
    13     requset = urllib2.Request('http://www.xxxxx.com')
    14     try:
    15         urllib2.urlopen(requset)
    16     except urllib2.URLError, e:
    17         print e.reason
    18 
    19     #Catch both: HTTPError subclasses URLError, so test for attributes
    20     req = urllib2.Request('http://blog.csdn.net/cqcre')
    21     try:
    22         urllib2.urlopen(req)
    23     except urllib2.URLError, e:
    24         if hasattr(e,"code"):
    25             print e.code
    26         if hasattr(e,"reason"):
    27             print e.reason
    28     else:
    29         print "OK"
    View Code
  • 相关阅读:
    Java实现 LeetCode 697 数组的度(类似于数组的map)
    Java实现 LeetCode 697 数组的度(类似于数组的map)
    Java实现 LeetCode 697 数组的度(类似于数组的map)
    Java实现 LeetCode 696 计数二进制子串(暴力)
    Java实现 LeetCode 696 计数二进制子串(暴力)
    Java实现 LeetCode 696 计数二进制子串(暴力)
    Java实现 LeetCode 695 岛屿的最大面积(DFS)
    Java实现 LeetCode 695 岛屿的最大面积(DFS)
    PHP serialize() 函数
    PHP print_r() 函数
  • 原文地址:https://www.cnblogs.com/diaosir/p/6233240.html
Copyright © 2020-2023  润新知