• python常用模块


    urllib

    1. urllib.request.urlopen() 打开网页

    import json
    from urllib import request

    # Open the Baidu home page over HTTP.
    response = request.urlopen("http://www.baidu.com")

    # Read the raw body bytes first, then decode them to text as UTF-8.
    raw_body = response.read()
    html = raw_body.decode("utf-8")
    print(html)
    

    urlopen返回对象,支持操作:

    •   read()        readline()  readlines()  fileno()  close() 这些方法的使用方式与文件对象完全一致
    •   info()         返回一个http.client.HTTPMessage对象(Python 2中为httplib.HTTPMessage),表示远程服务器返回的头信息
    •   getcode()   返回HTTP状态码。对于HTTP请求,200表示请求成功完成,404表示网址未找到
    •   geturl()      返回请求的url

    urlopen返回对象的属性:

    • status        返回状态码
    • reason       返回状态信息

    操作示例:

    # Response headers returned by the server
    info = response.info()
    print(info)

    # HTTP status code
    code = response.getcode()
    print(code)

    # URL of the request
    url = response.geturl()
    print(url)

    # Iterate over every header name/value pair
    for k, v in info.items():
        print("%s -> %s" % (k, v))

    # Look up one specific header such as Date or Server; header names are
    # matched case-insensitively, so "date" finds the "Date" header.
    if "Date" in info:
        print(info["date"])
    

    另一种操作方式:

    from urllib import request

    # The context manager closes the response when the block exits.
    with request.urlopen('https://api.douban.com/v2/book/2129650') as resp:
        body = resp.read()
        print('Status:', resp.status, resp.reason)
        # getheaders() yields (name, value) pairs.
        for name, value in resp.getheaders():
            print('%s: %s' % (name, value))
        print('Data:', body.decode('utf-8'))
    

     

    2. urllib.request.urlretrieve() 将网页保存到本地

    3. urllib.parse.urlencode(query)

    将字典中的键值对编码,并以连接符&拼接成查询字符串,可以与urlopen结合以实现POST方法和GET方法

    Get:

    >>> import urllib
    >>> params=urllib.urlencode({'spam':1,'eggs':2,'bacon':0})
    >>> params
    'eggs=2&bacon=0&spam=1'
    >>> f=urllib.urlopen("http://python.org/query?%s" % params)
    >>> print f.read()
    

    Post:

    >>> import urllib
    >>> parmas = urllib.urlencode({'spam':1,'eggs':2,'bacon':0})
    >>> f=urllib.urlopen("http://python.org/query",parmas)
    >>> f.read()
    

      

    案例:

    1. 通过urllib实现图片的简单下载

    from urllib import request

    url = "http://n.sinaimg.cn/tech/transform/20170512/P0He-fyfeutp7573474.jpg"

    # Download the raw image bytes; the with-block closes the HTTP response
    # as soon as the body has been read.
    with request.urlopen(url) as response:
        image_bytes = response.read()

    # Binary mode is required because the payload is image data, not text.
    with open("a.img", "wb") as f:
        f.write(image_bytes)
    

    2. 通过POST实现调用百度的在线翻译:

    from urllib import request, parse
    import json

    # Build the request and set the User-Agent, Content-Type and Referer headers.
    req = request.Request("http://fanyi.baidu.com/v2transapi")
    req.add_header("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393")
    req.add_header("Content-Type","application/x-www-form-urlencoded; charset=UTF-8")
    req.add_header("Referer", "http://fanyi.baidu.com")

    # Chinese -> English translation.
    # For English -> Chinese use instead:
    #   "from": "en", "query": "hello world", "simple_means_flag": 3, "to": "zh"
    form = {
        "from": "zh",
        "query": "你好",
        "simple_means_flag": 3,
        "to": "en",
    }

    # urlopen() needs the form body as bytes, so encode the query string.
    payload = parse.urlencode(form).encode("utf-8")

    # The with-block closes the HTTP response once the body has been read.
    with request.urlopen(req, data=payload) as response:
        html = response.read().decode("utf-8")

    # print(html)  # uncomment to inspect the raw JSON reply

    # NOTE(review): assumes the reply JSON has trans_result -> data[0] -> dst;
    # verify against the live API.
    target = json.loads(html)
    print(target["trans_result"]["data"][0]["dst"])
    

    3. 通过GET获取新浪新闻信息:

    from urllib import request, parse
    import json

    # Target URL: http://feed.mix.sina.com.cn/api/roll/get?pageid=1&lid=21
    values = {
        "pageid": 1,
        "lid": 21,
    }
    param = parse.urlencode(values)
    req = request.Request("http://feed.mix.sina.com.cn/api/roll/get?%s" % param)

    req.add_header("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393")
    req.add_header("Referer", "http://tech.sina.com.cn/")

    # The with-block closes the HTTP response once the body has been read.
    with request.urlopen(req) as result:
        html = result.read().decode("utf-8")

    target = json.loads(html)
    # print(target)  # uncomment to inspect the full decoded reply

    news_time = target["result"]["timestamp"]
    all_data = target["result"]["data"]

    print(news_time)
    for item in all_data:
        print("标题:", item["title"])
        # "\t" escapes replace the raw tab characters in the literals;
        # the printed text is unchanged.
        print("\t汇总:", item["summary"])
        print("\t信息:", item["intro"])
        print("\turl:", item["url"])
        print("\timg:", item["img"]["u"])
    

     

    4.

  • 相关阅读:
    优先队列总结
    CodeForces 567D One-Dimensional Battle Ships
    CodeForces 567D One-Dimensional Battle Ships
    codeforces 1016B. Segment Occurrences
    codeforces 1016B. Segment Occurrences
    poj3249(求最长路)
    poj3249(求最长路)
    poj 2186
    2017年第八蓝桥杯C/C++ A组国赛 —— 第二题:生命游戏
    Fence Repair POJ
  • 原文地址:https://www.cnblogs.com/onlybobby/p/6848384.html
Copyright © 2020-2023  润新知