• Requests库入门实例


    爬虫入门5个实例

    实例1:京东商品页面的爬取

    import requests
    
    def getHTMLText(url):
        """Fetch ``url`` with a GET request and return the response body as text.

        The request uses a 30-second timeout, and the body is decoded with the
        encoding that requests detects from the content (``apparent_encoding``).

        Returns:
            The decoded page text, or the string "Something Wrong!!!" when the
            request fails or the server answers with an HTTP error status.
        """
        try:
            r = requests.get(url, timeout=30)
            # Turn 4xx/5xx responses into an exception so they take the error path.
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Only request failures are handled here; a bare except would also
            # swallow KeyboardInterrupt and genuine programming errors.
            return "Something Wrong!!!"
    
    # Page to fetch; only the first 1000 characters of the result are printed.
    url = "https://item.jd.com/27528447148.html"
    print(getHTMLText(url)[:1000])
    

    实例2:亚马逊商品页面的爬取

    import requests
    # Request headers: send a browser-style User-Agent with the request.
    kv = {'user-agent': 'Mozilla/5.0'}
    def getHTMLText(url):
        """Fetch ``url`` with the headers in ``kv`` and return the body as text.

        The request uses a 30-second timeout and the module-level ``kv`` dict as
        its headers. The body is decoded with the encoding that requests detects
        from the content (``apparent_encoding``).

        Returns:
            The decoded page text, or the string "Something Wrong!!!" when the
            request fails or the server answers with an HTTP error status.
        """
        try:
            r = requests.get(url, timeout=30, headers=kv)
            # Treat 4xx/5xx responses as failures instead of returning their body.
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Narrowed from a bare except so that Ctrl-C and programming errors
            # are no longer silently converted into the error string.
            return "Something Wrong!!!"
    
    # Page to fetch; characters 1000-1999 of the returned text are printed.
    url = "https://www.amazon.cn/dp/B0083DP0CY/ref=cngwdyfloorv2_recs_0/460-1382173-5298568?pf_rd_m=A1AJ19PSB66TGU&pf_rd_s=desktop-2&pf_rd_r=07R056YCCZREBTBFN41G&pf_rd_r=07R056YCCZREBTBFN41G&pf_rd_t=36701&pf_rd_p=d2aa3428-dc2b-4cfe-bca6-5e3a33f2342e&pf_rd_p=d2aa3428-dc2b-4cfe-bca6-5e3a33f2342e&pf_rd_i=desktop"
    print(getHTMLText(url)[1000:2000])
    

    实例3:百度搜索关键字提交

    import requests
    keyword = "Python"
    # Query parameters passed to requests.get via ``params`` (wd=<keyword>).
    kv = {'wd': keyword}
    def getHTMLText(url):
        """Fetch ``url`` with the query parameters in ``kv`` and return the text.

        The request uses a 30-second timeout and passes the module-level ``kv``
        dict as ``params``. The body is decoded with the encoding that requests
        detects from the content (``apparent_encoding``).

        Returns:
            The decoded page text, or the string "Something Wrong!!!" when the
            request fails or the server answers with an HTTP error status.
        """
        try:
            r = requests.get(url, timeout=30, params=kv)
            # Treat 4xx/5xx responses as failures instead of returning their body.
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Catch request failures only; a bare except would also hide
            # KeyboardInterrupt and unrelated bugs.
            return "Something Wrong!!!"
    
    # URL the keyword query is sent to; characters 1000-1999 of the result are printed.
    url = "http://www.baidu.com/s"
    print(getHTMLText(url)[1000:2000])
    

    实例4:网络图片的爬取和存储

    import requests
    import os
    
    # Image to download and the local directory it is saved under.
    url = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1546773451861&di=7cd06f87b97e5a337e6e60a2986098dd&imgtype=jpg&src=http%3A%2F%2Fimg0.imgtn.bdimg.com%2Fit%2Fu%3D78010880%2C3063559069%26fm%3D214%26gp%3D0.jpg"
    root = "D://pics//"
    # The file name is whatever follows the last '%' in the URL ("3D0.jpg" here).
    path = root + url.split('%')[-1]

    try:
        # exist_ok makes a separate existence check before creating unnecessary.
        os.makedirs(root, exist_ok=True)
        if not os.path.exists(path):
            # Bound the wait and reject HTTP error responses, so an error page
            # is never written to disk as if it were the image.
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            # The with-block closes the file; no explicit close() is needed.
            with open(path, 'wb') as f:
                f.write(r.content)
            print("Saved!")
        else:
            print("Already Exists")
    except (requests.RequestException, OSError):
        # Network failures and file-system errors are reported; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate.
        print("Something Wrong!!!")
    

    实例5:IP地址归属地的自动查询

    import requests
    
    def getHTMLText(url):
        """Download ``url`` via HTTP GET and return the decoded response text.

        A 30-second timeout is applied. The response encoding is set to the one
        requests detects from the content (``apparent_encoding``) before the
        text is read.

        Returns:
            The decoded page text, or the string "Something Wrong!!!" when the
            request fails or the server answers with an HTTP error status.
        """
        try:
            r = requests.get(url, timeout=30)
            # Raise on 4xx/5xx so error responses are reported, not returned.
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Limit handling to request errors so that interrupts and bugs
            # are not masked by the fallback string.
            return "Something Wrong!!!"
    
    # Lookup URL prefix; the IP address to query is appended to it below.
    url = "http://m.ip138.com/ip.asp?ip="
    ip = "202.204.80.112"
    urls = url + ip
    # Print only the last 500 characters of the returned text.
    print(getHTMLText(urls)[-500:])
    
  • 相关阅读:
    Guava缓存list集合进行滤操作导致多次获取相同key返回数据不一致问题
    公私钥、证书、哈希、加密的要点
    .Net Core MVC Razor输出字符串方法(javascript中嵌入razor)
    AspNetCore.Mvc 使用CreatedAtRoute返回新创建的值
    Asp.Net Core MVC 中富文本编辑器CKEditor 5的配置及使用
    ubuntu PC/嵌入式 开机启动项问题
    速腾雷达没有数据的问题
    VS2017 + Visual Leak Detector 内存泄漏排查(VLD内存泄漏排查)
    基于SpringBoot实现SSM框架整合
    Spring之IOC思想
  • 原文地址:https://www.cnblogs.com/machine-lyc/p/10229387.html
Copyright © 2020-2023  润新知