• Python 简单爬虫案例


    Python 简单爬虫案例

    # Fetch the Sogou search-result page for a user-supplied keyword and
    # dump the raw HTML bytes to "<keyword>.html".
    import requests

    url = "https://www.sogou.com/web"
    # Build the query string from user input.
    keyword = input('enter a word')
    params = {
        'query': keyword
    }
    resp = requests.get(url=url, params=params)

    html_bytes = resp.content
    out_name = keyword + '.html'
    with open(out_name, 'wb') as out_fp:
        out_fp.write(html_bytes)
        print('over')
    需求:爬取搜狗指定词条搜索后的页面数据
    # POST a keyword to Baidu Translate's suggestion endpoint and print
    # the JSON reply.
    import requests

    url = "https://fanyi.baidu.com/sug"
    word = input('enter aword')
    payload = {'kw': word}
    resp = requests.post(url=url, data=payload)
    print(resp.json())
    需求:抓取百度翻译
    # Fetch one slice of Douban's movie ranking list (category type 5,
    # rating interval 100:90, offset 40, 100 records) as JSON and print it.
    import requests

    url = "https://movie.douban.com/j/chart/top_list"
    query = {
        "type": "5",
        "interval_id": "100:90",
        "action": "",
        "start": "40",
        "limit": "100",
    }

    resp = requests.get(url=url, params=query)
    print(resp.json())
    需求:抓取豆瓣电影分类https://movie.douban.com/排行榜中的电影详情数据
    # Query the KFC store-locator endpoint for restaurants matching a
    # user-supplied keyword and print the JSON result.
    import requests

    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx'
    keyword = input('enter aword:')
    form = {
        "cname": '',
        "pid": '',
        "keyword": keyword,
        "pageIndex": "1",
        "pageSize": "10",
    }
    result = requests.post(url=url, data=form).json()
    print(result)
    需求:抓取肯德基餐厅查询http://www.kfc.com.cn/kfccda/index.aspx中指定地点的餐厅数据
    http://125.35.6.84:81/xk/
    # Crawl the CFDA (China drug administration) licence portal:
    # 1) page through the list endpoint collecting record IDs,
    # 2) fetch and print the detail record for each ID.
    import requests

    url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'

    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    }

    id_list = []
    for page in range(1, 11):
        # Only "page" varies between requests; the rest of the form is fixed.
        data = {
            "on": "true",
            "page": str(page),
            "pageSize": "15",
            "productName": '',
            "conditionType": "1",
            "applyname": '',
            "applysn": '',
        }
        json_data = requests.post(url=url, data=data, headers=headers).json()
        for dic in json_data["list"]:
            # Named "record_id" rather than "id" to avoid shadowing the builtin id().
            record_id = dic["ID"]
            id_list.append(record_id)

    detail_url = "http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById"
    for record_id in id_list:
        detail_data = {
            "id": record_id
        }
        detail_json = requests.post(url=detail_url, data=detail_data, headers=headers).json()
        print(detail_json)
    需求:爬取国家药品监督管理总局中基于中华人民共和国的化妆品生产许可证相关数据 http://125.35.6.84:81/xk/
    # Crawl every image from a user-chosen range of Qiushibaike "pic"
    # pages and save them under ./qiutu/.
    import os
    import re
    # Bug fix: "import urllib" alone does not reliably expose the
    # urllib.request submodule in Python 3; import it explicitly.
    import urllib.request

    import requests


    url = 'https://www.qiushibaike.com/pic/page/%d/?s=5170552'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
    }
    if not os.path.exists('./qiutu'):
        os.mkdir('./qiutu')

    start_page = int(input('enter a start pageNum:'))
    end_page = int(input('enter a end pageNum:'))

    # The pattern is loop-invariant: compile it once instead of per page.
    img_pattern = re.compile('<div class="thumb">.*?<img src="(.*?)" alt=.*?</div>', re.S)

    for page in range(start_page, end_page + 1):
        new_url = url % page  # the original format() wrapper was redundant
        page_text = requests.get(url=new_url, headers=headers).text
        for img_url in img_pattern.findall(page_text):
            # src attributes are protocol-relative; prepend the scheme.
            img_url = 'https:' + img_url
            imgName = img_url.split('/')[-1]
            imgPath = 'qiutu/' + imgName
            urllib.request.urlretrieve(url=img_url, filename=imgPath)
            print(imgPath, '下载成功!')

    print('over!!!')
    需求:爬取糗事百科中所有图片进行保存
    # Download one 4K card image by direct URL and write the raw bytes
    # to ./kapai.jpg.
    import requests

    url = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1564643415&di=423648f96f24460811fc7a39e23d29f8&imgtype=jpg&er=1&src=http%3A%2F%2Fimg1.replays.net%2Flol.replays.net%2Fuploads%2Fbody%2F2017%2F06%2F1496734520iBi.jpg"
    request_headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    }
    image_bytes = requests.get(url=url, headers=request_headers).content
    with open('./kapai.jpg', 'wb') as out_fp:
        out_fp.write(image_bytes)
    需求:爬取卡牌大师4k照片
  • 相关阅读:
    void及void指针含义的深刻解析
    jbpm入门样例
    给字符数组赋值的方法
    linux tar.gz zip 解压缩 压缩命令
    android 文件上传
    职员有薪水了
    sublime配置全攻略
    [置顶] WPF数据修改demo
    Java实现快速排序
    Java实现快速排序
  • 原文地址:https://www.cnblogs.com/xiangsikai/p/11251658.html
Copyright © 2020-2023  润新知