• Python 爬取网易云歌手的50首热门作品


    使用 requests 爬取网易云音乐

    Python 代码:

    import json
    import os
    import time
    
    from bs4 import BeautifulSoup
    import requests
    
    
    class Music:
        """
        Download the hot songs listed on a NetEase Cloud Music artist page.

        :param init_url: URL of the artist page,
            e.g. ``https://music.163.com/artist?id=5781``
        :param download: directory the ``.mp3`` files are written to
        """

        # Seconds to wait on the server before a request is abandoned; without
        # a timeout a stalled connection would block the whole run forever.
        TIMEOUT = 30
        # Number of bytes pulled from the response stream per iteration.
        CHUNK_SIZE = 1024
        # Characters that may not appear in a file name (Windows being the
        # strictest platform); each one is replaced by an underscore.
        _UNSAFE_CHARS = '\\/:*?"<>|'
        _FILENAME_TABLE = str.maketrans(_UNSAFE_CHARS, '_' * len(_UNSAFE_CHARS))

        def __init__(self, init_url, download):
            self.init_url = init_url
            self.download = download

        def mkdir(self, path):
            """
            Create the directory ``path`` (parents included) if it is missing.

            :param path: directory to create; surrounding whitespace is stripped
            :return: True if the directory was created, False if it already existed
            """
            path = path.strip()
            try:
                # Attempt the creation directly instead of checking first, so
                # there is no window between the check and the creation.
                os.makedirs(path)
            except FileExistsError:
                print(path, '文件夹已存在,无需创建')
                return False
            print('创建 ', path, '文件夹')
            return True

        def _song_path(self, name):
            """
            Build the target file path for a song.

            :param name: song title as delivered by the site
            :return: ``<download>/<sanitised name>.mp3``
            """
            # Titles may contain path separators or other characters the file
            # system rejects; trailing dots/spaces are dropped as well.
            safe = str(name).translate(self._FILENAME_TABLE).strip().rstrip(' .')
            return os.path.join(self.download, (safe or 'untitled') + '.mp3')

        def download_video(self, video_url, name):
            """
            Stream one song into the download directory.

            Failures are reported on stdout and never raised, so a single dead
            link does not abort a whole batch.

            :param video_url: URL of the audio file
            :param name: song title, used (sanitised) as the file name
            :return: None
            """
            path = self._song_path(name)
            headers = {
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36",
                "Origin": "http://music.163.com/",
                "Referer": video_url,  # the request has to carry a Referer header
            }
            size = 0
            start = time.time()
            try:
                # NOTE(review): verify=False disables TLS certificate checks; it
                # is kept from the original code - confirm it is still needed.
                with requests.get(video_url, headers=headers, stream=True,
                                  verify=False, timeout=self.TIMEOUT) as result:
                    # Reject error responses before a file is created, so an
                    # HTML error page is never stored under an .mp3 name.
                    result.raise_for_status()
                    with open(path, "wb") as f:
                        for chunk in result.iter_content(self.CHUNK_SIZE):
                            f.write(chunk)
                            size += len(chunk)
                print("已下载:%0.2f Mb" % (size / (1024 * 1024)))
            except Exception as e:  # per-song boundary: report and carry on
                print("url下载错误:%s" % video_url)
                print(e)
                return
            print("下载完成,耗时:%0.2f秒" % (time.time() - start))

        def _fetch_songs(self):
            """
            Read the song list from the artist page.

            The page is expected to carry the list as JSON inside its first
            ``<textarea>`` element.

            :return: list of ``{'name': ..., 'id': ...}`` dicts; empty when the
                page has no ``<textarea>``
            """
            html = requests.get(self.init_url, timeout=self.TIMEOUT).text
            textarea = BeautifulSoup(html, 'lxml').find('textarea')
            if textarea is None:
                return []
            return [
                {'name': item['name'], 'id': item['id']}
                for item in json.loads(textarea.text)
            ]

        def spider(self):
            """
            Download every song listed on the artist page.

            Creates the download directory when needed and fetches the songs one
            after another, printing the progress. The working directory of the
            process is left untouched.

            :return: None
            """
            songs = self._fetch_songs()
            print(songs)
            remaining = len(songs)
            print("一共", remaining, "首歌曲")
            self.mkdir(self.download)
            for song in songs:
                song_url = "http://music.163.com/song/media/outer/url?id=" + str(song['id']) + ".mp3"
                print(song_url)  # final URL the audio is downloaded from
                self.download_video(song_url, song['name'])
                remaining -= 1
                print("还剩", remaining, "首歌曲需要下载")
    
    
    if __name__ == '__main__':
        # Entry point: download the hot songs of one artist into a local folder.
        #
        # Other artists that were used with this script. Put the id into
        # ``init_url`` and the folder name at the end of ``download``:
        #
        #   artist id   folder   artist
        #   5781        xzq      Xue Zhiqian
        #   12429072    gblf     Gebi Laofan
        #   861777      hcy      Hua Chenyu
        #   6452        zjl      Zhou Jielun (Jay Chou)
        #   2116        cyx      Chen Yixun (Eason Chan)
        #   3684        ljj      Lin Junjie (JJ Lin)
        #   12138269    mby      Mao Buyi
        #   4292        lrh      Li Ronghao
        #   30116848    ar       A Rong
        #   5771        xs       Xu Song
        #   6472        zj       Zhang Jie
        #   5538        wsl      Wang Sulong
        #   1197168     xbl      Xu Binglong
        init_url = 'https://music.163.com/artist?id=30284835'  # Kumu Fengchun
        # Raw string: in a normal literal "\U" starts a unicode escape, which
        # makes a Windows path like this one a SyntaxError.
        download = r'C:\Users\AIERXUAN\Desktop\BeautifulPicture\Music\kmfc'  # target directory

        s = Music(init_url, download)
        s.spider()
    View Code
    打开链接 "http://music.163.com/song/media/outer/url?id=417859631.mp3" 就可以直接播放音乐,链接末尾的 id 是歌曲在网易云音乐里的 id。

    由于网易云的部分音乐链接已经弃用,所以部分歌曲会下载失败。

    网易云的许多post请求都是被加密的,如果你们破解不了可以点击这个链接去看看大佬是怎么破解的:https://blog.csdn.net/xiaoming_xiaoli/article/details/88019016

    关于网易云api的其他接口可以进去这里面查看:http://www.goodpm.net/postreply/python/1010000008139311/关于网易云音乐爬虫的api接口.html

    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    每天一个表情包,给生活加个油

  • 相关阅读:
    js实现全选反选功能
    seajs笔记
    延迟加载图片
    JavaScript 数组基本操作
    图片上传,头像上传
    命令模式
    动态加载js css 插件
    发布-订阅模式
    js 切换全屏
    JavaScript 经典之一 闭包
  • 原文地址:https://www.cnblogs.com/yanzhongyixu/p/13356729.html
Copyright © 2020-2023  润新知