抓取QQ音乐歌单
1.通过分析歌曲下载路径来分析所需参数: 通过比较, 得出其中歌曲下载url与参数vkey是可变的,
歌曲下载url中可变的值是请求歌单返回的歌曲数据中的strMediaMid参数, 而vkey需要用歌单返回的歌曲数据中的songmid参数再次请求另一个url才能得到
2.通过分析请求歌单url来分析所需参数: disstid
disstid的值为请求歌单列表(diss)返回数据中每个歌单的dissid字段
import os
import re
import requests
from urllib.parse import urlencode

# Crawl QQ Music playlists page by page and save every song as an .m4a file.
# Flow: get_dist_info() lists the playlists of one page -> get_song_info()
# lists the songs of one playlist -> get_vkey() fetches the per-song "vkey"
# -> down_song() downloads the audio file.

# Headers sent with every HTTP request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'origin': 'https://y.qq.com',
    'referer': 'https://y.qq.com/portal/playlist.html'
}

# Seconds to wait for a response; without a timeout requests.get() can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Runs of these characters are replaced by a single space before a playlist
# or song name is used as a path component. The backslash is included so a
# title containing "\" is not interpreted as a sub-directory on Windows.
_UNSAFE_NAME_CHARS = re.compile(r'[\\\/::*?"<>| ]+')


def fetch_url(url):
    """GET *url* and return the decoded JSON body.

    Returns None when the request fails, the status code is not 200/201,
    or the body is not valid JSON. Errors are printed, not raised, so the
    crawl can continue with the next item.
    """
    try:
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if r.status_code in (200, 201):
            return r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that cannot be decoded as JSON.
        print(e)
    return None


def down_song(path, strMediaMid, vkey):
    """Download one song and write it to *path*.

    Args:
        path: destination file path (its directory must already exist).
        strMediaMid: media id inserted into the download URL.
        vkey: download key sent as the ``vkey`` query parameter.

    Returns:
        True if the file was written, False if the request failed or the
        server did not answer with status 200/201.
    """
    params = {
        'guid': '5300386295',
        'vkey': vkey,
        'uin': '0',
        'fromtag': '66'
    }
    url = 'http://222.73.132.154/amobile.music.tc.qq.com/C400{}.m4a?'.format(strMediaMid)
    url += urlencode(params)
    try:
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # Same best-effort policy as fetch_url: report and carry on.
        print(e)
        return False
    if r.status_code not in (200, 201):
        return False
    with open(path, 'wb') as f:
        f.write(r.content)
    return True


def get_vkey(songmid):
    """Return the ``vkey`` for *songmid*, or None if it cannot be obtained.

    None is returned when the request fails or the response does not
    contain ``req_0.data.midurlinfo[0].vkey``.
    """
    url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?'
    params = {
        '-': 'getplaysongvkey7256617694143965',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'data': '{"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"5300386295","songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,"platform":"20"}},"comm":{"uin":0,"format":"json","ct":24,"cv":0}}' % songmid
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        return result['req_0']['data']['midurlinfo'][0]['vkey']
    except (TypeError, KeyError, IndexError):
        # TypeError: fetch_url returned None (or a level is not a dict/list).
        return None


def get_song_info(disstid):
    """Yield ``(strMediaMid, songmid, songname)`` for each song of a playlist.

    *disstid* is the playlist id sent as the ``disstid`` query parameter.
    Yields nothing when the request fails or the response has no
    ``cdlist[0].songlist``.
    """
    url = 'https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?'
    params = {
        'type': '1',
        'json': '1',
        'utf8': '1',
        'onlysong': '0',
        'disstid': disstid,
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        songlist = result['cdlist'][0]['songlist']
    except (TypeError, KeyError, IndexError):
        return
    for song in songlist or ():
        yield song['strMediaMid'], song['songmid'], song['songname']


def get_dist_info(page):
    """Yield ``(dissid, dissname)`` for each playlist on the given page.

    *page* is 1-based; each page requests 30 entries through the ``sin`` /
    ``ein`` query parameters (presumably start/end indexes - confirm against
    the API). Yields nothing when the request fails or the response has no
    ``data.list``.
    """
    url = 'https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg?'
    first = (int(page) - 1) * 30
    params = {
        'picmid': '1',
        'rnd': '0.15993662911508766',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'categoryId': '10000000',
        'sortId': '5',
        'sin': first,
        'ein': first + 29,
    }
    url += urlencode(params)
    result = fetch_url(url)
    try:
        disslist = result['data']['list']
    except (TypeError, KeyError):
        return
    for diss in disslist or ():
        yield diss['dissid'], diss['dissname']


def main(page, base_dir='d://data'):
    """Download every song of every playlist listed on *page*.

    Songs are saved as ``<base_dir>/<playlist name>/<song name>.m4a``; names
    are sanitised with ``_UNSAFE_NAME_CHARS`` first. A song whose vkey or
    download fails is reported and skipped instead of aborting the run.

    Args:
        page: 1-based playlist page number.
        base_dir: root output directory (created on demand).
    """
    for dissid, dissname in get_dist_info(page):
        dissname = _UNSAFE_NAME_CHARS.sub(" ", dissname)
        folder = '{}/{}/'.format(base_dir, dissname)
        for strMediaMid, songMid, songname in get_song_info(dissid):
            songname = _UNSAFE_NAME_CHARS.sub(" ", songname)
            vkey = get_vkey(songMid)
            if vkey is None:
                print("下载失败:{}".format(songname))
                continue
            # makedirs also creates base_dir itself when it is missing, and
            # exist_ok avoids the check-then-create race of exists()+mkdir().
            os.makedirs(folder, exist_ok=True)
            path = '{}{}.m4a'.format(folder, songname)
            print("正在下载:{}".format(songname))
            if down_song(path, strMediaMid, vkey):
                print("下载完成:{}".format(songname))
            else:
                print("下载失败:{}".format(songname))


if __name__ == '__main__':
    page = 1
    main(page)