• ximalaya-spider


    import json
    import os
    import re

    import parsel
    import requests
    
    # Request headers sent with every HTTP call; only a browser-like user-agent is set.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
    }
    # Directory that receives the downloaded audio files.
    path = "./video/"
    # The script passes `proxies` to every request but never defined it, which
    # raised NameError on the first call. None means "no explicit proxy mapping";
    # replace it with a dict such as {'https': 'http://host:port'} when needed.
    proxies = None
    # Seconds to wait for a server response, so a stalled connection cannot hang the run.
    REQUEST_TIMEOUT = 10
    # Characters that must not appear in a file name. The original literal was
    # split by a raw line break (a syntax error); the break is restored here as
    # an escaped newline, and backslash / carriage return are covered as well.
    ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n]+')


    def safe_filename(title):
        """Return *title* with every run of illegal file-name characters replaced by '_'.

        A title without illegal characters is returned unchanged, so callers do
        not need to test for a match first.
        """
        return ILLEGAL_FILENAME_CHARS.sub('_', title)


    def audio_id_from_href(href):
        """Return the last path segment of *href*, ignoring any trailing slash."""
        return href.rstrip('/').split('/')[-1]


    def fetch(url):
        """GET *url* with the shared headers/proxies and return the response.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status, so
                an error page is never parsed or saved as audio.
        """
        response = requests.get(url, headers=headers, proxies=proxies,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response


    def download_page(page):
        """Download every audio item listed on album page number *page* into `path`."""
        url = 'https://www.ximalaya.com/youshengshu/4256765/p%d/' % page
        selector = parsel.Selector(fetch(url).text)

        for li in selector.xpath('//*[@id="anchor_sound_list"]/div[2]/ul/li'):
            title = li.xpath('.//a/@title').get()
            href = li.xpath('.//a/@href').get()
            # .get() returns None when the attribute is absent; skip such rows
            # instead of crashing on None.split / regex search.
            if not title or not href:
                continue

            m4a_id = audio_id_from_href(href)
            video_url = f'https://www.ximalaya.com/revision/play/v1/audio?id={m4a_id}&ptype=1'
            print("开始下载音频数据:%s" % title)
            m4a_dict = json.loads(fetch(video_url).text)
            # The play endpoint's payload is read as data -> src; guard against
            # either level being missing or empty before requesting it.
            m4a_url = (m4a_dict.get('data') or {}).get('src')
            if not m4a_url:
                print("跳过(无音频地址):%s" % title)
                continue

            m4a_data = fetch(m4a_url).content
            with open(os.path.join(path, safe_filename(title) + '.mp3'), "wb") as w:
                w.write(m4a_data)
            print("%s音频数据保存完毕" % title)


    def main(page_count=1):
        """Crawl *page_count* album pages, starting at page index 0.

        NOTE(review): the original loop used range(1), i.e. only page "p0";
        confirm whether the site's first page is p0 or p1 before raising
        page_count.
        """
        # open(..., "wb") does not create missing directories.
        os.makedirs(path, exist_ok=True)
        for page in range(page_count):
            download_page(page)


    if __name__ == "__main__":
        main()
    

      

  • 相关阅读:
    多线程(一)--线程的运行
    多线程(二)--锁
    守护线程与用户线程
    SWD接口
    RS485,CAN
    tcp/ip协议
    开关电源与线性稳压电源
    与GPS相比,北斗的三频信号有什么优势
    射频识别技术(RFID)
    Wi-Fi发射模块芯片各个管脚功能,蓝牙和Wi-Fi信号互相干扰,2.4GHz无线技术
  • 原文地址:https://www.cnblogs.com/hello-python2020/p/14187350.html
Copyright © 2020-2023  润新知