[转]Python下载百度新歌100的代码

[转]Python下载百度新歌100的代码

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>

import httplib
import re
import urllib
import os
import locale

def getdownurl(url):
    """Return the candidate MP3 download URLs found on a Baidu result page.

    A GET request for ``url`` is sent to ``mp3.baidu.com``.  The response
    body is scanned for anchors matching ``http://220.181.27.54/m...</a>``;
    from every second match the ``title=...onclick`` fragment is located and
    the first ``http...`` token (up to the next whitespace) inside it is
    taken as the download URL.

    Args:
        url: Request path (including query string) on ``mp3.baidu.com``.

    Returns:
        A list of URLs in page order.  Each URL is decoded from GBK to
        ``unicode`` when possible; otherwise the raw byte string is kept.

    Raises:
        Whatever ``httplib`` raises on connection or protocol failure.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    urllist = []
    expression = 'http://220.181.27.54/m(.*)</a>'
    # Only every second match is inspected, exactly as the original index
    # loop did (presumably each result row yields two matching anchors --
    # TODO confirm against the live page markup).
    for sentence in re.findall(expression, html)[::2]:
        title = re.search('title=(.*)onclick', sentence)
        if not title:
            continue
        link = re.search(r'http(\S*)', title.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeError:
            # Not valid GBK: keep the raw value rather than dropping the URL.
            pass
        urllist.append(mp3url)
    return urllist

def downmp3(url,author,name,filelist):
    filename=author+"-"+name;
    for i in filelist:
        name=unicode(i,locale.getpreferredencoding())
        if name.find(filename) == 0:
            print u"文件已经下载，忽略。"
            return 1
    urllists=getdownurl(url)
    for i in urllists:
        print u"正在连接",i

        ext=i[-4:]
        try:
            urlopen = urllib.URLopener()
            fp=urlopen.open(i)
            data = fp.read()
            fp.close()
            filename=filename+ext;
            file=open(filename,'w+b')
            file.write(data)
            file.close()
            print u"下载成功!"
            return 1
        except:
            continue
    return 0

if __name__ == "__main__":
    conn = httplib.HTTPConnection('list.mp3.baidu.com')
    conn.request("GET",'/list/newhits.html?id=1')
    response = conn.getresponse()
    html=response.read().decode('gbk')
    conn.close()
    expression='<a href="http://mp3.baidu.com/m(.*)</a>'
    listSentence = re.findall(expression, html)
    lineno=0
    while lineno<len(listSentence):
       url=re.search('(.*)target',listSentence[lineno])
       url='/m'+url.group(0)[:-8]
       name=re.search('blank>(.*)',listSentence[lineno])
       name=name.group(0)[6:]
       author=re.search('blank>(.*)',listSentence[lineno+1])
       author=author.group(0)[6:]
       print u"开始下载",author,name
       filelist=os.listdir('.');
       if downmp3(url,author,name,filelist)==0:
          print u"下载",author,name,u'失败！'
       lineno+=2
相关阅读:
RocketMQ学习笔记（9）----RocketMQ的Producer 顺序消息
 RocketMQ学习笔记（8）----RocketMQ的Producer API简介
 RocketMQ学习笔记（7）----RocketMQ的整体架构
 RocketMQ学习笔记（6）----RocketMQ的Client的使用 Producer/Consumer
RocketMQ学习笔记（5）----RocketMQ监控平台rocketmq-console-ng的搭建
 RocketMQ学习笔记（4）----RocketMQ搭建双Master集群
 RocketMQ学习笔记（3）----RocketMQ物理结构和逻辑部署结构
 RocketMQ学习笔记（1）----RocketMQ的简介
 RocketMQ学习笔记（2）----Broker的集群四种方式
 Docker学习笔记(1)----认识Docker
原文地址：https://www.cnblogs.com/maplye/p/450118.html