Python爬取微博热搜榜，将数据存入数据库

#-*-coding:utf-8-*-
import re
import urllib
import urllib.parse

import pymysql
import requests
# Database connection settings, passed verbatim to pymysql.connect().
config = {
    'host': '127.0.0.1',
    'port': 3306,
    'user': 'root',
    'password': '******',
    'db': 'weibo',
    'charset': 'utf8',
}

# Page that lists the current Weibo hot-search ranking.
HOT_SEARCH_URL = 'http://s.weibo.com/top/summary'
# Prefix used to rebuild the search URL of a single keyword.
SEARCH_URL_PREFIX = 'http://s.weibo.com/weibo/'

# Captures the percent-encoded keyword out of each hot-search link.
HOT_KEY_PATTERN = re.compile(r'td class=\"td_05\"><a href=\"\/weibo\/(.*?)&Refer=top\"')

# `rank` is a reserved word since MySQL 8.0.2, so the column name must be
# backtick-quoted; the quoted form is also valid on older servers.
INSERT_SQL = (
    'insert into hotsearch (`rank`, daydate, mindate, title, url) '
    'values (%s, curdate(), curtime(), %s, %s)'
)


def fetch_hot_search_html(timeout=10):
    """Download the hot-search page and return its HTML text.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On network failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(HOT_SEARCH_URL, timeout=timeout)
    response.raise_for_status()
    return response.text


def parse_hot_search(html):
    """Extract the ranked hot-search entries from the page HTML.

    Args:
        html: Source of the hot-search page.

    Returns:
        A list of ``(rank, title, url)`` tuples in page order; ``rank``
        starts at 1, ``title`` is the decoded keyword and ``url`` is the
        search link for that keyword. Empty when nothing matches.
    """
    rows = []
    for rank, raw_key in enumerate(HOT_KEY_PATTERN.findall(html), start=1):
        # NOTE(review): the hrefs appear to be double percent-encoded
        # ("%25E4..."), which is presumably why the original stripped "25".
        # Collapse only the "%25" escape so that a keyword that really
        # contains "25" (e.g. "2025") is left intact.
        encoded_key = raw_key.replace('%25', '%')
        title = urllib.parse.unquote(encoded_key)
        rows.append((rank, title, SEARCH_URL_PREFIX + encoded_key))
    return rows


def save_hot_search(rows):
    """Insert the given ``(rank, title, url)`` rows into ``hotsearch``.

    All rows are committed together; the connection is closed even when
    an insert fails, in which case nothing is committed.

    Args:
        rows: Iterable of ``(rank, title, url)`` tuples.
    """
    conn = pymysql.connect(**config)
    try:
        with conn.cursor() as cursor:
            for row in rows:
                cursor.execute(INSERT_SQL, row)
        conn.commit()
    finally:
        conn.close()


def main():
    """Fetch the hot-search list, print it, and store it in the database."""
    # Fetch and parse before touching the database so that a network
    # failure never leaves a connection open.
    rows = parse_hot_search(fetch_hot_search_html())
    for rank, title, url in rows:
        print(f'{rank} {title}  {url}\n')
    if rows:
        save_hot_search(rows)


if __name__ == '__main__':
    main()

相关阅读:
[LeetCode] Rotate Image
[LeetCode] Generate Parentheses
pandas 使用总结
ConfigParser 读写配置文件
Cheat Sheet pyspark RDD（PySpark 速查表）
python随机生成字符
grep 命令
hadoop 日常使用记录
python 2 计算字符串余弦相似度
screen命令

原文地址：https://www.cnblogs.com/yszr/p/15023338.html