• Python爬取微博热搜榜,将数据存入数据库


    #-*-coding:utf-8-*-
    import urllib, pymysql, requests, re
    # Database connection settings, passed verbatim to pymysql.connect().
    config = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '******',
        'db': 'weibo',
        'charset': 'utf8',
    }

    # Captures the still percent-encoded keyword from each hot-search link.
    hotKey = re.compile(r'td class=\"td_05\"><a href=\"\/weibo\/(.*?)&Refer=top\"')


    def parse_hot_search(html):
        """Extract the ranked hot-search entries from the page source.

        Args:
            html: Text of the hot-search summary page.

        Returns:
            A list of ``(rank, title, url)`` tuples in page order, with ``rank``
            starting at 1. ``title`` is the decoded keyword and ``url`` is the
            search URL for it. The list is empty when nothing matches.
        """
        # Imported explicitly: a bare ``import urllib`` does not guarantee that
        # the ``urllib.parse`` submodule is loaded.
        from urllib.parse import unquote

        records = []
        for rank, key in enumerate(hotKey.findall(html), start=1):
            # The keyword appears to be percent-encoded twice in the href
            # ("%" itself shows up as "%25"). Undo only that outer layer;
            # stripping every literal "25" would also mangle keywords that
            # really contain those digits (e.g. "2025").
            key = key.replace('%25', '%')
            records.append((rank, unquote(key), 'http://s.weibo.com/weibo/' + key))
        return records


    def main():
        """Download the Weibo hot-search list, print it and store it in MySQL.

        Raises:
            requests.RequestException: If the page cannot be fetched or the
                server answers with an HTTP error status.
            pymysql.MySQLError: If connecting or inserting fails; nothing is
                committed in that case.
        """
        # Fetch the page first so that no connection is opened when the
        # download fails. The timeout keeps a stalled server from hanging us.
        response = requests.get('http://s.weibo.com/top/summary', timeout=10)
        response.raise_for_status()
        records = parse_hot_search(response.text)

        # `rank` is a reserved word in MySQL 8.0.2 and later, so the column
        # name must be quoted with backticks.
        sql = ('insert into hotsearch (`rank`, daydate, mindate, title, url) '
               'values (%s, curdate(), curtime(), %s, %s)')

        conn = pymysql.connect(**config)
        try:
            with conn.cursor() as cursor:
                for rank, title, url in records:
                    print(str(rank) + ' ' + title + ' ' + ' ' + url + '\n')
                    cursor.execute(sql, (rank, title, url))
            # Commit once so that a snapshot is stored entirely or not at all.
            conn.commit()
        finally:
            # Release the connection even when an insert fails.
            conn.close()


    if __name__ == '__main__':
        main()
  • 相关阅读:
    [LeetCode] Rotate Image
    [LeetCode] Generate Parentheses
    pandas 使用总结
    ConfigParser 读写配置文件
    Cheat Sheet pyspark RDD(PySpark 速查表)
    python随机生成字符
    grep 命令
    hadoop 日常使用记录
    python 2 计算字符串 余弦相似度
    screen命令
  • 原文地址:https://www.cnblogs.com/yszr/p/15023338.html
Copyright © 2020-2023  润新知