顶会热词分析

顶会热词分析

import re

import requests

import pymysql

def insertCvpr(value):
    """Insert one row into the ``cvpr`` table of the local ``cvprlist`` database.

    A new connection is opened for every call. On success the transaction is
    committed and a success message is printed; if the insert fails the
    transaction is rolled back and a failure message (with the error) is
    printed instead. Errors raised while connecting are not caught here.

    Args:
        value: Sequence of four values bound to the four ``%s`` placeholders
            of the INSERT statement.
    """
    # Keyword arguments are required: PyMySQL 1.0 dropped positional
    # connect() parameters.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="root",
        database="cvprlist",
        charset="utf8",
    )
    sql = """insert into cvpr values(%s,%s,%s,%s)"""
    try:
        # The context manager closes the cursor even if execute() raises.
        with db.cursor() as cursor:
            cursor.execute(sql, value)
        db.commit()
        print('插入数据成功')
    except Exception as exc:
        # Best-effort insert: keep the caller's loop running, but do not
        # swallow KeyboardInterrupt/SystemExit and do show the reason.
        db.rollback()
        print("插入数据失败", exc)
    finally:
        db.close()

# Crawl the ICCV 2019 open-access index, visit every paper page linked from
# it, and store each paper's title, abstract and PDF link via insertCvpr().
base_url = "http://openaccess.thecvf.com/"
url = "http://openaccess.thecvf.com/ICCV2019.py"
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36 Edg/81.0.416.53"}
request_timeout = 30  # seconds; without a timeout a stalled server hangs the crawl

# Patterns are compiled once instead of on every loop iteration.
paper_link_re = re.compile(
    r'<dt class="ptitle"><br><a href="(.*?)">.*?</a></dt>', re.S)
title_re = re.compile(r'<div id="papertitle">(.*?)</div>', re.S)
abstract_re = re.compile(r'<div id="abstract" >(.*?)</div>', re.S)
# The literal square brackets around the link must be escaped; unescaped they
# form a character class that matches single characters and captures nothing.
pdf_re = re.compile(r'\[<a href="\.\./\.\./(.*?)">pdf</a>\]', re.S)

res = requests.get(url, headers=header, timeout=request_timeout)
res.encoding = "utf-8"
paper_paths = paper_link_re.findall(res.text)

for item in paper_paths:
    # The index lists relative paths, so prefix the site root.
    try:
        page = requests.get(base_url + item, headers=header,
                            timeout=request_timeout)
    except requests.RequestException as exc:
        print("request failed:", item, exc)
        continue
    page.encoding = "utf-8"

    title = title_re.search(page.text)
    summary = abstract_re.search(page.text)
    link = pdf_re.search(page.text)

    # Some pages lack one of the fields; skip them instead of crashing.
    if not (title and summary and link):
        continue

    # NOTE(review): replace(" ", "") removes every space, which also joins
    # the words of English titles/abstracts together — confirm this is
    # intended rather than a strip() of surrounding whitespace.
    clean_title = title.group(1).replace(" ", "")
    clean_summary = summary.group(1).replace(" ", "")
    # NOTE(review): the title is passed for both the first and third column,
    # as in the original; the cvpr table schema is not visible here — confirm.
    insertCvpr((clean_title, clean_summary, clean_title,
                base_url + link.group(1)))

　
相关阅读:
贝叶斯网络的采样
 马尔可夫蒙特卡洛采样法
 高斯分布的采样
 常见的采样方法
 正则化
 随机梯度下降
 机器学习中的优化问题
 【原】涉及数据库的单元测试-JTester
高度和宽度
 定位position
原文地址：https://www.cnblogs.com/wyppaa/p/13093760.html

最新文章
python
python
selenium
python
python
python
selenium
python
python
python