• CVPR顶会热词统计


    任务:

    爬取CVPR2019年所有论文的题目,并提取题目中的关键字,做成按照热度显示大小的热词云。

    代码:

    爬虫:

    # coding=utf-8
    import pymysql
    import requests
    from lxml import etree
     
     
    class Spider:
        """Crawl the CVPR 2019 open-access listing and store each paper in MySQL.

        The listing page is fetched once to collect the per-paper page URLs;
        every paper page is then fetched and its title, PDF link and abstract
        are inserted as one row of the ``data`` table.
        """

        def __init__(self):
            # Listing page that links to every CVPR 2019 paper page.
            self.url = "http://openaccess.thecvf.com/CVPR2019.py"
            self.header = {
                "user-agent": "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Mobile Safari/537.36"}
            self.db = pymysql.connect(host='localhost', port=3306, user='root', passwd='abc456', db='paperdata',
                                      charset='utf8')
            self.cursor = self.db.cursor()
            # Absolute URLs of the individual paper pages, filled by getHtmlList().
            self.html_list = []

        def getHtmlList(self):
            """Fetch the listing page and append every paper-page URL to ``self.html_list``."""
            response = requests.get(self.url, headers=self.header)
            html_body = etree.HTML(response.text)
            title = html_body.xpath("//dt[@class='ptitle']/a/@href")
            for item in title:
                # The hrefs on the listing page are relative to the site root.
                self.html_list.append("http://openaccess.thecvf.com/" + item)

        def getContent(self, url):
            """Fetch one paper page and insert its title, PDF link and abstract.

            Any failure (network, missing element, database) is printed and
            swallowed so that one bad page does not stop the whole crawl.

            :param url: absolute URL of a single paper page.
            """
            try:
                response = requests.get(url, headers=self.header)
                body = etree.HTML(response.text)
                title = body.xpath("//div[@id='papertitle']/text()")[0]
                abstract = body.xpath("//div[@id='abstract']/text()")[0]
                down_url = body.xpath("//div[@id='content']//a/@href")[0].replace("../../", "http://openaccess.thecvf.com/")

                # Let the driver bind and escape the values: titles and abstracts
                # routinely contain quotes, which broke the hand-formatted
                # statement and left it open to SQL injection.
                sql = "insert into data values(%s, %s, %s, %s)"
                self.cursor.execute(sql, (0, str(title), str(down_url), str(abstract)))
                self.db.commit()
                # Report success only once the row has actually been committed.
                print(title + "插入成功!")
            except Exception as e:
                print(e)

        def run(self):
            """Collect all paper-page URLs, then crawl and store each one in turn."""
            self.getHtmlList()
            for url in self.html_list:
                self.getContent(url)
     
     
    if __name__ == '__main__':
        # Script entry point: build the crawler and run one full crawl.
        Spider().run()
     
    package dao;
    
    import java.sql.SQLException;
    import java.util.List;
    
    import org.apache.commons.dbutils.QueryRunner;
    import org.apache.commons.dbutils.handlers.BeanListHandler;
    
    import pojo.Data;
    import utils.DataSourceUtils;
    
    /**
     * Data-access object for the {@code data} table, which holds one row per paper.
     *
     * @author connor
     * @version 2020-04-15 10:19:06 AM
     */
    public class DataDao {

        /**
         * Loads every row of the {@code data} table.
         *
         * @return all rows mapped onto {@link Data} beans
         * @throws SQLException if the query fails
         */
        public List<Data> getData() throws SQLException {
            final String sql = "select * from data ";
            return newRunner().query(sql, new BeanListHandler<Data>(Data.class));
        }

        /**
         * Finds the papers whose name contains the given text.
         *
         * @param name text to look for; it is wrapped in {@code %} wildcards and bound
         *             as a query parameter
         * @return the matching rows mapped onto {@link Data} beans
         * @throws SQLException if the query fails
         */
        public List<Data> getLink(String name) throws SQLException {
            final String sql = "select * from data where papername like ?";
            final String pattern = "%" + name + "%";
            return newRunner().query(sql, new BeanListHandler<Data>(Data.class), pattern);
        }

        /** Creates a query runner backed by the shared data source. */
        private QueryRunner newRunner() {
            return new QueryRunner(DataSourceUtils.getDataSource());
        }

    }
     
    DataDao.java
    package pojo; 
    
    
    /**
     * Plain bean for one row of the {@code data} table: a paper's id, name,
     * link and abstract.
     */
    public class Data {

        /** Row identifier. */
        private int id;

        /** Paper title. */
        private String papername;

        /** Link to the paper. */
        private String paperlink;

        /** Paper abstract text. */
        private String paperabstract;

        // ---- id ----

        public void setId(int id) {
            this.id = id;
        }

        public int getId() {
            return this.id;
        }

        // ---- papername ----

        public void setPapername(String papername) {
            this.papername = papername;
        }

        public String getPapername() {
            return this.papername;
        }

        // ---- paperlink ----

        public void setPaperlink(String paperlink) {
            this.paperlink = paperlink;
        }

        public String getPaperlink() {
            return this.paperlink;
        }

        // ---- paperabstract ----

        public void setPaperabstract(String paperabstract) {
            this.paperabstract = paperabstract;
        }

        public String getPaperabstract() {
            return this.paperabstract;
        }
    }
     
    Data.java
    package pojo; 
    
    /**
     * One word-cloud entry: a word ({@code name}) and a number attached to it
     * ({@code value}).
     */
    public class Word {

        /** The word itself. */
        private String name;

        /** The number associated with the word. */
        private int value;

        // ---- name ----

        public void setName(String name) {
            this.name = name;
        }

        public String getName() {
            return this.name;
        }

        // ---- value ----

        public void setValue(int value) {
            this.value = value;
        }

        public int getValue() {
            return this.value;
        }
    }
     
    Word.java
    package service;
    
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    
    import org.apache.commons.lang.ArrayUtils;
    
    
    import dao.DataDao;
    import pojo.Data;
    import pojo.Word;
    
    
    /**
     * Service layer between the servlets and {@link DataDao}: builds the word
     * statistics for the word cloud and looks up papers by name.
     */
    public class DataService {

        /**
         * Counts how often each space-separated token occurs across all paper names
         * and returns the tokens worth showing in the word cloud.
         *
         * <p>A token is kept only when it occurs more than once and is longer than
         * four characters. Counting is case-sensitive and tokens are taken as-is
         * (no punctuation stripping). The order of the returned list follows the
         * hash map's iteration order and is therefore unspecified.
         *
         * @return one {@link Word} per kept token, carrying the token and its count
         * @throws SQLException if loading the papers fails
         */
        public List<Word> getData() throws SQLException {
            DataDao dao = new DataDao();
            List<Data> dataList = dao.getData();

            // Count tokens directly in the map. The previous version prepended every
            // title's tokens to a 100000-slot array of nulls, re-copying the whole
            // array for each paper and then counting the null padding as well.
            HashMap<String, Integer> counts = new HashMap<String, Integer>();
            for (Data data : dataList) {
                String title = data.getPapername();
                if (title == null) {
                    // A row without a name contributes no words (used to throw NPE).
                    continue;
                }
                for (String token : title.split(" ")) {
                    Integer seen = counts.get(token);
                    counts.put(token, seen == null ? 1 : seen + 1);
                }
            }

            List<Word> wordList = new ArrayList<Word>();
            for (String token : counts.keySet()) {
                int occurrences = counts.get(token);
                if (occurrences > 1 && token.length() > 4) {
                    Word word = new Word();
                    word.setName(token);
                    word.setValue(occurrences);
                    wordList.add(word);
                }
            }
            return wordList;
        }

        /**
         * Finds the papers whose name contains the given text.
         *
         * @param name text to search for in paper names
         * @return the matching papers, as returned by {@link DataDao#getLink(String)}
         * @throws SQLException if the query fails
         */
        public List<Data> getLink(String name) throws SQLException {
            DataDao dao = new DataDao();
            return dao.getLink(name);
        }

    }
     
    DataService.java
    package servlet;
    
    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.List;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import pojo.Data;
    import service.DataService;
    
    /**
     * Handles a click on a word in the cloud: looks up the papers whose name
     * contains the clicked word and forwards to {@code papercloud.jsp}, which
     * lists them.
     */
    @WebServlet("/clickFunction")
    public class ClickFunctionServlet extends HttpServlet {
        private static final long serialVersionUID = 1L;

        /** Fully qualified so that no new import is needed in this file. */
        private static final java.util.logging.Logger LOGGER =
                java.util.logging.Logger.getLogger(ClickFunctionServlet.class.getName());

        /**
         * @see HttpServlet#HttpServlet()
         */
        public ClickFunctionServlet() {
            super();
        }

        /**
         * Reads the {@code name} request parameter, queries the matching papers and
         * exposes them to the JSP as the {@code dataList} request attribute.
         *
         * <p>When the parameter is missing, or the lookup fails, {@code dataList} is
         * left {@code null} and the page is still rendered.
         *
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            request.setCharacterEncoding("utf-8");
            response.setContentType("text/html;charset=UTF-8");
            String name = request.getParameter("name");
            List<Data> dataList = null;
            // Without this guard a missing parameter was concatenated into the LIKE
            // pattern as the literal text "null".
            if (name != null) {
                DataService service = new DataService();
                try {
                    dataList = service.getLink(name);
                } catch (SQLException e) {
                    // Keep the page usable, but record the failure in the server log
                    // instead of dumping a stack trace to stderr.
                    LOGGER.log(java.util.logging.Level.SEVERE, "Paper lookup failed for name=" + name, e);
                }
            }
            request.setAttribute("dataList", dataList);
            request.getRequestDispatcher("papercloud.jsp").forward(request, response);
        }

        /**
         * Delegates to {@link #doGet(HttpServletRequest, HttpServletResponse)}.
         *
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            doGet(request, response);
        }

    }
    ClickFunctionServlet.java
    package servlet;
    
    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.List;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import com.google.gson.Gson;
    
    import pojo.Word;
    import service.DataService;
    
    /**
     * Serves the word-cloud data: writes the word/count list produced by
     * {@link DataService#getData()} to the response as a JSON array.
     */
    @WebServlet("/getData")
    public class GetDataServlet extends HttpServlet {
        private static final long serialVersionUID = 1L;

        /** Fully qualified so that no new import is needed in this file. */
        private static final java.util.logging.Logger LOGGER =
                java.util.logging.Logger.getLogger(GetDataServlet.class.getName());

        /**
         * @see HttpServlet#HttpServlet()
         */
        public GetDataServlet() {
            super();
        }

        /**
         * Writes the word list as JSON. If the data cannot be loaded, an empty array
         * is written and the failure is logged; previously the JSON literal
         * {@code null} was sent in that case.
         *
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            request.setCharacterEncoding("utf-8");
            // NOTE(review): the content type is deliberately left as text/html.
            // The page script calls JSON.parse on the raw response text, and jQuery
            // would pre-parse an application/json response, breaking that call.
            response.setContentType("text/html;charset=UTF-8");
            List<Word> wordList;
            DataService service = new DataService();
            try {
                wordList = service.getData();
            } catch (SQLException e) {
                LOGGER.log(java.util.logging.Level.SEVERE, "Loading word-cloud data failed", e);
                wordList = java.util.Collections.<Word>emptyList();
            }
            Gson gson = new Gson();
            String json = gson.toJson(wordList);
            response.getWriter().write(json);
        }

        /**
         * Delegates to {@link #doGet(HttpServletRequest, HttpServletResponse)}.
         *
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            doGet(request, response);
        }

    }
    GetDataServlet.java
    package utils;
    
    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    
    import javax.sql.DataSource;
    
    import com.mchange.v2.c3p0.ComboPooledDataSource;
    
    /**
     * Static helpers around the shared c3p0 connection pool, plus a per-thread
     * connection used for manual transaction handling.
     */
    public class DataSourceUtils {

        /** Pool built from the c3p0 configuration found by the no-arg constructor. */
        private static final DataSource dataSource = new ComboPooledDataSource();

        /** Connection bound to the current thread, if any. */
        private static final ThreadLocal<Connection> tl = new ThreadLocal<Connection>();

        /**
         * @return the shared pooled data source
         */
        public static DataSource getDataSource() {
            return dataSource;
        }

        /**
         * Returns the connection bound to the current thread, borrowing one from the
         * pool and binding it on first use.
         *
         * @return the current thread's connection
         * @throws SQLException if no connection can be obtained
         */
        public static Connection getConnection() throws SQLException {
            Connection con = tl.get();
            if (con == null) {
                con = dataSource.getConnection();
                tl.set(con);
            }
            return con;
        }

        /**
         * Starts a transaction on the current thread's connection by turning
         * auto-commit off.
         *
         * @throws SQLException if the connection cannot be obtained or configured
         */
        public static void startTransaction() throws SQLException {
            Connection con = getConnection();
            if (con != null) {
                con.setAutoCommit(false);
            }
        }

        /**
         * Rolls back the current thread's transaction. Does nothing when no
         * connection is bound to the thread (previously a fresh connection was
         * borrowed just to roll it back).
         *
         * @throws SQLException if the rollback fails
         */
        public static void rollback() throws SQLException {
            Connection con = tl.get();
            if (con != null) {
                con.rollback();
            }
        }

        /**
         * Commits the current thread's transaction, closes the connection and
         * unbinds it from the thread. The connection is closed and unbound even if
         * the commit fails. Does nothing when no connection is bound.
         *
         * @throws SQLException if the commit or the close fails
         */
        public static void commitAndRelease() throws SQLException {
            Connection con = tl.get();
            if (con != null) {
                try {
                    con.commit();
                } finally {
                    release(con);
                }
            }
        }

        /**
         * Closes the current thread's connection and unbinds it from the thread, so
         * that a later {@link #getConnection()} does not hand out the closed
         * connection. Does nothing when no connection is bound.
         *
         * @throws SQLException if the close fails
         */
        public static void closeConnection() throws SQLException {
            Connection con = tl.get();
            if (con != null) {
                release(con);
            }
        }

        /**
         * Closes the statement if it is not {@code null}.
         *
         * @param st statement to close, may be {@code null}
         * @throws SQLException if the close fails
         */
        public static void closeStatement(Statement st) throws SQLException {
            if (st != null) {
                st.close();
            }
        }

        /**
         * Closes the result set if it is not {@code null}.
         *
         * @param rs result set to close, may be {@code null}
         * @throws SQLException if the close fails
         */
        public static void closeResultSet(ResultSet rs) throws SQLException {
            if (rs != null) {
                rs.close();
            }
        }

        /** Closes the connection and always clears the thread binding. */
        private static void release(Connection con) throws SQLException {
            try {
                con.close();
            } finally {
                tl.remove();
            }
        }

    }
    DataSourceUtils.java
    <?xml version="1.0" encoding="UTF-8"?>
    <!-- c3p0 connection-pool settings for the paperdata MySQL database. -->
    <c3p0-config>
        <!-- Presumably the configuration picked up by the no-arg
             ComboPooledDataSource constructor; confirm against the c3p0 docs. -->
        <default-config>
            <!-- NOTE(review): credentials are hard-coded here. The Python crawler in
                 this post connects as the same user with a different password, so
                 check which one matches the actual database. -->
            <property name="user">root</property>
            <property name="password">0608</property>
            <property name="driverClass">com.mysql.jdbc.Driver</property>
            <!-- Local MySQL on port 3306, schema paperdata, GMT+8 server time zone,
                 UTF-8 character encoding. -->
            <property name="jdbcUrl">jdbc:mysql://localhost:3306/paperdata?serverTimezone=GMT%2B8&amp;useUnicode=true&amp;characterEncoding=UTF-8</property>
        </default-config> 
    </c3p0-config> 
    c3p0-config.xml
    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>论文云</title>
    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
    <script src="./js/echarts-wordcloud.js"></script>
    <script src="./js/jquery-1.11.3.min.js"></script>
    <!-- 引入Bootstrap核心样式文件 -->
    <link href="css/bootstrap.css" rel="stylesheet">
    <!-- 引入BootStrap核心js文件 -->
    <script src="./js/bootstrap.js"></script>
    <style>
    html, body, #main {
        width: 100%;
        height: 100%;
        margin: 0;
    }
    </style>
    </head>
    <body>
        <div id="main"></div>
        <div>
            <table class="table table-hover">
                <thead>
                    <tr>
                        <td style="font-size: 20px;">论文链接</td>
                    </tr>
                </thead>
                <tbody>
                    <%-- One row per matching paper. Values come from scraped data, so they
                         are written through c:out to get HTML-escaped. --%>
                    <c:forEach items="${dataList}" var="data" varStatus="vs">
                        <tr>
                            <td><a href="<c:out value='${data.paperlink}'/>"><c:out value="${data.papername}"/></a></td>
                        </tr>
                    </c:forEach>
                </tbody>
            </table>
        </div>
        <script>
            // Renders the word cloud in #main from the /PaperData/getData response and
            // navigates to the matching papers when a word is clicked.
            var chart = echarts.init(document.getElementById('main'));
            var postURL = "/PaperData/getData";
            var mydata = new Array();
            var option = {
                tooltip : {},
                series : [ {
                    type : 'wordCloud',
                    gridSize : 2,
                    sizeRange : [ 20, 50 ],
                    rotationRange : [ -90, 90 ],
                    shape : 'pentagon',
                    width : 800,
                    height : 600,
                    drawOutOfBound : false,
                    textStyle : {
                        normal : {
                            // Random dark-ish colour per word (each channel 0..160).
                            color : function() {
                                return 'rgb('
                                        + [ Math.round(Math.random() * 160),
                                                Math.round(Math.random() * 160),
                                                Math.round(Math.random() * 160) ]
                                                .join(',') + ')';
                            }
                        },
                        emphasis : {
                            shadowBlur : 10,
                            shadowColor : '#333'
                        }
                    },
                    data : mydata
                } ]
            };
            chart.on('click', function(params) {
                // Encode the word so characters such as '&', '#' or '+' survive the query string.
                var url = "clickFunction?name=" + encodeURIComponent(params.name);
                window.location.href = url;
            });
            // Load the data asynchronously and draw once it has arrived, instead of
            // flipping the global $.ajaxSettings.async flag and blocking the page on a
            // synchronous request. 'text' keeps the response a string for JSON.parse.
            $.post(postURL, {}, function(rs) {
                var dataList = JSON.parse(rs) || [];
                for (var i = 0; i < dataList.length; i++) {
                    mydata.push({
                        name : dataList[i].name,
                        value : dataList[i].value
                    });
                }
                chart.setOption(option);
            }, 'text');
        </script>
    </body>
    </html>
    papercloud.jsp
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>论文云</title>
    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
    <script src="./js/echarts-wordcloud.js"></script>
    <script src="./js/jquery-1.11.3.min.js"></script>
    <!-- 引入Bootstrap核心样式文件 -->
    <link href="css/bootstrap.css" rel="stylesheet">
    <!-- 引入BootStrap核心js文件 -->
    <script src="./js/bootstrap.js"></script>
    <style>
    html, body, #main {
        width: 100%;
        height: 100%;
        margin: 0;
    }
    </style>
    </head>
    <body>
        <div id="main"></div>
        <div>
            <table class="table table-hover">
                <thead>
                    <tr>
                        <td style="font-size: 20px;">论文链接</td>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td><a>www.baidu.com</a></td>
                    </tr>
                </tbody>
            </table>
        </div>
        <script>
            // Renders the word cloud in #main from the /PaperData/getData response and
            // navigates to the matching papers when a word is clicked.
            var chart = echarts.init(document.getElementById('main'));
            var postURL = "/PaperData/getData";
            var mydata = new Array();
            var option = {
                tooltip : {},
                series : [ {
                    type : 'wordCloud',
                    gridSize : 2,
                    sizeRange : [ 20, 50 ],
                    rotationRange : [ -90, 90 ],
                    shape : 'pentagon',
                    width : 800,
                    height : 600,
                    drawOutOfBound : false,
                    textStyle : {
                        normal : {
                            // Random dark-ish colour per word (each channel 0..160).
                            color : function() {
                                return 'rgb('
                                        + [ Math.round(Math.random() * 160),
                                                Math.round(Math.random() * 160),
                                                Math.round(Math.random() * 160) ]
                                                .join(',') + ')';
                            }
                        },
                        emphasis : {
                            shadowBlur : 10,
                            shadowColor : '#333'
                        }
                    },
                    data : mydata
                } ]
            };
            chart.on('click', function(params) {
                // Encode the word so characters such as '&', '#' or '+' survive the query string.
                var url = "clickFunction?name=" + encodeURIComponent(params.name);
                window.location.href = url;
            });
            // Load the data asynchronously and draw once it has arrived, instead of
            // flipping the global $.ajaxSettings.async flag and blocking the page on a
            // synchronous request. 'text' keeps the response a string for JSON.parse.
            $.post(postURL, {}, function(rs) {
                var dataList = JSON.parse(rs) || [];
                for (var i = 0; i < dataList.length; i++) {
                    mydata.push({
                        name : dataList[i].name,
                        value : dataList[i].value
                    });
                }
                chart.setOption(option);
            }, 'text');
        </script>
    </body>
    </html>
    papercloud.html

    截图:

     

  • 相关阅读:
    实验五
    实验四
    实验三
    实验二
    寄存器(内存访问)
    实验一
    寄存器
    Mermaid 绘图总结
    电脑查看系统版本
    _ZNote_Chrom_插件_Chrom运行Android软件_APK
  • 原文地址:https://www.cnblogs.com/Aming-/p/13085339.html
Copyright © 2020-2023  润新知