• 顶会热词统计


    爬取CVPR2019年所有论文的题目,并提取题目中的关键字,做成按照热度显示大小的热词云。

    代码:

     

     # coding=utf-8
     import pymysql
     import requests
     from lxml import etree
     
     class Spider:
        def __init__(self):
            self.url = "http://openaccess.thecvf.com/CVPR2019.py"
            self.header = {
                "user-agent": "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Mobile Safari/537.36"}
            self.db = pymysql.connect(host='localhost', port=3306, user='root', passwd='abc456', db='paperdata',
                                      charset='utf8')
            self.cursor = self.db.cursor()
            self.html_list = []
     
        def getHtmlList(self):
            response = requests.get(self.url, headers=self.header)
            html_body = etree.HTML(response.text)
            title = html_body.xpath("//dt[@class='ptitle']/a/@href")
            for item in title:
                self.html_list.append("http://openaccess.thecvf.com/" + item)
     
        def getContent(self, url):
            try:
                response = requests.get(url, headers=self.header)
                body = etree.HTML(response.text)
                title = body.xpath("//div[@id='papertitle']/text()")[0]
                abstract = body.xpath("//div[@id='abstract']/text()")[0]
                down_url = body.xpath("//div[@id='content']//a/@href")[0].replace("../../", "http://openaccess.thecvf.com/")
     
                sql = '''insert into data values({},"{}","{}","{}")'''.format(0, title, down_url, str(abstract))
                self.cursor.execute(sql)
                print(title + "插入成功!")
                self.db.commit()
            except Exception as e:
                print(e)
     
        def run(self):
            self.getHtmlList()
            for url in self.html_list:
                self.getContent(url)
     
     if __name__ == '__main__':
        spwder = Spider()
        spwder.run()
     
     

    DataDao.java

     

    package dao;

    import java.sql.SQLException;

    import java.util.List;

    import org.apache.commons.dbutils.QueryRunner;

    import org.apache.commons.dbutils.handlers.BeanListHandler;

    import pojo.Data;

    import utils.DataSourceUtils;

    public class DataDao {

        public List<Data> getData() throws SQLException {

            QueryRunner queryRunner = new QueryRunner(DataSourceUtils.getDataSource());

            String sql = "select * from data ";

            List<Data> dataList = queryRunner.query(sql, new BeanListHandler<Data>(Data.class));

            return dataList;

           

           

        }

        public List<Data> getLink(String name) throws SQLException {

            QueryRunner queryRunner = new QueryRunner(DataSourceUtils.getDataSource());

            String sql = "select * from data where papername like ?";

            List<Data> dataList = queryRunner.query(sql, new BeanListHandler<Data>(Data.class),"%"+name+"%");

            return dataList;

        }

    }

    Data.java

    package pojo;

    /**
     * Bean for one row of the {@code data} table: a paper's id, name, link and abstract.
     */
    public class Data {

        /** Row identifier. */
        private int id;
        /** Paper title. */
        private String papername;
        /** Link to the paper. */
        private String paperlink;
        /** Paper abstract text. */
        private String paperabstract;

        /** @return the row identifier */
        public int getId() { return this.id; }

        /** @param id the row identifier */
        public void setId(final int id) { this.id = id; }

        /** @return the paper title */
        public String getPapername() { return this.papername; }

        /** @param papername the paper title */
        public void setPapername(final String papername) { this.papername = papername; }

        /** @return the link to the paper */
        public String getPaperlink() { return this.paperlink; }

        /** @param paperlink the link to the paper */
        public void setPaperlink(final String paperlink) { this.paperlink = paperlink; }

        /** @return the abstract text */
        public String getPaperabstract() { return this.paperabstract; }

        /** @param paperabstract the abstract text */
        public void setPaperabstract(final String paperabstract) { this.paperabstract = paperabstract; }

    }

    Word.java

    package pojo;

    /**
     * One word-cloud entry: a word and the number attached to it.
     */
    public class Word {

        /** The word itself. */
        private String name;
        /** The number associated with the word. */
        private int value;

        /** @return the word */
        public String getName() { return this.name; }

        /** @param name the word */
        public void setName(final String name) { this.name = name; }

        /** @return the number associated with the word */
        public int getValue() { return this.value; }

        /** @param value the number associated with the word */
        public void setValue(final int value) { this.value = value; }

    }

    DataService.java

    package service;

    import java.sql.SQLException;

    import java.util.ArrayList;

    import java.util.HashMap;

    import java.util.List;

    import org.apache.commons.lang.ArrayUtils;

    import dao.DataDao;

    import pojo.Data;

    import pojo.Word;

    public class DataService {

        public List<Word> getData() throws SQLException {

            DataDao dao = new DataDao();

            List<Data>  dataList= dao.getData();

            List<Word> wordList = new ArrayList<Word>();

            String [] names = new String[100000];

            for(Data data:dataList) {

                String name = data.getPapername();

                String[] namestemp = name.split(" ");

                names = (String[]) ArrayUtils.addAll(namestemp, names);

            }

            HashMap<String, Integer> name_value = new HashMap<>();

           

            for(String name:names) {

                name_value.put(name, !name_value.containsKey(name)?1:name_value.get(name)+1);

            }

           

            for(String name:name_value.keySet()) {

                Word word = new Word();

                if(name!=null&&(name_value.get(name)>1)&&(name.length()>4)) {

                    word.setName(name);

                    word.setValue(name_value.get(name));

                    wordList.add(word);

                }

            }

            return wordList;

        }

        public List<Data> getLink(String name) throws SQLException {

            DataDao dao = new DataDao();

            return dao.getLink(name);

        }

    }

    ClickFunctionServlet.java

    package servlet;

    import java.io.IOException;

    import java.sql.SQLException;

    import java.util.List;

    import javax.servlet.ServletException;

    import javax.servlet.annotation.WebServlet;

    import javax.servlet.http.HttpServlet;

    import javax.servlet.http.HttpServletRequest;

    import javax.servlet.http.HttpServletResponse;

    import pojo.Data;

    import service.DataService;

    /**

     * Servlet implementation class ClickFunctionServlet

     */

    @WebServlet("/clickFunction")

    public class ClickFunctionServlet extends HttpServlet {

        private static final long serialVersionUID = 1L;

          

        /**

         * @see HttpServlet#HttpServlet()

         */

        public ClickFunctionServlet() {

            super();

            // TODO Auto-generated constructor stub

        }

        /**

         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            request.setCharacterEncoding("utf-8");

            response.setContentType("text/html;charset=UTF-8");

            String name = request.getParameter("name");

            List<Data> dataList =null;

            DataService service = new DataService();

            try {

                dataList = service.getLink(name);

            } catch (SQLException e) {

                // TODO Auto-generated catch block

                e.printStackTrace();

            }

            request.setAttribute("dataList", dataList);

            request.getRequestDispatcher("papercloud.jsp").forward(request, response);

        }

        /**

         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            doGet(request, response);

        }

    }

    GetDataServlet.java

    package servlet;

    import java.io.IOException;

    import java.sql.SQLException;

    import java.util.List;

    import javax.servlet.ServletException;

    import javax.servlet.annotation.WebServlet;

    import javax.servlet.http.HttpServlet;

    import javax.servlet.http.HttpServletRequest;

    import javax.servlet.http.HttpServletResponse;

    import com.google.gson.Gson;

    import pojo.Word;

    import service.DataService;

    /**

     * Servlet implementation class GetDataServlet

     */

    @WebServlet("/getData")

    public class GetDataServlet extends HttpServlet {

        private static final long serialVersionUID = 1L;

          

        /**

         * @see HttpServlet#HttpServlet()

         */

        public GetDataServlet() {

            super();

            // TODO Auto-generated constructor stub

        }

        /**

         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            request.setCharacterEncoding("utf-8");

            response.setContentType("text/html;charset=UTF-8");

            List<Word> wordList = null;

            DataService service = new DataService();

            try {

                wordList = service.getData();

            } catch (SQLException e) {

                // TODO Auto-generated catch block

                e.printStackTrace();

            }

            Gson gson = new Gson();

            String json = gson.toJson(wordList);

            response.getWriter().write(json);

        }

        /**

         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            doGet(request, response);

        }

    }

    DataSourceUtils.java

    package utils;

    import java.sql.Connection;

    import java.sql.ResultSet;

    import java.sql.SQLException;

    import java.sql.Statement;

    import javax.sql.DataSource;

    import com.mchange.v2.c3p0.ComboPooledDataSource;

    public class DataSourceUtils {

        private static DataSource dataSource = new ComboPooledDataSource();

        private static ThreadLocal<Connection> tl = new ThreadLocal<Connection>();

        public static DataSource getDataSource() {

            return dataSource;

        }

       

        public static Connection getConnection() throws SQLException {

            Connection con = tl.get();

            if (con == null) {

                con = dataSource.getConnection();

                tl.set(con);

            }

            return con;

        }

       

        public static void startTransaction() throws SQLException {

            Connection con = getConnection();

            if (con != null) {

                con.setAutoCommit(false);

            }

        }

       

        public static void rollback() throws SQLException {

            Connection con = getConnection();

            if (con != null) {

                con.rollback();

            }

        }

        public static void commitAndRelease() throws SQLException {

            Connection con = getConnection();

            if (con != null) {

                con.commit();

                con.close();

                tl.remove();

            }

        }

        public static void closeConnection() throws SQLException {

            Connection con = getConnection();

            if (con != null) {

                con.close();

            }

        }

        public static void closeStatement(Statement st) throws SQLException {

            if (st != null) {

                st.close();

            }

        }

        public static void closeResultSet(ResultSet rs) throws SQLException {

            if (rs != null) {

                rs.close();

            }

        }

    }

    c3p0-config.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- c3p0 default configuration: MySQL account, driver class and JDBC URL
         of the paperdata database. -->
    <c3p0-config>
        <default-config>
            <property name="user">root</property>
            <property name="password">0608</property>
            <property name="driverClass">com.mysql.jdbc.Driver</property>
            <!-- The URL parameter separator has to be written as an entity:
                 a raw ampersand is not allowed in XML text. -->
            <property name="jdbcUrl">jdbc:mysql://localhost:3306/paperdata?serverTimezone=GMT%2B8&amp;useUnicode=true&amp;characterEncoding=UTF-8</property>
        </default-config>
    </c3p0-config>

    papercloud.jsp

    <%@ page language="java" contentType="text/html; charset=UTF-8"

        pageEncoding="UTF-8"%>

    <%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>

    <!DOCTYPE html>

    <html>

    <head>

    <meta charset="UTF-8">

    <title>论文云</title>

    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>

    <script src="./js/echarts-wordcloud.js"></script>

    <script src="./js/jquery-1.11.3.min.js"></script>

    <!-- 引入Bootstrap核心样式文件 -->

    <link href="css/bootstrap.css" rel="stylesheet">

    <!-- 引入BootStrap核心js文件 -->

    <script src="./js/bootstrap.js"></script>

    <style>

    /* Let the chart container fill the whole viewport. */
    html, body, #main {
        width: 100%;
        height: 100%;
        margin: 0;
    }

    </style>

    </head>

    <body>

        <div id="main"></div>

        <div>

            <table class="table table-hover">

                <thead>

                    <tr>

                        <td style="font-size: 20px;">论文链接</td>

                    </tr>

                </thead>

                <tbody>

                    <c:forEach items="${dataList}" var="data" varStatus="vs">

                        <tr>

                            <td><a href="${data.paperlink}">${data.papername}</a></td>

                        </tr>

                    </c:forEach>

                </tbody>

            </table>

        </div>

        <script>

            var chart = echarts.init(document.getElementById('main'));

            var postURL = "/PaperData/getData";

            var mydata = new Array();

            $.ajaxSettings.async = false;

            $.post(postURL, {}, function(rs) {

                var dataList = JSON.parse(rs);

                for (var i = 0; i < dataList.length; i++) {

                    var d = {};

                    d['name'] = dataList[i].name;

                    d['value'] = dataList[i].value;

                    mydata.push(d);

                }

            });

            $.ajaxSettings.async = true;

            var option = {

                tooltip : {},

                series : [ {

                    type : 'wordCloud',

                    gridSize : 2,

                    sizeRange : [ 20, 50 ],

                    rotationRange : [ -90, 90 ],

                    shape : 'pentagon',

                    width : 800,

                    height : 600,

                    drawOutOfBound : false,

                    textStyle : {

                        normal : {

                            color : function() {

                                return 'rgb('

                                        + [ Math.round(Math.random() * 160),

                                                Math.round(Math.random() * 160),

                                                Math.round(Math.random() * 160) ]

                                                .join(',') + ')';

                            }

                        },

                        emphasis : {

                            shadowBlur : 10,

                            shadowColor : '#333'

                        }

                    },

                    data : mydata

                } ]

            };

            chart.setOption(option);

            chart.on('click', function(params) {

                var url = "clickFunction?name=" + params.name;

                window.location.href = url;

            });

        </script>

    </body>

    </html>

    papercloud.html

    <!DOCTYPE html>

    <html>

    <head>

    <meta charset="UTF-8">

    <title>论文云</title>

    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>

    <script src="./js/echarts-wordcloud.js"></script>

    <script src="./js/jquery-1.11.3.min.js"></script>

    <!-- 引入Bootstrap核心样式文件 -->

    <link href="css/bootstrap.css" rel="stylesheet">

    <!-- 引入BootStrap核心js文件 -->

    <script src="./js/bootstrap.js"></script>

    <style>

    /* Let the chart container fill the whole viewport. */
    html, body, #main {
        width: 100%;
        height: 100%;
        margin: 0;
    }

    </style>

    </head>

    <body>

        <div id="main"></div>

        <div>

            <table class="table table-hover">

                <thead>

                    <tr>

                        <td style="font-size: 20px;">论文链接</td>

                    </tr>

                </thead>

                <tbody>

                    <tr>

                        <td><a>www.baidu.com</a></td>

                    </tr>

                </tbody>

            </table>

        </div>

        <script>

            var chart = echarts.init(document.getElementById('main'));

            var postURL = "/PaperData/getData";

            var mydata = new Array();

            $.ajaxSettings.async = false;

            $.post(postURL, {}, function(rs) {

                var dataList = JSON.parse(rs);

                for (var i = 0; i < dataList.length; i++) {

                    var d = {};

                    d['name'] = dataList[i].name;

                    d['value'] = dataList[i].value;

                    mydata.push(d);

                }

            });

            $.ajaxSettings.async = true;

            var option = {

                tooltip : {},

                series : [ {

                    type : 'wordCloud',

                    gridSize : 2,

                    sizeRange : [ 20, 50 ],

                    rotationRange : [ -90, 90 ],

                    shape : 'pentagon',

                    width : 800,

                    height : 600,

                    drawOutOfBound : false,

                    textStyle : {

                        normal : {

                            color : function() {

                                return 'rgb('

                                        + [ Math.round(Math.random() * 160),

                                                Math.round(Math.random() * 160),

                                                Math.round(Math.random() * 160) ]

                                                .join(',') + ')';

                            }

                        },

                        emphasis : {

                            shadowBlur : 10,

                            shadowColor : '#333'

                        }

                    },

                    data : mydata

                } ]

            };

            chart.setOption(option);

            chart.on('click', function(params) {

                var url = "clickFunction?name=" + params.name;

                window.location.href = url;

            });

        </script>

    </body>

    </html>

  • 相关阅读:
    浅谈ssh(struts,spring,hibernate三大框架)整合的意义及其精髓
    Spring中ClassPathXmlApplicationContext类的简单使用
    测试计划/系统风险 (设计方面、开发方面、测试本身 等风险)
    浏览器兼容
    4.0 爬虫
    2.1 IDEA
    2.3 接口测试
    1.1测试分类
    1.3 ODPS
    2.1 Word 插入 smartart、图表
  • 原文地址:https://www.cnblogs.com/ICDTAD/p/13110838.html
Copyright © 2020-2023  润新知