• 顶会热词统计


    一、要求:

    1、完成论文的题目、摘要、关键词、原文链接四项内容爬取;

    2、存储到本地数据库中;

    3、按照题目、关键词分类统计得到最热的十个领域方向;

    4、热词出现次数越多,在词云中显示得越大;同时要把热词与文章关联起来,点击词云中的热词即可找到与之对应的文章题目;

    二、效果:

     

     

     

     

     

     

     

     三、设计思路:

    from lxml import etree

    from pymysql import connect

    from jieba.analyse import *

    import requests

    class CVPR:
        """Persists crawled paper records into the local MySQL database."""

        def saveContent_list(self,title,zhaiyao,guanjian,lianjie):
            """Insert one paper as a row of the ``CVPR`` table.

            Args:
                title: paper title.
                zhaiyao: abstract text.
                guanjian: keywords extracted from the abstract.
                lianjie: link to the paper.

            Raises:
                Whatever ``pymysql`` raises when connecting or inserting; the
                connection is closed in every case.
            """
            # Keyword arguments are required: PyMySQL 1.0 made connect() keyword-only.
            # NOTE(review): credentials are hard-coded here; consider moving them
            # to configuration or environment variables.
            con = connect(host="localhost", user="root", password="a3685371", database="pachong")
            try:
                with con.cursor() as cursors:
                    # Values are bound in the table's column order
                    # (presumably title, zhaiyao, guanjian, lianjie - confirm against the schema).
                    cursors.execute("insert into CVPR values(%s,%s,%s,%s)", (title,zhaiyao,guanjian,lianjie))
                con.commit()
            finally:
                # Always release the connection, even when the insert fails.
                con.close()

    # Browser-like User-Agent sent with every request.
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
               }

    # Index page that lists the CVPR 2019 papers.
    url = "http://openaccess.thecvf.com/CVPR2019.py"

    # NOTE(review): defined but never passed to requests.get(), so all requests go out directly.
    proxies = {
      "http": "http://211.147.226.4",
      "https": "http://122.200.90.12",
    }

    cvpr = CVPR()
    response = requests.get(url,headers=headers)
    html_str = etree.HTML(response.content.decode())

    # Relative links to the individual paper pages.
    hrefs = html_str.xpath("//div[@id='content']/dl/dt/a/@href")

    for href in hrefs:
        href = "http://openaccess.thecvf.com/{0}".format(href)
        # One failing paper (network error, unexpected page layout, DB error)
        # must not abort the whole crawl, so the full per-paper work is guarded.
        try:
            response2 = requests.get(href,headers=headers)
            paper_html = etree.HTML(response2.content.decode())

            # xpath() returns lists; only the first match of each is stored.
            title = paper_html.xpath("//div[@id='content']/dl/dd//div[@id='papertitle']/text()")
            lianjie = paper_html.xpath("//div[@id='content']/dl/dd//a/@href")
            zhaiyao = paper_html.xpath("//div[@id='content']/dl/dd//div[@id='abstract']/text()")
            if not (title and lianjie and zhaiyao):
                print("存入失败", href, "missing title, link or abstract")
                continue

            abstract = zhaiyao[0].strip()
            # Top-5 keywords of the abstract, stored space-separated because the
            # Java side splits the guanjian column on a single space.
            keywords = [keyword for keyword, weight in extract_tags(abstract, topK=5, withWeight=True)]

            cvpr.saveContent_list(title[0].strip(), abstract, " ".join(keywords), lianjie[0])
            print("存入成功")
        except Exception as exc:
            # Report which page failed and why instead of hiding the cause.
            print("存入失败", href, exc)

    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
    <%--
      Hot-word page: draws a word cloud from the JSON returned by PaperServlet_
      and lists the papers found in the request attribute "list". Clicking a
      word navigates to ClickServlet with the word as the "geunjian" parameter.
    --%>
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>Insert title here</title>
    <link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
    <script src="js/jquery-1.11.3.min.js" type="text/javascript"></script>
    <script type="text/javascript" src="js/echarts.min.js"></script>
    <script type="text/javascript" src="js/china.js"></script>
    <script src="js/bootstrap.min.js" type="text/javascript"></script>
    <%-- NOTE(review): a second ECharts build is loaded here after js/echarts.min.js; confirm both are needed. --%>
    <script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
    <script src='js/echarts-wordcloud.js'></script>
    </head>
    <body>
    <div id="main" style="width: 100%;height: 400px"></div>
    <div>
      <table class="table" style="width: 100%;align-content: center;" >
        <tr>
          <th align="center">论文连接</th>
        </tr>
        <c:forEach var="item" items="${list}">
          <tr>
            <%-- c:out escapes the crawled title and link so they cannot inject markup. --%>
            <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
          </tr>
        </c:forEach>
      </table>
    </div>
    <script>
      var chart = echarts.init(document.getElementById('main'));

      // Convert the servlet response into word-cloud items, skipping entries without a name.
      function toCloudData(words) {
        var items = [];
        for (var i = 0; i < words.length; i++) {
          if (words[i] && words[i].name) {
            items.push({ name: words[i].name, value: words[i].value });
          }
        }
        return items;
      }

      // Build the chart option and draw the word cloud.
      function renderCloud(words) {
        var option = {
          tooltip: {},
          series: [ {
            type: 'wordCloud',
            gridSize: 2,
            sizeRange: [20, 50],
            rotationRange: [-90, 90],
            shape: 'pentagon',
            width: 600,
            height: 300,
            drawOutOfBound: true,
            textStyle: {
              normal: {
                // Random dark-ish colour per word.
                color: function () {
                  return 'rgb(' + [
                    Math.round(Math.random() * 160),
                    Math.round(Math.random() * 160),
                    Math.round(Math.random() * 160)
                  ].join(',') + ')';
                }
              },
              emphasis: {
                shadowBlur: 10,
                shadowColor: '#333'
              }
            },
            data: toCloudData(words)
          } ]
        };
        chart.setOption(option);
      }

      // Load the keyword counts asynchronously and draw only after a successful
      // response, so a failed request no longer leads to reading undefined data.
      $.ajax({
        url : "PaperServlet_",
        type : "POST",
        dataType : "json",
        success : function(data) {
          renderCloud(data || []);
        },
        error : function() {
          alert("请求失败");
        }
      });

      chart.on('click', function (params) {
          // Encode the keyword so spaces, '&', '#' and non-ASCII text survive in the query string.
          window.location.href = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
        });

      // Call resize through the chart instance; assigning chart.resize directly would lose "this".
      window.onresize = function () {
        chart.resize();
      };
    </script>
    </body>
    </html>

    package com.me.servlet;

    import java.io.IOException;
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;

    import com.google.gson.Gson;
    import com.me.dao.LWDao;
    import com.me.domain.LunWen;
    import com.me.domain.Tu;

    @WebServlet("/PaperServlet_")

    public class PaperServlet_ extends HttpServlet {

        private static final long serialVersionUID = 1L;

           

        public PaperServlet_() {

            super();

        }

        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            response.setHeader("content-type", "text/html;charset=UTF-8");

            response.setCharacterEncoding("UTF-8");

            LWDao dao = new LWDao();

            List<LunWen> list = new ArrayList<LunWen>();

            List<Tu> list_tu = new ArrayList<Tu>();

            String [] str = new String[10000];

            String [] str_ = new String[10000];

            int [] b = new int[10000];

            int num = 0;

            int length1 = 0;

            try {

                list = dao.search_();

            } catch (SQLException e) {

                e.printStackTrace();

            }

            for(int i=0;i<list.size();i++) {

                if(list.get(i).getLianjie()!=null) {

                    String ss = list.get(i).getLianjie().substring(6,list.get(i).getLianjie().length());

                    list.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);

                }

                String[] split = list.get(i).getGuanjian().split(" ");

                for(int j=0;j<split.length;j++) {

                    str[num++] = split[j];

                }

            }

            for(int k=0;k<num;k++) {

                b[k]=0;

            }

            str_[0]=str[0];

            int tt=1;

            Boolean rt=true;

            for(int i=1;i<num;i++) {

                rt=false;

                for(int j=0;j<tt;j++) {

                    if(str[i].equals(str_[j])) {

                        rt=true;

                        break;

                    }

                }

                if(!rt) {

                    str_[tt]=str[i];

                    tt++;

                }

            }

            length1=tt;

            for(int i=0;i<length1;i++) {

                for(int j=0;j<num;j++) {

                    if(str_[i].equals(str[j])) {

                        b[i]++;

                    }

                }

            }

            int t3=0;

            int t2=0;

            String sr="";

            for(int i=0;i<length1-1;i++) {

                t3=i;

                for(int j=i+1;j<length1;j++) {

                    if(b[t3]<b[j]) {

                        t3=j;

                    }

                }

               if(t3!=i) {

                   t2=b[i];

                   b[i]=b[t3];

                   b[t3]=t2;

                   sr=str_[i];

                   str_[i]=str_[t3];

                   str_[t3]=sr;

               }

            }

            for(int i=0;i<100;i++) {

                Tu tu = new Tu();

                tu.name=str_[i];

                tu.value= b[i];

                list_tu.add(tu);

            }

            

            Gson gson = new Gson();

            String json = gson.toJson(list_tu);

            response.getWriter().write(json);

        }

        

        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            doGet(request, response);

        }

    }

    package com.me.domain;

    /**
     * One word-cloud entry: a keyword together with its occurrence count.
     * The fields are public and are filled in directly by the code that builds the list.
     */
    public class Tu {

        /** Keyword text shown in the cloud. */
        public String name;

        /** Number of times the keyword occurs. */
        public int value;
    }

    // Excerpt: load all papers, split their keywords into words, count each
    // distinct word, sort by count (descending) and wrap the result in Tu objects.
    LWDao dao = new LWDao();

            List<LunWen> list = new ArrayList<LunWen>();

            List<Tu> list_tu = new ArrayList<Tu>();

            // str: every keyword token in reading order; str_: the distinct tokens;
            // b[i]: number of occurrences of str_[i].
            // NOTE(review): fixed capacity of 10000 - storing more tokens throws
            // ArrayIndexOutOfBoundsException.
            String [] str = new String[10000];

            String [] str_ = new String[10000];

            int [] b = new int[10000];

            // num: number of tokens stored in str; length1: number of distinct tokens.
            int num = 0;

            int length1 = 0;

            try {

                list = dao.search_();

            } catch (SQLException e) {

                // NOTE(review): the error is only printed; processing continues with the empty list.
                e.printStackTrace();

            }

    // Split each paper's keyword string into single words.
    // NOTE(review): getGuanjian() returning null would throw NullPointerException here.

    for(int i=0;i<list.size();i++) {

                String[] split = list.get(i).getGuanjian().split(" ");

                for(int j=0;j<split.length;j++) {

                    str[num++] = split[j];

                }

            }

    // Remove duplicates and count occurrences.

            for(int k=0;k<num;k++) {

                b[k]=0;

            }

            // The first token is always distinct; tt tracks how many distinct tokens exist so far.
            str_[0]=str[0];

            int tt=1;

            // rt is true when the current token is already present in str_.
            Boolean rt=true;

            for(int i=1;i<num;i++) {

                rt=false;

                for(int j=0;j<tt;j++) {

                    if(str[i].equals(str_[j])) {

                        rt=true;

                        break;

                    }

                }

                if(!rt) {

                    str_[tt]=str[i];

                    tt++;

                }

            }

            length1=tt;

            // Count how often each distinct token appears among all tokens.
            for(int i=0;i<length1;i++) {

                for(int j=0;j<num;j++) {

                    if(str_[i].equals(str[j])) {

                        b[i]++;

                    }

                }

            }

    // Sort: selection sort by count, largest first; str_ is swapped in step with b.

            // t3: index of the largest remaining count; t2 and sr: swap temporaries.
            int t3=0;

            int t2=0;

            String sr="";

            for(int i=0;i<length1-1;i++) {

                t3=i;

                for(int j=i+1;j<length1;j++) {

                    if(b[t3]<b[j]) {

                        t3=j;

                    }

                }

               if(t3!=i) {

                   t2=b[i];

                   b[i]=b[t3];

                   b[t3]=t2;

                   sr=str_[i];

                   str_[i]=str_[t3];

                   str_[t3]=sr;

               }

            }

    // Wrap the first 100 entries into Tu objects.
    // NOTE(review): always emits 100 entries; with fewer than 100 distinct words
    // the remaining ones have a null name and a count of 0.

            for(int i=0;i<100;i++) {

                Tu tu = new Tu();

                tu.name=str_[i];

                tu.value= b[i];

                list_tu.add(tu);

            }

    package com.me.servlet;

    import java.io.IOException;

    import java.sql.SQLException;

    import java.util.ArrayList;

    import java.util.List;

    import javax.servlet.ServletException;

    import javax.servlet.annotation.WebServlet;

    import javax.servlet.http.HttpServlet;

    import javax.servlet.http.HttpServletRequest;

    import javax.servlet.http.HttpServletResponse;

    import com.me.dao.LWDao;

    import com.me.domain.LunWen;

    import com.me.domain.Tu;

    /**

     * Servlet implementation class LunServlet

     */

    @WebServlet("/LunServlet")

    public class LunServlet extends HttpServlet {

        private static final long serialVersionUID = 1L;

           

        /**

         * @see HttpServlet#HttpServlet()

         */

        public LunServlet() {

            super();

            // TODO Auto-generated constructor stub

        }

        /**

         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            response.setHeader("content-type", "text/html;charset=UTF-8");

            response.setCharacterEncoding("UTF-8");

            LWDao dao = new LWDao();

            List<LunWen> list = new ArrayList<LunWen>();

            try {

                list = dao.search_();

            } catch (SQLException e) {

                e.printStackTrace();

            }

            for(int i=0;i<list.size();i++) {

                if(list.get(i).getLianjie()!=null) {

                    String ss = list.get(i).getLianjie().substring(6,list.get(i).getLianjie().length());

                    list.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);

                }

                

            }

            

            request.setAttribute("list",list);

            request.getRequestDispatcher("lw.jsp").forward(request, response);

        }

        /**

         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            doGet(request, response);

        }

        

        

    }

    package com.me.domain;

    /**
     * Plain bean for one paper record. Each property has a getter and a setter;
     * all values start out as {@code null}.
     */
    public class LunWen {

        /** Paper title. */
        private String title;

        /** Abstract text. */
        private String zhaiyao;

        /** Keywords of the paper. */
        private String guanjian;

        /** Link to the paper. */
        private String lianjie;

        /** @return the paper title */
        public String getTitle() {
            return this.title;
        }

        /** @param value the paper title */
        public void setTitle(String value) {
            title = value;
        }

        /** @return the abstract text */
        public String getZhaiyao() {
            return this.zhaiyao;
        }

        /** @param value the abstract text */
        public void setZhaiyao(String value) {
            zhaiyao = value;
        }

        /** @return the keywords */
        public String getGuanjian() {
            return this.guanjian;
        }

        /** @param value the keywords */
        public void setGuanjian(String value) {
            guanjian = value;
        }

        /** @return the link to the paper */
        public String getLianjie() {
            return this.lianjie;
        }

        /** @param value the link to the paper */
        public void setLianjie(String value) {
            lianjie = value;
        }
    }

    package com.me.servlet;

    import java.io.IOException;

    import java.sql.SQLException;

    import java.util.ArrayList;

    import java.util.List;

    import javax.servlet.ServletException;

    import javax.servlet.annotation.WebServlet;

    import javax.servlet.http.HttpServlet;

    import javax.servlet.http.HttpServletRequest;

    import javax.servlet.http.HttpServletResponse;

    import com.me.dao.LWDao;

    import com.me.domain.LunWen;

    /**

     * Servlet implementation class ClickServlet

     */

    @WebServlet("/ClickServlet")

    public class ClickServlet extends HttpServlet {

        private static final long serialVersionUID = 1L;

        LWDao dao = new LWDao();

        

        public ClickServlet() {

            super();

            // TODO Auto-generated constructor stub

        }

        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            String geunjian = request.getParameter("geunjian");

            System.out.println(geunjian);

            List<LunWen> guan = new ArrayList<LunWen>();

            try {

                guan = dao.login(geunjian);

            } catch (SQLException e) {

                e.printStackTrace();

            }

            for(int i=0;i<guan.size();i++) {

                if(guan.get(i).getLianjie()!=null) {

                    String ss = guan.get(i).getLianjie().substring(6,guan.get(i).getLianjie().length());

                    guan.get(i).setLianjie("http://openaccess.thecvf.com/"+ss);

                }

                

            }

            request.setAttribute("list", guan);

            System.out.println(guan.size());

            request.getRequestDispatcher("lw.jsp").forward(request, response);

        }

        /**

         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

         */

        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

            // TODO Auto-generated method stub

            doGet(request, response);

        }

    }

    package com.me.dao;

    import java.sql.SQLException;

    import java.util.List;

    import org.apache.commons.dbutils.QueryRunner;

    import org.apache.commons.dbutils.handlers.BeanListHandler;

    import com.me.domain.LunWen;

    import com.me.utils.DBUtils;

    public class LWDao {

        public List<LunWen> search_() throws SQLException {

            QueryRunner qr = new QueryRunner(DBUtils.getDataSource());

            String sql = "select * from cvpr";

            List<LunWen> query = qr.query(sql, new BeanListHandler<LunWen>(LunWen.class));

            return query;

        }

        public List<LunWen> login(String guanjien) throws SQLException {

            QueryRunner qr = new QueryRunner(DBUtils.getDataSource());

            String sql = "select * from cvpr where guanjian like "+"'%"+guanjien+"%'";

            System.out.println(sql);

            List<LunWen> user01 = qr.query(sql, new BeanListHandler<LunWen>(LunWen.class));

            return user01;

        }

    }

    参考博客原文:https://www.cnblogs.com/20183544-wangzhengshuai/p/12702137.html

  • 相关阅读:
    如何卸载服务(转)
    The Frightening Science of Prediction: How Target & 10 Others Make Money Predicting Your Next Life Event(转摘)
    如果你迷恋厚实的屋顶,就会失去浩瀚的繁星
    李开复:移动互联网创业不要总是“入口思维”(转)
    [微言]增长与幸福 zz
    缔元信
    Tabledriven Approach
    北京医院排名 很有用,留下了
    青春期企业如何突围(转)
    意大利罗马&佛罗伦萨 攻略
  • 原文地址:https://www.cnblogs.com/aiyyue/p/13085119.html
Copyright © 2020-2023  润新知