• 顶会热词统计


    根据CVPR论文生成热点词汇云图 

    1、用 python 爬取论文到数据库中;

    2、分析、查找关键词,并按出现次数对其排序;

    3、生成热词汇云图;

     一、python爬取数据

    import requests
    import pymysql
    from bs4 import BeautifulSoup

    # Scrapes the CVPR 2019 open-access index, follows every "pdf" link to the
    # paper's HTML page, extracts the abstract, and inserts one row per paper
    # (title, link, abstract, keywords) into the `lunwen` table.

    # Keyword arguments are used throughout: newer PyMySQL releases no longer
    # accept the host as a positional argument.
    db = pymysql.connect(host='127.0.0.1',
                         port=3306,
                         user='root',
                         password='123',
                         database='mytest',
                         charset='utf8')

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
    }
    url = "http://openaccess.thecvf.com/CVPR2019.py"

    try:
        # Send the browser-like User-Agent with every request (it was defined
        # but never used before).
        html = requests.get(url, headers=headers)
        soup = BeautifulSoup(html.content, 'html.parser')

        # Every paper on the index page has an <a> whose text is exactly "pdf".
        pdfs = soup.find_all(name="a", text="pdf")

        lis = []
        for i, pdf in enumerate(pdfs):
            # e.g. ".../Some_Paper_Title_CVPR_2019_paper.pdf" -> "Some_Paper_Title"
            pdf_name = pdf["href"].split('/')[-1]
            name = pdf_name.split('.')[0].replace("_CVPR_2019_paper", "")
            link = "http://openaccess.thecvf.com/content_CVPR_2019/html/" + name + "_CVPR_2019_paper.html"

            html1 = requests.get(link, headers=headers)
            soup1 = BeautifulSoup(html1.content, 'html.parser')
            weizhi = soup1.find('div', attrs={'id': 'abstract'})
            # Reset per paper: a page without an abstract must not inherit the
            # abstract of the previously scraped paper.
            jianjie = weizhi.get_text() if weizhi else ""
            print("这是第"+str(i)+"条数据")

            # The words of the title (separated by "_") double as keywords.
            keywords = ','.join(str(name).split('_'))

            lis.append({
                'title': name,
                'link': link,
                'abstract': jianjie,
                'keywords': keywords,
            })

        cursor = db.cursor()
        try:
            for info in lis:
                cols = ", ".join('`{}`'.format(k) for k in info.keys())
                print(cols)  # e.g. '`title`, `link`, `abstract`, `keywords`'

                val_cols = ', '.join('%({})s'.format(k) for k in info.keys())
                print(val_cols)  # e.g. '%(title)s, %(link)s, ...'

                # Only the fixed column names are formatted into the statement;
                # the values are passed separately as named parameters.
                sql = "insert into lunwen(%s) values(%s)"
                res_sql = sql % (cols, val_cols)
                print(res_sql)

                cursor.execute(res_sql, info)  # the dict supplies the named parameters
                db.commit()
                num = 1
                print(num)
                print("成功")
        finally:
            cursor.close()
    finally:
        # Release the connection even if scraping or an insert fails.
        db.close()

     二、分析、查找关键词

     借助Map存储关键词, key为关键词,value为出现的次数。遍历到相同的关键词value值+1,然后根据value值排序。

    dao层:

    package dao;
     
     
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.stream.Collectors;
    import  Bean.copy.*;
    import jdbc.Util;
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.ArrayList;
    import java.util.List;
     
    import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils.Collections;
     
    public class Dao {
        public static Map<String,Integer> getrc()
        {
            String sql="select * from lunwen";
            Map<String, Integer>map= new HashMap<String, Integer>();
            Map<String, Integer>results= new LinkedHashMap<String, Integer>();
            Connection con=null;
            Statement state=null;
            ResultSet rs=null;
            con=Util.getConn();
            try {
                state=con.createStatement();
                rs=state.executeQuery(sql);
                while(rs.next())
                {
                    String keywords=rs.getString("keywords");
                    String[] split = keywords.split(",");
                    for(int i=0;i<split.length;i++)
                    {
                        if(map.get(split[i])==null)
                        {
                            map.put(split[i],0);
                        }
                        else
                        {
                            map.replace(split[i], map.get(split[i])+1);
                        }
                    }
                }
            } catch (SQLException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            Util.close(rs, state, con);
            map.entrySet()                
            .stream()               
            .sorted((p1, p2) -> p2.getValue().compareTo(p1.getValue()))                
            .collect(Collectors.toList())
            .forEach(ele -> results.put(ele.getKey(), ele.getValue()));
     
           
            return results;
        }
        
        
        public List<Data> list(String keywords) { // 查询所有信息
     
     
            List<Data> list = new ArrayList<Data>(); // 创建集合
            Connection conn = Util.getConn();
            String sql = "select * from lunwen where keywords like "+"'%"+keywords+"%'"; // SQL查询语句
     
            try {
     
                PreparedStatement pst = conn.prepareStatement(sql);
     
                ResultSet rs = pst.executeQuery();
                
                Data data = null;
                
                while (rs.next()) {
     
     
                    String title = rs.getString("title");
                    
                    String link = rs.getString("link");
                    
                    String as= rs.getString("abstract");
                    
     
                    
     
                        data = new Data(title,link,as,keywords);
                    
                    list.add(data);
     
                }
     
                rs.close(); // 关闭
     
                pst.close(); // 关闭
     
            } catch (SQLException e1) {
     
                e1.printStackTrace(); // 抛出异常
     
            }
     
            return list; // 返回一个集合
     
        }
     
     
     
     
    }

      servlet层:

    package servlet;
     
    import java.io.IOException;
    import java.util.Map;
     
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
     
    import dao.Dao;
    import net.sf.json.JSONArray;
    import net.sf.json.JSONObject;
     
    @WebServlet("/RcServlet")
    public class RcServlet extends HttpServlet {
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public RcServlet() {
            super();
            // TODO Auto-generated constructor stub
        }
     
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
     
            this.doPost(request, response);
        }
     
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            request.setCharacterEncoding("utf-8");
            Map<String, Integer>sortMap=Dao.getrc();
            JSONArray json =new JSONArray();
            int k=0;
            for (Map.Entry<String, Integer> entry : sortMap.entrySet()) 
            {
                JSONObject ob=new JSONObject();
                ob.put("name", entry.getKey());
                ob.put("value", entry.getValue());
                if(!(entry.getKey().equals("for")||entry.getKey().equals("and")||entry.getKey().equals("With")||entry.getKey().equals("of")||entry.getKey().equals("in")||entry.getKey().equals("From")||entry.getKey().equals("A")||entry.getKey().equals("to")||entry.getKey().equals("a")||entry.getKey().equals("the")||entry.getKey().equals("by")))
                {
                    json.add(ob);
                    k++;
                }
                if(k==10)
                    break;
            }
            System.out.println(json.toString());
            
            response.getWriter().write(json.toString());
        
        }
     
    }

      三、生成热词汇云图

    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
    <%-- Hot-word cloud page: lists the papers in the "list" attribute on the left
         and renders a word cloud, loaded from RcServlet, on the right. --%>
    <%request.setCharacterEncoding("utf-8"); 
    response.setCharacterEncoding("utf-8");%>
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="UTF-8">
    <title>热词云</title>
    <link type="text/css" rel="stylesheet" href="css/style.css">
    <script src="js/jquery-3.4.1.min.js"></script>
    <script src="js/echarts.min.js"></script>
    <script src="js/echarts-cloud.js"></script>
    <style>
        #main {
            width: 30%;
            height: 500px;
            border: 1px solid #ddd;
            float: right;
        }
        #table {
            overflow-x: auto;
            overflow-y: auto;
            width: 70%;
            height: 500px;
            float: left;
            /* NOTE(review): "dp" is not a CSS length unit, so browsers drop the
               two declarations below; use px if the spacing is wanted. */
            margin-top:100dp;
            padding-top:100dp;
        }
    </style>
    </head>

    <body >
    <br>
    <h1>热词云</h1>
    <br>
    <br>
    <br>

    <div id="table">
      <table id='gradient-style' >
        <tr>
          <th align="center">论文连接</th>
        </tr>
        <c:forEach var="item" items="${list}">
          <tr>
            <%-- c:out escapes the scraped values before they reach the markup. --%>
            <td><a href="<c:out value='${item.link}'/>"><c:out value="${item.title}"/></a></td>
          </tr>
        </c:forEach>
      </table>
    </div>


      <div id="main">

      </div>
      <script type="text/javascript">

        // Load the keyword counts and draw them as a word cloud in #main.
        $.ajax({
            url : "RcServlet",
            async : true,
            type : "POST",
            data : {},
            dataType : "json",
            success : function(data) {
                // Copy only the fields the chart needs.
                var mydata = [];
                for (var i = 0; i < data.length; i++) {
                    mydata.push({
                        name : data[i].name,
                        value : data[i].value
                    });
                }

                var myChart = echarts.init(document.getElementById('main'));

                myChart.setOption({
                    title: {
                        text: ''
                    },
                    tooltip: {},
                    series: [{
                        type : 'wordCloud',  // word-cloud series
                        shape : 'smooth',
                        gridSize : 8,  // grid size
                        size : ['50%','50%'],
                        //sizeRange : [ 50, 100 ],
                        rotationRange : [-45, 0, 45, 90],  // rotation range
                        textStyle : {
                            normal : {
                                fontFamily : '微软雅黑',
                                // Random RGB colour for every word.
                                color : function() {
                                    return 'rgb(' +
                                        Math.round(Math.random() * 255) +
                                        ', ' + Math.round(Math.random() * 255) +
                                        ', ' + Math.round(Math.random() * 255) + ')'
                                }
                            },
                            emphasis : {
                                shadowBlur : 5,  // shadow blur
                                shadowColor : '#333'  // shadow colour
                            }
                        },
                        left : 'center',
                        top : 'center',
                        right : null,
                        bottom : null,
                        width : '100%',
                        height : '100%',
                        data : mydata
                    }]
                });

                // Clicking a word opens the papers for that keyword; the keyword
                // is encoded so characters such as "&" or "#" survive the URL.
                myChart.on('click', function (params) {
                    var url = "ClickServlet?keywords=" + encodeURIComponent(params.name);
                    window.location.href = url;
                });

                alert("成功!");
            },
            error : function() {
                alert("请求失败");
            }
        });

    </script>


    </body>
    </html>
    生成图:
     

  • 相关阅读:
    LVS负载均衡部署
    将源码包制作成rpm包
    root用户被提示:Operation not permitted
    varnish加速web
    优化nginx数据包头缓存
    NGINX并发量优化
    Python+API接口测试框架设计(pytest)
    python+API接口测试框架设计(unittest)
    python编程面试题
    Python + unittest知识点回顾
  • 原文地址:https://www.cnblogs.com/mxk123456/p/13085267.html
Copyright © 2020-2023  润新知