一、要求:
1、完成论文的题目、摘要、关键词、原文链接四项内容爬取;
2、存储到本地数据库中;
3、按照题目、关键词分类统计得到最热的十个领域方向;
4、热词出现的次数越多,在热词云中显示得就越大;还要将热词与文章关联起来,点击热词云中的热词可以找到与之对应的文章题目;
二、效果
三、设计思路:
1、爬取、存取数据:
2、使用 ECharts 的 wordCloud(echarts-wordcloud 扩展)实现热词云。
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%--
  Hot-word cloud page.

  Renders an ECharts word cloud from the JSON array returned by "PaperServlet_"
  (objects with "name" and "value"), and below it a table of paper links taken
  from the request attribute "list" (items exposing "lianjie" and "title").
  Clicking a word navigates to "ClickServlet" with the word as the "geunjian"
  request parameter.
--%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<link rel="stylesheet" href="css/bootstrap.min.css" type="text/css" />
<script src="js/jquery-1.11.3.min.js" type="text/javascript"></script>
<script type="text/javascript" src="js/echarts.min.js"></script>
<script type="text/javascript" src="js/china.js"></script>
<script src="js/bootstrap.min.js" type="text/javascript"></script>
<script src='https://cdn.bootcss.com/echarts/3.7.0/echarts.simple.js'></script>
<script src='js/echarts-wordcloud.js'></script>
</head>
<body>
    <!-- Container for the word cloud; it needs an explicit size for echarts.init. -->
    <div id="main" style="width: 100%;height: 400px"></div>
    <div>
        <table class="table" style="width: 100%;align-content: center;">
            <tr>
                <th align="center">论文连接</th>
            </tr>
            <c:forEach var="item" items="${list}">
                <tr>
                    <%-- c:out escapes the crawled values so they cannot inject markup. --%>
                    <td><a href="<c:out value='${item.lianjie}'/>"><c:out value="${item.title}"/></a></td>
                </tr>
            </c:forEach>
        </table>
    </div>
    <script>
        var chart = echarts.init(document.getElementById('main'));

        /**
         * Draws the word cloud.
         * @param {Array} words objects carrying "name" (the word) and "value" (its count)
         */
        function renderCloud(words) {
            var cloudData = [];
            for (var i = 0; i < words.length; i++) {
                // Skip entries without a word; they cannot be drawn or clicked.
                if (!words[i] || !words[i].name) {
                    continue;
                }
                cloudData.push({
                    name : words[i].name,
                    value : words[i].value
                });
            }

            var option = {
                tooltip : {},
                series : [ {
                    type : 'wordCloud',
                    gridSize : 2,
                    sizeRange : [ 20, 50 ],
                    rotationRange : [ -90, 90 ],
                    shape : 'pentagon',
                    width : 600,
                    height : 300,
                    drawOutOfBound : true,
                    textStyle : {
                        normal : {
                            // Random dark-ish colour per word.
                            color : function() {
                                return 'rgb(' + [
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160)
                                ].join(',') + ')';
                            }
                        },
                        emphasis : {
                            shadowBlur : 10,
                            shadowColor : '#333'
                        }
                    },
                    data : cloudData
                } ]
            };
            chart.setOption(option);
        }

        // Load the word counts asynchronously and draw once they arrive; on
        // failure the chart is simply left empty instead of failing on undefined data.
        $.ajax({
            url : "PaperServlet_",
            type : "POST",
            dataType : "json",
            success : function(data) {
                renderCloud(data || []);
            },
            error : function() {
                alert("请求失败");
            }
        });

        // Clicking a word opens the list of papers whose keywords contain it.
        chart.on('click', function(params) {
            // Encode the word so characters such as '&', '#' or '+' survive the query string.
            var url = "ClickServlet?geunjian=" + encodeURIComponent(params.name);
            window.location.href = url;
        });

        // Call resize on the chart instance so "this" is the chart, not window.
        window.onresize = function() {
            chart.resize();
        };
    </script>
</body>
</html>
3、将关键字分割成单词然后对单词进行去重、计数和排序,装到list,转换为json字符串传递给界面(ajax请求获取的数据)
package com.me.servlet;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.google.gson.Gson;
import com.me.dao.LWDao;
import com.me.domain.LunWen;
import com.me.domain.Tu;

/**
 * Serves the data for the hot-word cloud.
 *
 * <p>Reads all papers through {@link LWDao#search_()}, splits each paper's
 * keyword string ({@code getGuanjian()}) on single spaces, counts how often
 * every word occurs, and writes the most frequent words (at most
 * {@value #MAX_WORDS}) as a JSON array of {@link Tu} objects, ordered by
 * descending count. Words with equal counts keep the order in which they were
 * first seen.
 */
@WebServlet("/PaperServlet_")
public class PaperServlet_ extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /** Upper bound on the number of hot words sent to the page. */
    private static final int MAX_WORDS = 100;

    public PaperServlet_() {
        super();
    }

    /**
     * Writes the ranked hot words as JSON.
     *
     * <p>If the papers cannot be loaded, the failure is logged and an empty
     * JSON array is written, so the page still receives well-formed data.
     *
     * @param request  the incoming request (no parameters are read)
     * @param response receives the JSON array
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if writing the response fails
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        response.setContentType("application/json");
        response.setCharacterEncoding("UTF-8");

        List<LunWen> papers = new ArrayList<LunWen>();
        try {
            List<LunWen> loaded = new LWDao().search_();
            if (loaded != null) {
                papers = loaded;
            }
        } catch (SQLException e) {
            // Best effort: report the problem and answer with an empty cloud.
            log("Failed to load papers for the hot-word cloud", e);
        }

        // Only the keyword strings matter here; the paper links are not part of
        // this response, so they are left untouched.
        Map<String, Integer> counts = countWords(papers);
        List<Tu> cloud = toCloudEntries(rankByCount(counts));

        response.getWriter().write(new Gson().toJson(cloud));
    }

    /**
     * Handles POST exactly like GET.
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }

    /** Counts every non-empty, space-separated keyword across all papers, in first-seen order. */
    private static Map<String, Integer> countWords(List<LunWen> papers) {
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        for (LunWen paper : papers) {
            String keywords = (paper == null) ? null : paper.getGuanjian();
            if (keywords == null) {
                continue; // a paper without keywords contributes nothing
            }
            for (String word : keywords.split(" ")) {
                if (word.isEmpty()) {
                    continue; // produced by leading or consecutive spaces
                }
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /** Returns the entries sorted by descending count; the sort is stable, so ties keep map order. */
    private static List<Map.Entry<String, Integer>> rankByCount(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });
        return ranked;
    }

    /** Converts the first {@link #MAX_WORDS} ranked entries (or fewer) into {@link Tu} objects. */
    private static List<Tu> toCloudEntries(List<Map.Entry<String, Integer>> ranked) {
        int limit = Math.min(MAX_WORDS, ranked.size());
        List<Tu> cloud = new ArrayList<Tu>(limit);
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            Tu tu = new Tu();
            tu.name = entry.getKey();
            tu.value = entry.getValue().intValue();
            cloud.add(tu);
        }
        return cloud;
    }
}
1)热词实体
2)将关键字分割成单词然后对单词进行去重、计数和排序,装到list
4、论文链接列表数据准备(PaperServlet 是最初访问的入口,携带数据跳转到 JSP 界面)
1)论文实体
5、点击热词后携带此热词到servlet,再从数据库中找出论文的关键字中包含此热词的论文列表
6、dao层