Python爬虫部分由结对的郑磊完成。
前端使用改自 ECharts 的工具类,调用它提供的方法绘制热词云图。该工具类本身接收 JSON 数据进行绘制,而我们为了简单,采用 Stack 将封装好的结果传到页面,因此在页面上用 JSP 脚本片段(Java)和 JavaScript 现场拼装出一个 JSON,提供给绘图接口。再利用它提供的点击事件接口,修改 iframe 的 src 属性,从而更新搜索结果。另外,在统计阶段会先读入一个无效词表,出现在无效词表中的词汇(比如 a、the 等)将不予统计。
DBUtil.java:(学长给的祖传工具类)
echarts.js:(工具类)
jquery.min.js(同上)
worldcloud.js(同上)
package konoha.dao;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Stack;

import konoha.etc.RtnData;
import konoha.etc.RtnWord;
import konoha.util.DBUtil;

/**
 * Static data-access helpers for the hot-word statistics.
 *
 * <p>Tables touched by this class: {@code papers} (read: {@code title}, {@code address}),
 * {@code useless} (read: {@code word}) and {@code countlog} (read/write: {@code word},
 * {@code wordamount}).
 *
 * <p>Every method obtains its own connection from {@link DBUtil#getConn()} and releases it
 * through {@code DBUtil.close(...)}. Failures are printed and swallowed (best effort), so a
 * method returns its default value when the database call fails.
 */
public class UniDao {

    /**
     * Empties the {@code countlog} table.
     *
     * @return always {@code 0}; the value carries no success information
     */
    public static int doClear() {
        int flag = 0;
        System.out.println("ClearTable");
        String sql = "truncate table countlog";
        Connection conn = DBUtil.getConn();
        Statement state = null;
        try {
            state = conn.createStatement();
            state.executeUpdate(sql);
        } catch (Exception e) {
            // Broad catch kept on purpose: it also covers a null connection.
            e.printStackTrace();
        } finally {
            DBUtil.close(state, conn);
        }
        return flag;
    }

    /**
     * Looks up the current count stored for a word.
     *
     * @param wordipt the word to look up
     * @return the stored {@code wordamount}, or {@code 0} when the word has no row or the
     *         query fails
     */
    public static int nowAmount(String wordipt) {
        int flag = 0;
        System.out.println("Querying word \"" + wordipt + "\"");
        String sql = "select wordamount from countlog where word=?";
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;
        try {
            // Bound parameter instead of string concatenation: no SQL injection, and
            // words containing a quote character no longer break the statement.
            PreparedStatement ps = conn.prepareStatement(sql);
            state = ps;
            ps.setString(1, wordipt);
            rs = ps.executeQuery();
            if (rs.next()) {
                flag = rs.getInt("wordamount");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(rs, state, conn);
        }
        return flag;
    }

    /**
     * Tells whether a word is listed in the {@code useless} (stop-word) table.
     *
     * @param wordipt the word to check
     * @return {@code true} when a matching row exists; {@code false} otherwise or when the
     *         query fails
     */
    public static boolean useless(String wordipt) {
        boolean flag = false;
        System.out.println("Querying Useless word \"" + wordipt + "\"");
        String sql = "select word from useless where word=?";
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;
        try {
            PreparedStatement ps = conn.prepareStatement(sql);
            state = ps;
            ps.setString(1, wordipt);
            rs = ps.executeQuery();
            if (rs.next()) {
                flag = true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(rs, state, conn);
        }
        return flag;
    }

    /**
     * Increments the stored count of a word, inserting the row on first occurrence.
     *
     * <p>The current value is read with {@link #nowAmount(String)} and then written back in a
     * separate statement, so concurrent callers may lose updates.
     *
     * @param wordipt the word to count
     * @return the new count when a row was written, or {@code 0} when nothing was written
     */
    public static int countAdd(String wordipt) {
        int num = nowAmount(wordipt) + 1;
        System.out.println("SetCountTo:" + num);
        boolean firstOccurrence = (num == 1);
        String sql = firstOccurrence
                ? "insert into countlog(word,wordamount) values(?,1)"
                : "update countlog set wordamount=? where word=?";
        Connection conn = DBUtil.getConn();
        Statement state = null;
        int affected = 0;
        try {
            PreparedStatement ps = conn.prepareStatement(sql);
            state = ps;
            if (firstOccurrence) {
                ps.setString(1, wordipt);
            } else {
                ps.setInt(1, num);
                ps.setString(2, wordipt);
            }
            affected = ps.executeUpdate();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(state, conn);
        }
        return affected > 0 ? num : 0;
    }

    /**
     * Loads paper titles and addresses, optionally filtered by a substring of the title.
     *
     * @param wordipt substring the title must contain; {@code null} or the empty string
     *                selects every paper
     * @return a stack of matching papers in result-set order (last row on top); empty when
     *         nothing matches or the query fails
     */
    public static Stack<RtnData> getTitleAndLink(String wordipt) {
        System.out.println("GettingTitleBy:\"" + wordipt + "\"");
        int counter = 0;
        Stack<RtnData> flag = new Stack<RtnData>();
        // A null word is treated like "no filter" instead of raising a NullPointerException.
        boolean filtered = wordipt != null && !wordipt.isEmpty();
        String sql = filtered
                ? "select * from papers where title like ?"
                : "select * from papers";
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;
        try {
            PreparedStatement ps = conn.prepareStatement(sql);
            state = ps;
            if (filtered) {
                ps.setString(1, "%" + wordipt + "%");
            }
            rs = ps.executeQuery();
            while (rs.next()) {
                counter = counter + 1;
                flag.push(new RtnData(rs.getString("title"), rs.getString("address")));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(rs, state, conn);
        }
        System.out.println("getCounter:" + counter);
        return flag;
    }

    /**
     * Rebuilds the {@code countlog} table from the titles of all papers.
     *
     * <p>Titles are read first, the table is cleared, and then every whitespace-separated
     * token that is not listed as useless is counted through {@link #countAdd(String)}.
     */
    public static void doCount() {
        Stack<RtnData> titles = getTitleAndLink("");
        doClear();
        while (!titles.isEmpty()) {
            RtnData paper = titles.pop();
            if (paper.title == null) {
                continue;
            }
            for (String token : paper.title.split("\\s+")) {
                // Consecutive or leading whitespace yields empty tokens; they are not words.
                if (token.isEmpty()) {
                    continue;
                }
                if (!useless(token)) {
                    countAdd(token);
                }
            }
        }
    }

    /**
     * Returns the most frequent words.
     *
     * @param amount maximum number of words to return; values below one give an empty stack
     * @return a stack filled in descending {@code wordamount} order, so the least frequent of
     *         the selected words is on top; empty when the query fails
     */
    public static Stack<RtnWord> rtnTop(int amount) {
        Stack<RtnWord> tit = new Stack<RtnWord>();
        String sql = "select * from countlog order by wordamount desc";
        Connection conn = DBUtil.getConn();
        Statement state = null;
        ResultSet rs = null;
        try {
            state = conn.createStatement();
            rs = state.executeQuery(sql);
            // Stop as soon as either the limit is reached or the rows run out.
            while (tit.size() < amount && rs.next()) {
                tit.push(new RtnWord(rs.getString("word"), rs.getInt("wordamount")));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            DBUtil.close(rs, state, conn);
        }
        return tit;
    }
}
package konoha.etc; public class RtnData { public String title = null; public String link = null; public RtnData(String ipt1, String ipt2) { title = new String(ipt1); link = new String(ipt2); } }
package konoha.etc; public class RtnWord { public String word = null; public int count = 0; public RtnWord(String ipt1, int ipt2) { word = ipt1; count = ipt2; } }
package konoha.servlet;

import java.io.IOException;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import konoha.dao.UniDao;

/**
 * Servlet mapped to {@code /bodySL}.
 *
 * <p>Stores the result of {@code UniDao.rtnTop(10)} in the request attribute {@code rtn}
 * and forwards to {@code bodyPage.jsp}.
 */
@WebServlet("/bodySL")
public class bodySL extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /**
     * Handles every HTTP method the same way: set UTF-8 encodings, load the top words,
     * forward to the page.
     *
     * @param request  the incoming request; receives the {@code rtn} attribute
     * @param response the response handed on to the forwarded page
     * @throws ServletException if the forward fails
     * @throws IOException      if the encoding cannot be applied or the forward fails
     */
    @Override
    protected void service(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        useUtf8(request, response);

        Object topWords = UniDao.rtnTop(10);
        request.setAttribute("rtn", topWords);
        System.out.println("RunBSL");

        request.getRequestDispatcher("bodyPage.jsp").forward(request, response);
    }

    /** Applies UTF-8 to the request body, the response body and the content type. */
    private static void useUtf8(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html; charset=utf-8");
    }
}
package konoha.servlet;

import java.io.IOException;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import konoha.dao.UniDao;

/**
 * Servlet mapped to {@code /ReCount}.
 *
 * <p>Runs {@code UniDao.doCount()} and then forwards to the {@code bodySL} servlet.
 */
@WebServlet("/ReCount")
public class ReCount extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /**
     * Handles every HTTP method the same way: set UTF-8 encodings, recount, forward.
     *
     * @param request  the incoming request
     * @param response the response handed on to the forwarded servlet
     * @throws ServletException if the forward fails
     * @throws IOException      if the encoding cannot be applied or the forward fails
     */
    @Override
    protected void service(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        useUtf8(request, response);

        System.out.println("RunDoCount");
        UniDao.doCount();

        request.getRequestDispatcher("bodySL").forward(request, response);
    }

    /** Applies UTF-8 to the request body, the response body and the content type. */
    private static void useUtf8(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html; charset=utf-8");
    }
}
package konoha.servlet;

import java.io.IOException;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import konoha.dao.UniDao;

// NOTE(review): this listing repeats the ReCount servlet. The pages reference a "titleSL"
// servlet whose source is not shown here - presumably that listing was intended; confirm.

/**
 * Servlet mapped to {@code /ReCount}.
 *
 * <p>Runs {@code UniDao.doCount()} and then forwards to the {@code bodySL} servlet.
 */
@WebServlet("/ReCount")
public class ReCount extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /**
     * Handles every HTTP method the same way: set UTF-8 encodings, recount, forward.
     *
     * @param request  the incoming request
     * @param response the response handed on to the forwarded servlet
     * @throws ServletException if the forward fails
     * @throws IOException      if the encoding cannot be applied or the forward fails
     */
    @Override
    protected void service(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        useUtf8(request, response);

        System.out.println("RunDoCount");
        UniDao.doCount();

        request.getRequestDispatcher("bodySL").forward(request, response);
    }

    /** Applies UTF-8 to the request body, the response body and the content type. */
    private static void useUtf8(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html; charset=utf-8");
    }
}
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Sol8</title>
</head>
<body>
    <!-- Entry page: a recount button above an iframe that shows the bodySL output. -->
    <div align="center">
        <input type="button" value="重新统计" onclick="doReCount();">
        <iframe id="body_P" width="1920" height="1080" src="bodySL"></iframe>
    </div>
    <!-- Script kept inside <body>; a <script> after </body> is not valid HTML. -->
    <script>
        // Points the iframe at the ReCount servlet, which recounts and then
        // forwards back to bodySL.
        function doReCount() {
            document.getElementById("body_P").setAttribute("src", "ReCount");
        }
    </script>
</body>
</html>
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8" import="konoha.etc.RtnWord" import="java.util.Random" import="java.util.Stack"%>
<%!
    /**
     * Escapes text for use inside a single-quoted JavaScript string literal that is written
     * into an inline script element. Quotes, backslashes, angle brackets, control characters
     * and the two Unicode line separators are replaced by escape sequences.
     *
     * @param s the raw text, may be null
     * @return the escaped text; the empty string for null
     */
    private static String jsEscape(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length() + 8);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            boolean special = c < 0x20 || c == '\'' || c == '"' || c == '\\'
                    || c == '<' || c == '>' || c == '&' || c == 0x2028 || c == 0x2029;
            if (special) {
                sb.append(String.format("\\u%04x", (int) c));
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>bodyPage</title>
<script src="js/jquery.min.js"></script>
<script src="js/echarts.js"></script>
<script src="js/worldcloud.js"></script>
</head>
<body>
    <!-- Container for the word cloud; ECharts needs an element with an explicit size. -->
    <div id="main" style="width:600px;height:400px;"></div>
    <script>
        $(function () {
            echartsCloud(); // draw the chart once the DOM is ready
        });

        function echartsCloud() {
            // Create the ECharts instance on the prepared DOM element.
            var myChart = echarts.init(document.getElementById('main'));

            myChart.setOption({
                title: {
                    text: 'echarts3云图展示'
                },
                tooltip: {},
                series: [{
                    type: 'wordCloud',          // word-cloud series
                    shape: 'smooth',            // smooth outline
                    gridSize: 2,                // grid size
                    size: ['80%', '80%'],
                    // sizeRange: [50, 100],
                    rotationRange: [46, 80],    // rotation range
                    textStyle: {
                        normal: {
                            fontFamily: 'sans-serif',
                            // Random dark-ish colour for every word.
                            color: function () {
                                return 'rgb(' + [
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160),
                                    Math.round(Math.random() * 160)
                                ].join(',') + ')';
                            }
                        },
                        emphasis: {
                            shadowBlur: 5,       // shadow blur
                            shadowColor: '#333'  // shadow colour
                        }
                    },
                    data: []
                }]
            });

            // Second call fills in the data generated on the server from the "rtn" attribute.
            myChart.setOption({
                series: [{
                    data: [
<%
    Stack<RtnWord> rtn = (Stack<RtnWord>) request.getAttribute("rtn");
    boolean needComma = false;
    // The attribute is missing when the page is opened without going through the servlet.
    while (rtn != null && !rtn.isEmpty()) {
        RtnWord tmp = rtn.pop();
        if (needComma) {
            out.print(",");
        }
        needComma = true;
        // The word comes from crawled titles, so it is escaped before entering the script.
        out.println("{name:'" + jsEscape(tmp.word) + "',value:" + tmp.count + "}");
    }
%>
                    ]
                }]
            });

            // Clicking a word reloads the result iframe with that word as the query.
            myChart.on('click', function Search(params) {
                document.getElementById("title_P").setAttribute(
                    "src", "titleSL?word=" + encodeURIComponent(params.name));
            });
        }
    </script>
    <iframe id="title_P" width="1920px" height="1080px" src="titleSL"></iframe>
</body>
</html>
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8" import="konoha.etc.RtnData" import="java.util.Stack"%>
<%!
    /**
     * Escapes text for use in HTML element content and in quoted attribute values.
     * It does not validate URL schemes.
     *
     * @param s the raw text, may be null
     * @return the escaped text; the empty string for null
     */
    private static String htmlEscape(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':
                    sb.append("&amp;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                case '\'':
                    sb.append("&#39;");
                    break;
                default:
                    sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>title</title>
</head>
<body>
    <!-- One table row per paper found in the "rtn" request attribute. -->
    <table>
<%
    Stack<RtnData> rtn = (Stack<RtnData>) request.getAttribute("rtn");
    // The attribute is missing when the page is opened without going through a servlet.
    while (rtn != null && !rtn.isEmpty()) {
        RtnData tmp = rtn.pop();
        // Title and link come from the database, so both are escaped; cell and row are
        // closed in the correct order (td before tr).
        out.println("<tr><td><a href='" + htmlEscape(tmp.link) + "'>"
                + htmlEscape(tmp.title) + "</a></td></tr>");
    }
%>
    </table>
</body>
</html>
掌握了把别人写好的工具类整合进自己程序的方法,今后在很多地方都能用得上。