<%@page import="java.util.Iterator"%>
<%@page import="java.util.HashMap"%>
<%@page import="java.util.Map"%>
<%@page import="entity.Cvf"%>
<%@page import="java.util.List"%>
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%--
  Word-cloud page.

  Reads the request attribute "cvfs" (a List of Cvf), counts how many entries share
  each keyword, and renders the counts as an ECharts word cloud. Clicking a word
  opens ListServlet with that keyword as the "keyword" query parameter.
--%>
<%!
    /**
     * Escapes a value so it can be placed inside a double-quoted JavaScript string
     * literal that lives in an inline script element.
     *
     * Quotes, backslashes, control characters, the markup characters <, > and &
     * (so a value cannot close the script element), and the line separators
     * U+2028 / U+2029 are written as \\uXXXX escapes.
     */
    private static String jsEscape(String value) {
        StringBuilder out = new StringBuilder(value.length() + 16);
        for (int idx = 0; idx < value.length(); idx++) {
            char c = value.charAt(idx);
            boolean unsafe = c == '"' || c == '\'' || c == '\\'
                    || c == '<' || c == '>' || c == '&'
                    || c < 0x20 || c == 0x2028 || c == 0x2029;
            if (unsafe) {
                out.append(String.format("\\u%04x", (int) c));
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }
%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>CVPR词云</title>
<script type="text/javascript" src="Echart/echarts.js"></script>
<script type="text/javascript" src="Echart/echarts-wordcloud.min.js"></script>
</head>
<body>
<%
    request.setCharacterEncoding("utf-8");

    @SuppressWarnings("unchecked")
    List<Cvf> cvfs = (List<Cvf>) request.getAttribute("cvfs");

    // keyword -> number of entries carrying that keyword
    Map<String, Integer> keywordCounts = new HashMap<String, Integer>();
    if (cvfs != null) {
        for (Cvf cvf : cvfs) {
            String keyword = cvf.getCkeyword();
            if (keyword == null) {
                // An entry without a keyword would otherwise show up as the word "null".
                continue;
            }
            Integer seen = keywordCounts.get(keyword);
            keywordCounts.put(keyword, seen == null ? 1 : seen + 1);
        }
    }
%>
<div id="main" style="width: 800px; height: 600px"></div>
<script>
    var myChart = echarts.init(document.getElementById('main'));
    var option = {
        title: {
            text: '词云', // chart title
            x: 'center',
            textStyle: {
                fontSize: 23
            }
        },
        backgroundColor: '#F7F7F7',
        tooltip: {
            show: true
        },
        series: [{
            name: '热点分析', // tooltip heading
            type: 'wordCloud',
            sizeRange: [6, 66], // size range; if set too large, fewer words appear (they overflow)
            rotationRange: [-45, 90], // rotation range of the words
            //shape: 'circle',
            textPadding: 0,
            autoSize: {
                enable: true,
                minSize: 6
            },
            drawOutOfBound: true, // show the complete cloud, including words outside the canvas
            textStyle: {
                normal: {
                    // random dark-ish colour per word
                    color: function() {
                        return 'rgb(' + [
                            Math.round(Math.random() * 160),
                            Math.round(Math.random() * 160),
                            Math.round(Math.random() * 160)
                        ].join(',') + ')';
                    }
                },
                emphasis: {
                    shadowBlur: 10,
                    shadowColor: '#333'
                }
            },
            data: [
<%
    // One {name, value} object per distinct keyword; the name is escaped for JavaScript.
    for (Map.Entry<String, Integer> entry : keywordCounts.entrySet()) {
%>
                {name: "<%=jsEscape(entry.getKey())%>", value: <%=entry.getValue()%>},
<%
    }
%>
            ]
        }]
    };
    myChart.setOption(option, true);
    myChart.on('click', function(param) {
        var selected = param.name;
        if (selected) {
            // Encode the keyword so characters such as & or # survive in the query string.
            window.open("ListServlet?keyword=" + encodeURIComponent(selected));
        }
    });
</script>
</body>
</html>
<%@page import="entity.Cvf"%>
<%@page import="java.util.List"%>
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%--
  Link list page.

  Reads the request attribute "cvfs" (a List of Cvf) and renders one table row per
  entry: the title as a link to the entry's URL, followed by its keyword.
--%>
<%!
    /**
     * Escapes a value for use in HTML text or in a double-quoted HTML attribute.
     * A null value is rendered as an empty string.
     */
    private static String esc(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(value.length() + 16);
        for (int idx = 0; idx < value.length(); idx++) {
            char c = value.charAt(idx);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                case '\'':
                    out.append("&#39;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }
%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>链接地址</title>
</head>
<body>
<%
    request.setCharacterEncoding("utf-8");

    @SuppressWarnings("unchecked")
    List<Cvf> cvfs = (List<Cvf>) request.getAttribute("cvfs");
%>
<table>
    <thead>
        <tr>
            <th>标题</th>
            <th>关键词</th>
        </tr>
    </thead>
    <tbody class="htbody">
<%
    if (cvfs != null) {
        for (Cvf cvf : cvfs) {
%>
        <tr>
            <td><a href="<%=esc(cvf.getChref())%>"><%=esc(cvf.getCname())%></a></td>
            <td><%=esc(cvf.getCkeyword())%></td>
        </tr>
<%
        }
    }
%>
    </tbody>
</table>
</body>
</html>
package utils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONObject;

/**
 * Static helpers that download the CVPR 2019 open-access index page through an
 * Apache HttpClient connection pool supplied by the caller.
 */
public class HttpClientPool {

    /** Address of the index page requested by {@link #doGet} and {@link #doPost}. */
    private static final String INDEX_URL = "http://openaccess.thecvf.com/CVPR2019.py#";

    /** Maximum time to establish a connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for a connection from the pool, in milliseconds. */
    private static final int CONNECTION_REQUEST_TIMEOUT_MS = 50000;

    /**
     * Maximum time to wait for data on the socket, in milliseconds.
     * This product fits in an {@code int}; the expression {@code 100000*1000000}
     * used previously by {@code doGet} does not and silently wrapped around.
     */
    private static final int SOCKET_TIMEOUT_MS = 1000 * 1000;

    /**
     * Builds a pooling connection manager limited to 100 connections in total and
     * 10 per route.
     *
     * <p>NOTE(review): the manager is neither returned nor stored, so calling this
     * method has no lasting effect; callers create their own manager and pass it
     * to {@link #doGet} / {@link #doPost}.
     */
    public static void HttpClientPool() {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
        // upper bound on connections across all routes
        cm.setMaxTotal(100);
        // upper bound on connections per host
        cm.setDefaultMaxPerRoute(10);
    }

    /**
     * Sends a form POST (with a single empty name/value pair) to the index page.
     *
     * @param cm connection manager the HTTP client draws its connections from
     * @return the response body decoded as UTF-8, or {@code null} when the status
     *         is not 200, the response has no entity, or the request fails
     * @throws Exception if the form entity cannot be built
     */
    public static String doPost(PoolingHttpClientConnectionManager cm) throws Exception {
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        HttpPost httpPost = new HttpPost(INDEX_URL);
        System.out.println("发起请求的信息:" + httpPost);

        // form parameters carried in the request body
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("", ""));
        httpPost.setEntity(new UrlEncodedFormEntity(params, "utf8"));
        httpPost.setConfig(requestConfig());

        return fetchBody(httpClient, httpPost);
    }

    /**
     * Sends a GET request to the index page.
     *
     * @param cm connection manager the HTTP client draws its connections from
     * @return the response body decoded as UTF-8, or {@code null} when the status
     *         is not 200, the response has no entity, or the request fails
     * @throws Exception if the request URI cannot be built
     */
    public static String doGet(PoolingHttpClientConnectionManager cm) throws Exception {
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        URIBuilder uribuilder = new URIBuilder(INDEX_URL);
        HttpGet httpGet = new HttpGet(uribuilder.build());
        System.out.println("发起请求的信息:" + httpGet);
        httpGet.setConfig(requestConfig());

        return fetchBody(httpClient, httpGet);
    }

    /** Creates the timeout configuration shared by both request methods. */
    private static RequestConfig requestConfig() {
        return RequestConfig.custom()
                .setConnectTimeout(CONNECT_TIMEOUT_MS)
                .setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
                .setSocketTimeout(SOCKET_TIMEOUT_MS)
                .build();
    }

    /**
     * Executes the request and returns the body of a 200 response; failures are
     * printed and reported to the caller as {@code null}.
     */
    private static String fetchBody(CloseableHttpClient httpClient, HttpUriRequest request) {
        CloseableHttpResponse response = null;
        String content = null;
        try {
            response = httpClient.execute(request);
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                if (httpEntity != null) {
                    content = EntityUtils.toString(httpEntity, "utf8");
                }
            } else {
                System.out.println("请求失败" + response);
            }
        } catch (Exception e) {
            // Best effort: the caller sees a null body instead of an exception.
            e.printStackTrace();
        } finally {
            if (response != null) {
                try {
                    response.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // The client is deliberately left open: its connections belong to the
            // caller's connection manager.
        }
        return content;
    }
}
package utils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.dao;
import entity.Cvf;

/**
 * Crawls the CVPR 2019 open-access index with Jsoup: for every paper listed it
 * reads the title, link and abstract, derives a keyword from the most frequent
 * word, and stores the result through {@link dao}.
 *
 * @author 张志伟
 */
public class Jsouputil {

    /** Prefix prepended to the relative paper links found on the index page. */
    private static final String BASE_URL = "http://openaccess.thecvf.com/";

    /** Maximum time to establish a connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for a connection from the pool, in milliseconds. */
    private static final int CONNECTION_REQUEST_TIMEOUT_MS = 50000;

    /**
     * Maximum time to wait for data on the socket, in milliseconds. Kept within
     * {@code int} range; the previous {@code 100000*1000000} wrapped around.
     */
    private static final int SOCKET_TIMEOUT_MS = 1000 * 1000;

    /** Words that are never reported as a keyword. */
    private static final Set<String> STOP_WORDS = new HashSet<String>(Arrays.asList(
            "a", "the", "to", "and", "in", "of", "our", "your", "we", "is",
            "on", "for", "that", "an", "are"));

    /**
     * Downloads the index page, visits every paper entry
     * ({@code div#content dl dt.ptitle}) and saves one {@link Cvf} per paper.
     *
     * <p>When a paper page does not answer with status 200 or 302 the paper is
     * still saved, with a {@code null} abstract and keyword.
     *
     * @throws Exception if the index page cannot be downloaded or a paper request fails
     */
    public static void testSelector() throws Exception {
        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();

        // doGet reports any failure as a null body; stop with a clear message
        // instead of handing null to the parser.
        String content = HttpClientPool.doGet(cm);
        if (content == null) {
            throw new IOException("Could not download the CVPR index page");
        }

        Document doc = Jsoup.parse(content);
        // attribute selectors: [attr=value]
        Elements elements = doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
        System.out.println(elements.toString());

        dao store = new dao();
        // The client and its timeout configuration do not change between papers.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(CONNECT_TIMEOUT_MS)
                .setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
                .setSocketTimeout(SOCKET_TIMEOUT_MS)
                .build();

        for (Element ele : elements) {
            String cname = ele.select("a").text();
            System.out.println(cname);
            String chref = BASE_URL.concat(ele.select("a").attr("href"));
            System.out.println(chref);

            String cabstract = null;
            String ckeyword = null;

            HttpGet httpGet = new HttpGet(new URIBuilder(chref).build());
            httpGet.setConfig(config);

            CloseableHttpResponse response = httpClient.execute(httpGet);
            try {
                int status = response.getStatusLine().getStatusCode();
                if (status == 200 || status == 302) {
                    Document document = Jsoup.parse(new URL(chref), 100000);
                    cabstract = document.select("div[id=abstract]").text();
                    System.out.println("已获取摘要");
                    ckeyword = keyword(strTostrArray(cname + cabstract));
                } else {
                    System.out.println(status);
                }
            } finally {
                // Release the connection even when parsing the page throws.
                response.close();
            }

            store.add(new Cvf(cname, chref, cabstract, ckeyword));
        }
    }

    /**
     * Splits text into lower-case words.
     *
     * <p>Every run of non-word characters (anything outside {@code [a-zA-Z0-9_]})
     * is treated as one separator. The result never contains empty strings; text
     * without any word character yields an empty array.
     *
     * @param str text to split
     * @return the words in their original order
     */
    public static String[] strTostrArray(String str) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        String normalized = str.toLowerCase(Locale.ROOT).replaceAll("[\\W]+", " ").trim();
        if (normalized.isEmpty()) {
            return new String[0];
        }
        return normalized.split(" ");
    }

    /**
     * Returns the most frequent word, ignoring stop words and empty strings.
     * When several words share the highest count, the one that appears first in
     * {@code strs} wins.
     *
     * @param strs words to count
     * @return the most frequent word, or {@code null} when no countable word exists
     */
    public static String keyword(String[] strs) {
        // Insertion order makes the tie-break deterministic.
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        for (String word : strs) {
            if (word.isEmpty() || STOP_WORDS.contains(word)) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }

        String maxStr = null;
        int maxValue = 0;
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            if (entry.getValue() > maxValue) {
                maxValue = entry.getValue();
                maxStr = entry.getKey();
            }
        }
        System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "次");
        return maxStr;
    }
}
package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;
import utils.Jsouputil;

/**
 * Loads every stored {@link Cvf} record and forwards it to {@code show.jsp}
 * under the request attribute {@code cvfs}.
 */
public class QueryServlet extends HttpServlet {

    /**
     * Handles GET: fixes the encodings, queries all records and forwards to the view.
     * The crawl ({@code Jsouputil.testSelector()}) is not triggered from here.
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        useUtf8(request, response);

        List<Cvf> allRecords = new dao().Query();
        System.out.println(allRecords);

        request.setAttribute("cvfs", allRecords);
        request.getRequestDispatcher("show.jsp").forward(request, response);
    }

    /** Handles POST exactly like GET. */
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }

    /** Sets UTF-8 on the request and the response so text is not garbled. */
    private static void useUtf8(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
    }
}
package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;

/**
 * Looks up the {@link Cvf} records for the request parameter {@code keyword} and
 * forwards them to {@code list.jsp} under the request attribute {@code cvfs}.
 */
public class ListServlet extends HttpServlet {

    /** Handles GET: fixes the encodings, queries by keyword and forwards to the view. */
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        // The request encoding must be set before the parameter is read.
        request.setCharacterEncoding("utf-8");
        final String keyword = request.getParameter("keyword");

        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");

        final List<Cvf> matches = new dao().Query(keyword);
        System.out.println(matches);

        request.setAttribute("cvfs", matches);
        request.getRequestDispatcher("list.jsp").forward(request, response);
    }

    /** Handles POST exactly like GET. */
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }
}
package entity; public class Cvf { private int id; private String cname; private String chref; private String cabstract; private String ckeyword; @Override public String toString() { return "Cvf [id=" + id + ", cname=" + cname + ", chref=" + chref + ", cabstract=" + cabstract + ", ckeyword=" + ckeyword + "]"; } public int getId() { return id; } public void setId(int id) { this.id = id; } public String getCname() { return cname; } public void setCname(String cname) { this.cname = cname; } public String getChref() { return chref; } public void setChref(String chref) { this.chref = chref; } public String getCabstract() { return cabstract; } public void setCabstract(String cabstract) { this.cabstract = cabstract; } public String getCkeyword() { return ckeyword; } public void setCkeyword(String ckeyword) { this.ckeyword = ckeyword; } public Cvf(int id, String cname, String chref, String cabstract, String ckeyword) { this.id = id; this.cname = cname; this.chref = chref; this.cabstract = cabstract; this.ckeyword = ckeyword; } public Cvf() { } public Cvf(String cname, String chref, String cabstract, String ckeyword) { this.cname = cname; this.chref = chref; this.cabstract = cabstract; this.ckeyword = ckeyword; } }
package dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import entity.Cvf;
import utils.DBUtil;

/**
 * Data access for the {@code cvpr} table: inserts crawled papers and reads them
 * back, either all of them or only those with a given keyword.
 *
 * <p>NOTE(review): queries close the static {@code DBUtil.pstmt} and
 * {@code DBUtil.connection} fields when they finish; presumably these are shared
 * by every caller, so concurrent requests may interfere — confirm against DBUtil.
 */
public class dao {

    /**
     * Inserts one paper.
     *
     * @param cvf paper to store; its id is not written
     * @return the result reported by {@code DBUtil.executeUpdate}
     */
    public boolean add(Cvf cvf) {
        String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
        Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
        return DBUtil.executeUpdate(sql, params);
    }

    /**
     * Reads every stored paper.
     *
     * @return the papers found; empty (never {@code null}) when there are none or
     *         the query fails
     */
    public List<Cvf> Query() {
        return select("select * from cvpr ", new Object[] {});
    }

    /**
     * Reads the papers whose keyword equals {@code key}.
     *
     * @param key keyword to match
     * @return the matching papers; empty (never {@code null}) when there are none
     *         or the query fails
     */
    public List<Cvf> Query(String key) {
        return select("select * from cvpr where ckeyword=? ", new Object[] {key});
    }

    /** Runs a select on the cvpr table and maps every row to a {@link Cvf}. */
    private List<Cvf> select(String sql, Object[] params) {
        List<Cvf> cvfs = new ArrayList<Cvf>();
        ResultSet rs = null;
        try {
            rs = DBUtil.executeQuery(sql, params);
            // Guard against a missing result set instead of relying on a caught NPE.
            while (rs != null && rs.next()) {
                cvfs.add(new Cvf(
                        rs.getInt("id"),
                        rs.getString("cname"),
                        rs.getString("chref"),
                        rs.getString("cabstract"),
                        rs.getString("ckeyword")));
            }
        } catch (Exception e) {
            // Best effort: a failed query yields the rows read so far.
            e.printStackTrace();
        } finally {
            closeQueryResources(rs);
        }
        return cvfs;
    }

    /**
     * Closes the result set, then the statement, then the connection (reverse
     * order of opening). Each close is attempted on its own so that one failure
     * does not leave the remaining resources open.
     */
    private void closeQueryResources(ResultSet rs) {
        try {
            if (rs != null) {
                rs.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (DBUtil.pstmt != null) {
                DBUtil.pstmt.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (DBUtil.connection != null) {
                DBUtil.connection.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}