package com.wbg.my.service; import java.io.*; import java.net.HttpURLConnection; import java.net.URL; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @author Jack Chen * */ public class BlogUtil { /** * URL_PAGE:cnblogs url * URL_PAGE_DETAIL:详情页url * PAGE_COUNT:页数 * urlLists:所有详情页url Set集合(防止重复) * p:匹配模式 * */ public final static String URL_PAGE = "https://www.cnblogs.com/weibanggang/default.html?page="; public final static String URL_PAGE_DETAIL = "https://www.cnblogs.com/weibanggang/p/([0-9]+.html)"; public final static int PAGE_COUNT = 20; public static Set<String> urlLists = new TreeSet<String>(); public final static Pattern p = Pattern.compile(URL_PAGE_DETAIL); //文件路径 public static String file="d:index.html"; static String [] arr=null; static int sun=0; public static void main(String[] args) throws Exception { for(int i = 1;i<=PAGE_COUNT;i++) { getUrls(i); } System.out.println("开始获取内容!"); arr=new String[urlLists.size()]; for(Iterator<String> i = urlLists.iterator();i.hasNext();) { createFile(i.next()); sun++; } System.out.println("获取内容完毕!"); System.out.println("开始写入文件!"); StringBuffer stringBuffer=new StringBuffer(kais()); for (int i = 0; i < arr.length; i++) { stringBuffer.append(arr[i]); } stringBuffer.append(jiehun()); System.out.println("写入文件完毕!"); System.out.println("开始导出文件!"); createFile(file,stringBuffer); System.out.println("导出文件完毕!"); System.out.println("输出文件地址为:"+file); } /* * 将结果写入文件 */ private static void createFile(String file, StringBuffer buffer) { try { File newFile = new File(file); if (newFile.exists())// 存在,则删除 if (!newFile.delete())// 删除成功则创建 { System.err.println("删除文件" + newFile + "失败"); } if (newFile.createNewFile()) {// 创建成功,则写入文件内容 PrintWriter p = new PrintWriter(new FileOutputStream(newFile .getAbsolutePath())); p.write(buffer.toString()); p.close(); } else { System.err.println("创建文件:" + newFile + "失败"); } } catch (Exception e) { e.printStackTrace(); } } //开始头部 public static String kais(){ return "<!DOCTYPE html> " + "<html> " + "<head> " + " <meta charset="utf-8"> " + " <title>weibanggang.github.io</title> " + " <meta name="renderer" content="webkit"> " + " <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> " + " <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"> " + " <style> " + " html,body{100%;height: 100%} " + " table{ 1150px;height:500px;margin: auto} " + " table,td,th{border: 1px solid #e6e6e6;border-collapse:collapse; } " + " body{-moz-background-size:100% 100%; background-size:100% 100%;background-image:url("link.jpg");background-repeat: no-repeat} body{-moz-background-size:100% 100%; background-size:100% 100%;background-image:url("link.jpg");background-repeat: no-repeat} " + " * { margin: 0; padding: 0; } " + " table { border-collapse: collapse; text-align: center; } " + " /*关键设置 tbody出现滚动条*/ " + " table tbody { " + " display: block; " + " height: 500px; " + " overflow-y: scroll;overflow-x:hidden; " + " } " + " table thead, tbody tr { display: table; 100%; table-layout: fixed; } " + " table thead th { height: 40px } " + " table tbody td {height: 30px } " + " </style> " + "</head> " + " " + "<body> " + "<marquee><h1 style="color:white;">本网页仅作为参考博客、github等地址</h1></marquee> " + "<table width="80%" border="1"> " + " <thead> " + " <tr> " + " <th style="230px">序号</th> " + " <th style="231px">标题链接</th> " + " <th style="231px">时间</th> " + " <th style="231px">来源</th> " + " <th style="249px">备注</th> " + " </tr> " + " </thead> " + " <tbody> " + " " + " </tbody> " + "</table> " + "</body> " + "<script src="js/jquery.js"></script> " + "<script> " + " var sum=["; } //结尾 public static String jiehun(){ return " ]; " + " " + " for(var i=0;i<sum.length;i++){ " + " var tr=$("<tr/>"); " + " //序号 " + " $("<td/>").html(i+1).appendTo(tr); " + " //标题链接 " + " var a="<a href='"+sum[i][0]+"' target='_blank'>"+sum[i][1]+"</a>" " + " $("<td/>").html(a).appendTo(tr); " + " //时间 " + " $("<td/>").html(sum[i][2]).appendTo(tr); " + " //来源 " + " $("<td/>").html(sum[i][3]).appendTo(tr); " + " //备注 " + " $("<td/>").html(sum[i][4]).appendTo(tr); " + " $("table tbody").append(tr); " + " } " + "</script> " + "</html>"; } static String fh=""; /** * @param url * 获取所有内容 * @throws */ private static void createFile(String url) throws Exception { Matcher m = p.matcher(url); m.find(); String fileName = m.group(1); URL u = new URL(url); HttpURLConnection conn = (HttpURLConnection) u.openConnection(); conn.connect(); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8")); String str; StringBuffer s=new StringBuffer(); while((str = br.readLine()) != null){ s.append(str); } String href="https://www.cnblogs.com/weibanggang/p/"+fileName; String title=getTitle(s); String data=getDate(s); arr[sun]=fh+"[""+href+"",""+title+"",""+data+"","博客","正常"]"; fh=","; br.close(); conn.disconnect(); } //获取时间 public static String getDate(StringBuffer sb){ int first=sb.indexOf("<span id="post-date">")+"<span id="post-date">".length(); String aa=sb.substring(first); int last=aa.indexOf("</span>"); String sa=aa.substring(0,last); return sa; } //获取标题 public static String getTitle(StringBuffer sb){ int first=sb.indexOf("<title>"); int last=sb.indexOf("</title>"); String sa=sb.substring(first+7,last); int errorindex=sa.lastIndexOf("- 韦邦杠 - 博客园"); return sa.substring(0,errorindex); } /** * @param idx * 获取页数 * @throws */ private static void getUrls(int idx) throws Exception{ URL u = new URL(URL_PAGE+""+idx); HttpURLConnection conn = (HttpURLConnection) u.openConnection(); conn.connect(); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8")); String str; while((str = br.readLine()) != null){ if(null != str && str.contains("https://www.cnblogs.com/weibanggang/p/")) { Matcher m = p.matcher(str); if(m.find()) { urlLists.add(m.group()); } } } br.close(); conn.disconnect(); } }