需要的jar如下:
package jsoup.zr.com.utils; /** * * @author LF * */ public class Constant { /** * 网站链接地址ַ */ public static String URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml"; /** * 生成目标文件所存放的路径 * 注意:路径请用"/",勿用"" */ public static String PATH = "/Users/apple/Desktop/"; /** * exel文件的命名 */ public static String NAME = "QQ"; /** * 正则表达式(第一位1-9之间的数字,第二位0-9之间的数字,数字范围4-14) */ public static String QQREGEX = "[1-9][0-9]{4,14}"; /** * 正则表达式(QQ、微信号、手机) */ public static String QQ_WEIXIN_PHONE = "^[a-zA-Z0-9_-]{5,19}$"; }
package jsoup.zr.com.utils; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; /** * * @author LF * */ public class ParseHTML { public static void main(String[] args) { List<String> list = getInfoByURL(Constant.URL,Constant.QQREGEX); ProduceExel.writeByList(list, "QQ号码"); } /** * 获取网页内容 */ public static List<String> getInfoByURL(String urlString,String regexString){ List<String> list = new ArrayList<String>(); URL url = null; try { url = new URL(urlString); } catch (MalformedURLException e) { e.printStackTrace(); } // 打开连接 URLConnection connection = null; try { connection = url.openConnection(); } catch (IOException e) { e.printStackTrace(); } BufferedReader bReader = null; try { // 读取输入流 bReader = new BufferedReader(new InputStreamReader(connection.getInputStream())); } catch (IOException e) { e.printStackTrace(); } Pattern pattern = Pattern.compile(regexString); String line = null; try { while ((line = bReader.readLine()) != null) { Matcher matcher = pattern.matcher(line); // 如果匹配,存起来 while (matcher.find()) { String str = matcher.group(); list.add(str); } } } catch (IOException e) { e.printStackTrace(); } return list; } }
package jsoup.zr.com.utils; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; /** * * @author LF * */ public class ProduceExel { /** * 通过集合list生成exel报表 * @param list 集合 * @param lineName exel首行的名称 * @throws IOException 异常 */ public static void writeByList(List<String> list,String lineName){ System.out.println(list.size()); // 如果没有数据,不做处理 if (list==null || list.size()==0) { return; } String path = Constant.PATH+Constant.NAME+".xls"; System.out.println(path); OutputStream outputStream = null; try { outputStream = new FileOutputStream(new File(path)); } catch (FileNotFoundException e) { e.printStackTrace(); } // 初始化一个HSSFWorkbook对象 HSSFWorkbook workbook = new HSSFWorkbook(); // 创建一个表 HSSFSheet sheet = workbook.createSheet("lf"); // 创建行 HSSFRow row = sheet.createRow(0); // 创建单元格 HSSFCell cell0 = row.createCell(0); cell0.setCellValue(new HSSFRichTextString(lineName)); int i = 0; // 遍历 for (String str : list) { System.out.println("====="+str); // 创建行 HSSFRow rowi = sheet.createRow(i+1); // 创建单元格 HSSFCell celli = rowi.createCell(0); // 单元格添加内容 celli.setCellValue(new HSSFRichTextString(str)); i++; } // 写(输出) try { workbook.write(outputStream); } catch (IOException e) { e.printStackTrace(); } } /** * 通过集合map生成exel报表 * @param map 集合 * @param lineName exel首行的名称 * @throws IOException 异常 */ public static void write(Map<String, String> map,String lineName){ // 如果没有数据,不做处理 if (map==null || map.size()==0) { return; } String path = Constant.PATH+Constant.NAME+".xls"; System.out.println(path); OutputStream outputStream = null; try { outputStream = new FileOutputStream(new File(path)); } catch (FileNotFoundException e) { e.printStackTrace(); } // 初始化一个HSSFWorkbook对象 HSSFWorkbook workbook = new HSSFWorkbook(); // 创建一个表 HSSFSheet sheet = workbook.createSheet("lf"); // 创建行 HSSFRow row = sheet.createRow(0); // 创建单元格 HSSFCell cell0 = row.createCell(0); cell0.setCellValue(new HSSFRichTextString(lineName)); int i = 0; // 遍历 for (String str : map.values()) { // 创建行 HSSFRow rowi = sheet.createRow(i+1); // 创建单元格 HSSFCell celli = rowi.createCell(0); // 单元格添加内容 celli.setCellValue(new HSSFRichTextString(str)); i++; } // 写(输出) try { workbook.write(outputStream); } catch (IOException e) { e.printStackTrace(); } } }
package jsoup.zr.com.utils; /** * * @author LF * */ public class Verify { /** * 验证是否是QQ号码 * @param QQNumber * @return */ public static boolean verifyQQNumber(String QQNumber){ System.out.println(QQNumber.matches(Constant.QQREGEX)); return QQNumber.matches(Constant.QQREGEX); } }