• 获取网页上的所有QQ号码,并生成Excel报表


    需要的jar如下(jsoup 和 Apache POI):

    package jsoup.zr.com.utils;
    /**
     * Configuration constants for the scraper: the page to fetch, where the
     * generated spreadsheet is written, and the regular expressions used to
     * recognise contact identifiers.
     *
     * <p>All fields are {@code final} so that no code can reassign them at
     * runtime.
     *
     * @author LF
     */
    public class Constant {

        /**
         * Address of the web page to scan.
         */
        public static final String URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml";

        /**
         * Directory in which the generated file is stored.
         * Note: use "/" as the path separator, not "\". The value is
         * concatenated directly with {@link #NAME}, so it must end with "/".
         */
        public static final String PATH = "/Users/apple/Desktop/";

        /**
         * Base name of the generated Excel file (the ".xls" extension is
         * appended by the writer).
         */
        public static final String NAME = "QQ";

        /**
         * Regular expression for a QQ number: one digit in the range 1-9
         * followed by 4 to 14 digits in the range 0-9 (5 to 15 digits in total).
         * The pattern is not anchored, so it can be used to find numbers
         * inside longer text.
         */
        public static final String QQREGEX = "[1-9][0-9]{4,14}";

        /**
         * Regular expression for a QQ number, WeChat id or phone number:
         * an entire string of 5 to 19 letters, digits, underscores or hyphens.
         */
        public static final String QQ_WEIXIN_PHONE = "^[a-zA-Z0-9_-]{5,19}$";
    }
    package jsoup.zr.com.utils;
    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    
    /**
     * Command-line entry point: downloads a web page, extracts every substring
     * that matches a regular expression and passes the matches on for export
     * to a spreadsheet.
     *
     * @author LF
     */
    public class ParseHTML {

        /**
         * Scans {@code Constant.URL} for matches of {@code Constant.QQREGEX}
         * and hands the result to {@code ProduceExel.writeByList}.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            List<String> list = getInfoByURL(Constant.URL, Constant.QQREGEX);
            ProduceExel.writeByList(list, "QQ号码");
        }

        /**
         * Reads the resource at {@code urlString} line by line and collects
         * every match of {@code regexString}, in order of appearance.
         * Duplicates are kept, and a match cannot span a line break.
         *
         * <p>Failures are reported with {@code printStackTrace()} and never
         * propagate as {@code IOException}: a malformed URL yields an empty
         * list, and an I/O error yields whatever was matched before the error.
         *
         * @param urlString   address of the resource to read
         * @param regexString regular expression to search each line for
         * @return the matched substrings; never {@code null}
         * @throws java.util.regex.PatternSyntaxException if
         *         {@code regexString} is not a valid regular expression
         */
        public static List<String> getInfoByURL(String urlString, String regexString) {
            List<String> list = new ArrayList<String>();

            URL url;
            try {
                url = new URL(urlString);
            } catch (MalformedURLException e) {
                e.printStackTrace();
                // Nothing can be fetched without a valid URL.
                return list;
            }

            // Compile before touching the network so a bad pattern fails
            // without leaving a connection open.
            Pattern pattern = Pattern.compile(regexString);

            BufferedReader bReader = null;
            try {
                URLConnection connection = url.openConnection();
                // NOTE: the response is decoded with the platform default charset.
                bReader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
                String line;
                while ((line = bReader.readLine()) != null) {
                    Matcher matcher = pattern.matcher(line);
                    // Store every match found on this line.
                    while (matcher.find()) {
                        list.add(matcher.group());
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Always release the underlying stream, even after an error.
                if (bReader != null) {
                    try {
                        bReader.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

            return list;
        }

    }
    package jsoup.zr.com.utils;
    
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.poi.hssf.usermodel.HSSFCell;
    import org.apache.poi.hssf.usermodel.HSSFRichTextString;
    import org.apache.poi.hssf.usermodel.HSSFRow;
    import org.apache.poi.hssf.usermodel.HSSFSheet;
    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
    /**
     * Writes a single column of strings to an Excel (.xls) workbook located at
     * {@code Constant.PATH + Constant.NAME + ".xls"}.
     *
     * @author LF
     */
    public class ProduceExel {

        /**
         * Generates the Excel report from a list. Prints the list size, the
         * target path and each value to standard output. Does nothing when the
         * list is {@code null} or empty.
         *
         * @param list     values to write, one per row below the header
         * @param lineName text of the header cell in the first row
         */
        public static void writeByList(List<String> list, String lineName) {
            // Check for null before touching the list.
            if (list == null) {
                return;
            }
            System.out.println(list.size());
            // Nothing to write.
            if (list.isEmpty()) {
                return;
            }
            String path = Constant.PATH + Constant.NAME + ".xls";
            System.out.println(path);
            for (String str : list) {
                System.out.println("=====" + str);
            }
            writeColumn(list, lineName, path);
        }

        /**
         * Generates the Excel report from the values of a map, in the map's
         * iteration order. Prints the target path to standard output. Does
         * nothing when the map is {@code null} or empty.
         *
         * @param map      map whose values are written, one per row below the header
         * @param lineName text of the header cell in the first row
         */
        public static void write(Map<String, String> map, String lineName) {
            // Nothing to write.
            if (map == null || map.isEmpty()) {
                return;
            }
            String path = Constant.PATH + Constant.NAME + ".xls";
            System.out.println(path);
            writeColumn(map.values(), lineName, path);
        }

        /**
         * Builds a workbook with one sheet named "lf", puts {@code lineName}
         * in the first cell of row 0 and each value in the first cell of the
         * following rows, then writes the workbook to {@code path}. I/O
         * failures are reported with {@code printStackTrace()}.
         */
        private static void writeColumn(Iterable<String> values, String lineName, String path) {
            HSSFWorkbook workbook = new HSSFWorkbook();
            HSSFSheet sheet = workbook.createSheet("lf");

            // Header row.
            HSSFRow headerRow = sheet.createRow(0);
            HSSFCell headerCell = headerRow.createCell(0);
            headerCell.setCellValue(new HSSFRichTextString(lineName));

            // Data rows start directly below the header.
            int rowIndex = 1;
            for (String value : values) {
                HSSFRow dataRow = sheet.createRow(rowIndex);
                HSSFCell dataCell = dataRow.createCell(0);
                dataCell.setCellValue(new HSSFRichTextString(value));
                rowIndex++;
            }

            OutputStream outputStream = null;
            try {
                outputStream = new FileOutputStream(new File(path));
                workbook.write(outputStream);
            } catch (IOException e) {
                // Also covers FileNotFoundException from opening the target file.
                e.printStackTrace();
            } finally {
                // Close the stream so the file is flushed and the handle released.
                if (outputStream != null) {
                    try {
                        outputStream.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

    }
    package jsoup.zr.com.utils;
    /**
     * 
     * @author LF
     *
     */
    public class Verify {
        
        /**
         * 验证是否是QQ号码
         * @param QQNumber
         * @return
         */
        public static boolean verifyQQNumber(String QQNumber){
            System.out.println(QQNumber.matches(Constant.QQREGEX));
            return QQNumber.matches(Constant.QQREGEX);
        }
        
    }
  • 相关阅读:
    尚硅谷面试第一季-05递归与迭代
    尚硅谷面试第一季-04方法的参数传递机制
    尚硅谷面试第一季-03类初始化和实例初始化
    python爬爬爬之单网页html页面爬取
    python之花瓣美女下载
    (转载博文)VC++API速查
    (转载博文)MFC 窗口句柄获取
    全局变量的声明
    python图片小爬虫
    Opencv2.4.4作图像旋转和缩放
  • 原文地址:https://www.cnblogs.com/lantu1989/p/6792831.html
Copyright © 2020-2023  润新知