• crawler_工具类_RegexUtils_正则帮助类


    package com.cph.crawler.core.utils;
    
    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    
    /**
     * 
     * @ClassName: RegexUtils
     * @Description: 正则帮助类
     * @author cphmvp
     * @date 2013-9-9 下午3:48:59<br>
     *       适合单次抽取结果,不适合遍历抽取
     * 
     */
    public final class RegexUtils {
        private RegexUtils() {
    
        }
    
        private static Log logger = LogFactory.getLog(RegexUtils.class);
        private static Pattern defaultPattern;
        private static Matcher defaultMatcher;
        private static final String NOT_MATCHER_DATA = "没有匹配到对应数据";
    
        /**
         * 返回单行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static String getString(String input, Pattern pattren, int group) {
            if (pattren.toString().equals(
                    "<span class="l">当前位置:([\s\S]*?)</span>")) {
                System.out.println("warn");
            }
            String result = "";
            String splitStr = "⊙";
            defaultMatcher = pattren.matcher(input);
            while (defaultMatcher.find()) {
                result = defaultMatcher.group(group).trim() + "" + splitStr;
            }
            result = result.trim().replaceAll("</?[^>]+>", "");
            result = result.replaceAll("&gt;", ">");
            result = result.replaceAll("
    ", "");
            result = result.replaceAll("\r\n", "");
            result = result.replaceAll("\s", "");
            result = result.replaceAll("&nbsp", " ");
            result = result.replace("
    ", "");
            result = result.replace("	", "");
            result = result.replace("^p", "");
            result = result.replaceAll("⊙", " ");
            return result.trim();
        }
    
        /**
         * 返回单行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static String getString(String input, String regex, int group) {
            String result = " ";
            defaultMatcher = getMatcher(input, regex);
            while (defaultMatcher.find()) {
                result = defaultMatcher.group(group).trim();
            }
            getLog(result);
            return result;
        }
    
        /**
         * 获得可匹配对象
         * 
         * @param input
         * @param regex
         * @return
         */
        public static Matcher getMatcher(String input, String regex) {
            defaultPattern = getPattern(regex);
            defaultMatcher = defaultPattern.matcher(input);
            return defaultMatcher;
        }
    
        /**
         * 获得模式对象
         * 
         * @param regex
         * @return
         */
        public static Pattern getPattern(String regex) {
            defaultPattern = Pattern.compile(regex);
            return defaultPattern;
        }
    
        /**
         * 返回多行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static List<String> getStringList(String input, String regex,
                int group) {
            List<String> resultList = new ArrayList<String>();
            defaultMatcher = getMatcher(input, regex);
            while (defaultMatcher.find()) {
                resultList.add(defaultMatcher.group().trim());
            }
            if (resultList.size() < 1) {
                logger.error(NOT_MATCHER_DATA);
            }
            return resultList;
        }
    
        /**
         * 返回多行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static List<Integer> getIntList(String input, String regex, int group) {
            List<Integer> resultList = new ArrayList<Integer>();
            defaultMatcher = getMatcher(input, regex);
            while (defaultMatcher.find()) {
                resultList.add(Integer.parseInt(defaultMatcher.group().trim()));
            }
            if (resultList.size() < 1) {
                logger.error(NOT_MATCHER_DATA);
            }
            return resultList;
        }
    
        /**
         * 返回多行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static String getString(String input, String regex) {
            String result = " ";
            defaultMatcher = getMatcher(input, regex);
            while (defaultMatcher.find()) {
                result = defaultMatcher.group().trim();
            }
            getLog(result);
            return result;
        }
    
        /**
         * 返回单行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static int getInt(String input, String regex, int group) {
            int result = -1;
            defaultMatcher = getMatcher(input, regex);
            while (defaultMatcher.find()) {
                result = Integer.parseInt(defaultMatcher.group(group).trim());
            }
            getLog(result);
            return result;
        }
    
        /**
         * 返回单行结果集
         * 
         * @param input
         * @param regex
         * @param group
         * @return
         */
        public static int getInt(String input, String regex) {
            int result = -1;
            defaultMatcher = getMatcher(input, regex);
            while (defaultMatcher.find()) {
                result = Integer.parseInt(defaultMatcher.group().trim());
            }
            getLog(result);
            return result;
        }
    
        /**
         * 匹配中国邮政编码
         * 
         * @param postcode
         *            邮政编码
         * @return 验证成功返回true,验证失败返回false
         */
        public static boolean checkPostcode(String postcode) {
            String regex = "[1-9]\d{5}";
            return Pattern.matches(regex, postcode);
        }
    
        private static void getLog(String result) {
            if (result.trim().equals("")) {
                logger.error(NOT_MATCHER_DATA);
            }
        }
    
        private static void getLog(Integer result) {
    
            if (-1 == result) {
                logger.error(NOT_MATCHER_DATA);
            }
        }
    }
  • 相关阅读:
    开发JQuery插件标准结构
    JavaScript中的正则表达式
    IntelliJ IDEA 之Web的Facets/Artifacts(八)
    IntelliJ IDEA 快捷键(七)
    IntelliJ IDEA 集成各种插件(六)
    [转]VS2010几款超赞的扩展辅助工具总结
    创建和使用动态链接库(转)vs2008 vs2010
    springboot 自定义Repository
    springboot redis 缓存对象
    springboot 集成spring-session redis 实现分布式session
  • 原文地址:https://www.cnblogs.com/cphmvp/p/3588741.html
Copyright © 2020-2023  润新知