• 敏感词过滤工具类


    编写  SensitiveWordInit 类

    package com.JUtils.sensitiveword;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    /**
     *  初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
     *
     */
    public class SensitiveWordInit {
        private String ENCODING = "GBK";    //字符编码
        @SuppressWarnings("rawtypes")
        public HashMap sensitiveWordMap;
    
        SensitiveWordInit(){
            super();
        }
    
        /**
         * @version 1.0
         */
        @SuppressWarnings("rawtypes")
        Map initKeyWord(){
            try {
                //读取敏感词库
                Set<String> keyWordSet = readSensitiveWordFile();
                //将敏感词库加入到HashMap中
                addSensitiveWordToHashMap(keyWordSet);
            } catch (Exception e) {
                e.printStackTrace();
            }
            return sensitiveWordMap;
        }
    
        /**
         * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:<br>
         * 中 = {
         *      isEnd = 0
         *      国 = {<br>
         *           isEnd = 1
         *           人 = {isEnd = 0
         *                民 = {isEnd = 1}
         *                }
         *           男  = {
         *                  isEnd = 0
         *                   人 = {
         *                        isEnd = 1
         *                       }
         *               }
         *           }
         *      }
         *  五 = {
         *      isEnd = 0
         *      星 = {
         *          isEnd = 0
         *          红 = {
         *              isEnd = 0
         *              旗 = {
         *                   isEnd = 1
         *                  }
         *              }
         *          }
         *      }
         * @param keyWordSet  敏感词库
         * @version 1.0
         */
        @SuppressWarnings({ "rawtypes", "unchecked" })
        private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
            sensitiveWordMap = new HashMap(keyWordSet.size());     //初始化敏感词容器,减少扩容操作
            String key = null;
            Map nowMap = null;
            Map<String, String> newWorMap = null;
            //迭代keyWordSet
            Iterator<String> iterator = keyWordSet.iterator();
            while(iterator.hasNext()){
                key = iterator.next();    //关键字
                nowMap = sensitiveWordMap;
                for(int i = 0 ; i < key.length() ; i++){
                    char keyChar = key.charAt(i);       //转换成char型
                    Object wordMap = nowMap.get(keyChar);       //获取
    
                    if(wordMap != null){        //如果存在该key,直接赋值
                        nowMap = (Map) wordMap;
                    }
                    else{     //不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个
                        newWorMap = new HashMap<String,String>();
                        newWorMap.put("isEnd", "0");     //不是最后一个
                        nowMap.put(keyChar, newWorMap);
                        nowMap = newWorMap;
                    }
    
                    if(i == key.length() - 1){
                        nowMap.put("isEnd", "1");    //最后一个
                    }
                }
            }
        }
    
        /**
         * 读取敏感词库中的内容,将内容添加到set集合中
         * @return
         * @version 1.0
         * @throws Exception
         */
        @SuppressWarnings("resource")
        private Set<String> readSensitiveWordFile() throws Exception{
            Set<String> set = null;
    
            File file = new File("D:\SensitiveWord.txt");    //读取文件
            InputStreamReader read = new InputStreamReader(new FileInputStream(file),ENCODING);
            try {
                if(file.isFile() && file.exists()){      //文件流是否存在
                    set = new HashSet<String>();
                    BufferedReader bufferedReader = new BufferedReader(read);
                    String txt = null;
                    while((txt = bufferedReader.readLine()) != null){    //读取文件,将文件内容放入到set中
                        set.add(txt);
                    }
                }
                else{         //不存在抛出异常信息
                    throw new Exception("敏感词库文件不存在");
                }
            } catch (Exception e) {
                throw e;
            }finally{
                read.close();     //关闭文件流
            }
            return set;
        }
    }

    编写  SensitivewordFilterUtil 类

    package com.JUtils.sensitiveword;
    
    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * Sensitive-word filter: detects, extracts and masks sensitive words in a text using
     * the nested-map DFA built by {@code SensitiveWordInit}.
     *
     * <p>Two match rules are supported: the minimum rule stops at the shortest word found
     * at a position, the maximum rule keeps going and reports the longest one.
     */
    public class SensitivewordFilterUtil{
        /** Shortest match length that is reported; single-character hits are ignored. */
        private static final int MIN_WORD_LENGTH = 2;

        @SuppressWarnings("rawtypes")
        private Map sensitiveWordMap = null;
        public static int minMatchTYpe = 1;      // minimum match rule
        public static int maxMatchType = 2;      // maximum match rule

        /**
         * Creates a filter and loads the sensitive-word dictionary.
         */
        @SuppressWarnings("rawtypes")
        public SensitivewordFilterUtil(){
            Map loaded = new SensitiveWordInit().initKeyWord();
            // initKeyWord may hand back null when the word list could not be read;
            // fall back to an empty dictionary so lookups simply find nothing.
            sensitiveWordMap = (loaded != null) ? loaded : java.util.Collections.emptyMap();
        }

        /**
         * Tells whether the text contains at least one sensitive word.
         *
         * @param txt  the text to inspect; {@code null} is treated as containing nothing
         * @param matchType  match rule: 1 = minimum match, 2 = maximum match
         * @return {@code true} if a sensitive word is found, {@code false} otherwise
         */
        public boolean isContaintSensitiveWord(String txt,int matchType){
            if(txt == null){
                return false;
            }
            for(int i = 0 ; i < txt.length() ; i++){
                if(this.CheckSensitiveWord(txt, i, matchType) > 0){
                    return true;    // one hit is enough, no need to scan the rest
                }
            }
            return false;
        }

        /**
         * Collects the sensitive words that occur in the text. Scanning resumes after
         * each hit, so reported occurrences never overlap.
         *
         * @param txt the text to inspect; {@code null} yields an empty set
         * @param matchType match rule: 1 = minimum match, 2 = maximum match
         * @return the distinct sensitive words found, possibly empty
         */
        public Set<String> getSensitiveWord(String txt , int matchType){
            Set<String> sensitiveWordList = new HashSet<String>();
            if(txt == null){
                return sensitiveWordList;
            }

            int i = 0;
            while(i < txt.length()){
                int length = CheckSensitiveWord(txt, i, matchType);
                if(length > 0){
                    sensitiveWordList.add(txt.substring(i, i + length));
                    i += length;    // continue right after the matched word
                }
                else{
                    i++;
                }
            }

            return sensitiveWordList;
        }

        /**
         * Masks every sensitive word in the text, emitting {@code replaceChar} once per
         * character of the word.
         *
         * <p>Words are masked at the positions where they are matched, so the text and
         * the replacement are treated literally (no regular-expression interpretation)
         * and the result does not depend on the order in which words are visited.
         *
         * @param txt the text to mask; {@code null} is returned unchanged
         * @param matchType match rule: 1 = minimum match, 2 = maximum match
         * @param replaceChar the masking string; {@code null} falls back to {@code "*"}
         * @return the masked text
         */
        public String replaceSensitiveWord(String txt,int matchType,String replaceChar){
            if(txt == null || txt.length() == 0){
                return txt;
            }
            String mask = (replaceChar != null) ? replaceChar : "*";

            StringBuilder resultTxt = new StringBuilder(txt.length());
            int i = 0;
            while(i < txt.length()){
                int length = CheckSensitiveWord(txt, i, matchType);
                if(length > 0){
                    resultTxt.append(getReplaceChars(mask, length));
                    i += length;
                }
                else{
                    resultTxt.append(txt.charAt(i));
                    i++;
                }
            }

            return resultTxt.toString();
        }

        /**
         * Builds the masking string for a word of the given length.
         *
         * @param replaceChar the masking string to repeat
         * @param length how many times to repeat it
         * @return {@code replaceChar} repeated {@code length} times
         */
        private String getReplaceChars(String replaceChar,int length){
            StringBuilder resultReplace = new StringBuilder();
            for(int i = 0 ; i < length ; i++){
                resultReplace.append(replaceChar);
            }

            return resultReplace.toString();
        }

        /**
         * Checks whether a sensitive word starts at {@code beginIndex}.
         *
         * <p>The DFA is walked one character at a time. The length is recorded each time
         * a word end is reached, so under the maximum rule a trailing partial match
         * (characters that continue a longer word which never completes) is not counted.
         *
         * <p>NOTE(review): words shorter than two characters are never reported. This
         * keeps the threshold of the previous implementation; confirm it is intended.
         *
         * @param txt the text to inspect
         * @param beginIndex index in {@code txt} at which matching starts
         * @param matchType match rule: 1 = minimum match, 2 = maximum match
         * @return the length of the sensitive word starting at {@code beginIndex},
         *         or 0 if there is none
         */
        @SuppressWarnings({ "rawtypes"})
        public int CheckSensitiveWord(String txt,int beginIndex,int matchType){
            if(txt == null || beginIndex < 0 || sensitiveWordMap == null){
                return 0;
            }

            int walked = 0;           // characters consumed along the DFA so far
            int matchedLength = 0;    // length at the most recent reportable word end
            Map nowMap = sensitiveWordMap;
            for(int i = beginIndex; i < txt.length() ; i++){
                Object next = nowMap.get(txt.charAt(i));
                if(!(next instanceof Map)){
                    break;    // no transition for this character
                }
                nowMap = (Map) next;
                walked++;
                // Too-short word ends are skipped so they cannot hide a longer word
                // that shares the same prefix.
                if(walked >= MIN_WORD_LENGTH && "1".equals(nowMap.get("isEnd"))){
                    matchedLength = walked;
                    if(SensitivewordFilterUtil.minMatchTYpe == matchType){
                        break;    // minimum rule: the first complete word wins
                    }
                }
            }
            return matchedLength;
        }
    }
  • 相关阅读:
    JVM 规范
    通过jmap查看jvm采用的垃圾收集器
    Nginx做前端Proxy时TIME_WAIT过多的问题
    nginx访问http自动跳转到https
    mysql5.7启动slave报错 ERROR 1872 (HY000): Slave failed to initialize relay log info structure from the repository
    nginx检查报错 error while loading shared libraries: libprofiler.so.0: cannot open shared object file: No such file or directory
    Nginx+Center OS 7.2 开机启动设置(转载)
    windows下安装pycharm并连接Linux的python环境
    jenkins结合脚本实现代码自动化部署及一键回滚至上一版本
    centos7-安装mysql5.6.36
  • 原文地址:https://www.cnblogs.com/JonaLin/p/11278334.html
Copyright © 2020-2023  润新知