• NET脏字过滤算法 收藏



    方法一:使用正则表达式


     1//脏字典数据存放文件路径
     2        private static string FILE_NAME="zang.txt";
     3        //脏数据字典表,如:脏数据一|脏数据二|脏数据三
     4        public static string dirtyStr="";
     5       
     6        public ValidDirty()
     7        {
     8            if (HttpRuntime.Cache["Regex"]==null)
     9            {
    10                dirtyStr=ReadDic();
    11                //用于检测脏字典的正则表达式
    12                Regex validateReg= new Regex("^((?!"+dirtyStr+").(?<!"+dirtyStr+"))*$",RegexOptions.Compiled|RegexOptions.ExplicitCapture);   
    13                HttpRuntime.Cache.Insert("Regex" ,validateReg,null,DateTime.Now.AddMinutes(20) ,TimeSpan.Zero);
    14            }
    15           
    16        }
    17        private string ReadDic()
    18        {
    19            FILE_NAME=Environment.CurrentDirectory+"http://www.cnblogs.com/hsapphire/admin/file://%22+file_name/;
    20
    21            if (!File.Exists(FILE_NAME))
    22            {
    23                Console.WriteLine("{0} does not exist.", FILE_NAME);
    24                return "";
    25            }
    26            StreamReader sr = File.OpenText(FILE_NAME);
    27            String input="";
    28            while (sr.Peek() > -1)
    29            {
    30                input += sr.ReadLine() ;
    31            }
    32           
    33            sr.Close();
    34            return input;
    35
    36        }
    37
    38       
    39        public bool ValidByReg(string str)
    40        {
    41            Regex reg=(Regex)HttpRuntime.Cache["Regex"];
    42            return reg.IsMatch(str) ;
    43           
    44        }


    感觉这种方法的执行效率不是很高,简单的测试了一下 1000字的文章,脏字典有800多个关键字
    式了一下是 1.238秒,大家有没有更好的方法,请不吝赐教!

    方法二:普通循环查找方法


        public bool ValidGeneral(string str)
            {
               
                if(!File.Exists(FILE_NAME))
                {
                    Console.WriteLine("文件路径或者文件路径不存在错误信息") ;
                    return false;
                }
                else
                {
                    StreamReader objReader = new StreamReader(FILE_NAME,System.Text.Encoding.GetEncoding("gb2312"));
                    string sLine="";
                    ArrayList arrText = new ArrayList();

                    while (sLine != null)
                    {
                        sLine = objReader.ReadLine();
                        if (sLine != null)
                            arrText.Add(sLine);
                       
                    }
                    objReader.Close();


                    foreach (string sOutput in arrText)
                    {
                        string[] strArr=sOutput.Split('|');
                       
                        for (int i = 0; i < strArr.Length; i++)
                        {
                            if (str.IndexOf(strArr[i])!=-1)
                            {
                                return false;   
                            }
                           
                        }
                       
                    }
                    return true;

                }

            }


    以下是测试的方法,有什么问题还大家请指出!


     1DateTime t1 =DateTime.Now;
     2            string str="213";
     3            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
     4            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
     5            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
     6            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
     7            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
     8            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
     9            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    10            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    11            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    12            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";   
    13            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    14            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    15            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    16            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    17            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    18            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    19            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    20            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    21            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    22            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    23            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    24            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    25            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    26            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    27            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    28            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    29            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    30            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    31            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    32            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    33            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    34            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    35            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    36            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    37            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    38            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    39            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    40            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    41            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    42            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    43            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    44            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    45            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    46            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    47            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    48            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    49            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    50            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    51            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    52            str+="珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋珍惜水晶之恋";
    53            ValidDirty vd=new ValidDirty() ;
    54            Console.WriteLine(vd.ValidByReg(str))  ;
    55            DateTime t2 =DateTime.Now;
    56            TimeSpan ts=t2-t1;
    57            Console.WriteLine(ts.TotalMilliseconds) ;
    58            Console.Read() ;

    算法
     检索文本文件长度 / 耗费时间(ms)
     
    正则算法
     10个汉字/ 980
     100个汉字/999
     1000个汉字/1234
     
    普通算法
     10个汉字/ 234
     100个汉字/234
     1000个汉字/265
     


    脏字典下载

     


    作者:水木    
     
  • 相关阅读:
    leetcode 309. Best Time to Buy and Sell Stock with Cooldown
    leetcode 714. Best Time to Buy and Sell Stock with Transaction Fee
    leetcode 32. Longest Valid Parentheses
    leetcode 224. Basic Calculator
    leetcode 540. Single Element in a Sorted Array
    leetcode 109. Convert Sorted List to Binary Search Tree
    leetcode 3. Longest Substring Without Repeating Characters
    leetcode 84. Largest Rectangle in Histogram
    leetcode 338. Counting Bits
    git教程之回到过去,版本对比
  • 原文地址:https://www.cnblogs.com/hsapphire/p/1569001.html
Copyright © 2020-2023  润新知