• 简单的不雅词语过滤类


            在论坛或一些公共的地方, 经常要对客户提交的文本进行过滤,我们可以使用以下这种方法来实现:

    /// <summary>
    /// Replaces every occurrence of a configured "censored" word in a text with
    /// asterisks of the same length. Matching is case-insensitive and anchored on
    /// word boundaries; entries may use <c>*</c> and <c>?</c> as wildcards.
    /// </summary>
    /// <remarks>http://wintersun.cnblogs.com</remarks>
    public class Censor
    {
        /// <summary>
        /// Options shared by every generated regular expression.
        /// </summary>
        private const RegexOptions PatternOptions =
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled;

        /// <summary>
        /// Compiled expressions keyed by the censored word they were built from.
        /// Compiling is expensive, so each word is compiled once per instance
        /// instead of once per <see cref="CensorText"/> call.
        /// </summary>
        private readonly Dictionary<string, Regex> _regexCache = new Dictionary<string, Regex>();

        /// <summary>
        /// Guards <see cref="_regexCache"/>, because <see cref="Dictionary{TKey,TValue}"/>
        /// does not support concurrent writers.
        /// </summary>
        private readonly object _regexCacheLock = new object();

        /// <summary>
        /// Gets the censored words. The list is a copy of the sequence passed to the
        /// constructor; words added to it later are honoured by <see cref="CensorText"/>.
        /// </summary>
        /// <value>The censored words.</value>
        public IList<string> CensoredWords { get; private set; }

        /// <summary>
        /// Initializes a new instance of the <see cref="Censor"/> class.
        /// </summary>
        /// <param name="censoredWords">The censored words (copied into a new list).</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="censoredWords"/> is <c>null</c>.
        /// </exception>
        public Censor(IEnumerable<string> censoredWords)
        {
            if (censoredWords == null)
            {
                throw new ArgumentNullException("censoredWords");
            }

            CensoredWords = new List<string>(censoredWords);
        }

        /// <summary>
        /// Censors the text: each match of each censored word is replaced by a run of
        /// <c>*</c> characters as long as the matched text.
        /// </summary>
        /// <param name="text">The text to censor.</param>
        /// <returns>
        /// The censored text. An empty <paramref name="text"/> is returned unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public string CensorText(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            // Nothing to censor; an empty (non-null) string is a legitimate input.
            if (text.Length == 0)
            {
                return text;
            }

            string censoredText = text;

            foreach (string censoredWord in CensoredWords)
            {
                // Null would make Regex.Escape throw and an empty word can only
                // produce zero-length matches, so both are skipped.
                if (string.IsNullOrEmpty(censoredWord))
                {
                    continue;
                }

                censoredText = GetRegex(censoredWord).Replace(censoredText, StarCensoredMatch);
            }

            return censoredText;
        }

        /// <summary>
        /// Returns the compiled expression for a censored word, building and caching
        /// it on first use.
        /// </summary>
        /// <param name="censoredWord">A non-empty censored word.</param>
        /// <returns>The compiled, case-insensitive expression for the word.</returns>
        private Regex GetRegex(string censoredWord)
        {
            lock (_regexCacheLock)
            {
                Regex regex;
                if (!_regexCache.TryGetValue(censoredWord, out regex))
                {
                    regex = new Regex(ToRegexPattern(censoredWord), PatternOptions);
                    _regexCache[censoredWord] = regex;
                }

                return regex;
            }
        }

        /// <summary>
        /// Converts a censored word with optional wildcards into a regex pattern.
        /// </summary>
        /// <param name="wildcardSearch">The wildcard search.</param>
        /// <returns>
        /// The escaped word wrapped in <c>\b</c> anchors, where <c>?</c> becomes
        /// <c>.</c>, a leading <c>*</c> becomes <c>\w*</c> and any other <c>*</c>
        /// becomes the lazy <c>.*?</c>.
        /// </returns>
        private static string ToRegexPattern(string wildcardSearch)
        {
            string regexPattern = Regex.Escape(wildcardSearch);

            regexPattern = regexPattern.Replace(@"\*", ".*?");
            regexPattern = regexPattern.Replace(@"\?", ".");

            // A leading ".*?" directly after the opening \b would swallow everything
            // from an earlier word boundary up to the word, so the leading wildcard
            // is restricted to word characters: "*foo" then matches "barfoo" as a
            // whole word without reaching across neighbouring words.
            if (regexPattern.StartsWith(".*?", StringComparison.Ordinal))
            {
                regexPattern = @"\w*" + regexPattern.Substring(3);
            }

            return @"\b" + regexPattern + @"\b";
        }

        /// <summary>
        /// Produces the replacement for a censored match.
        /// </summary>
        /// <param name="m">The match to hide.</param>
        /// <returns>A string of <c>*</c> with the same length as the matched text.</returns>
        private static string StarCensoredMatch(Match m)
        {
            return new string('*', m.Value.Length);
        }
    }

    好的,接着来看UnitTest:

    /// <summary>
    /// Checks <c>Censor.CensorText</c> against three sample sentences covering a
    /// plain word, a word that also appears inside a longer word, and a sentence
    /// mixing wildcard entries with a two-word phrase.
    /// </summary>
    /// <remarks>http://wintersun.cnblogs.com</remarks>
    [Test]
    public void CensorTextTest()
    {
        // Arrange: the word list plus a table of { input, expected } pairs.
        Censor censor = new Censor(new List<string>
        {
            "gosh",
            "drat",
            "darn*",
            "*fuck*",
            "ass hole"
        });

        string[][] samples =
        {
            new[]
            {
                "I stubbed my toe. Gosh it hurts!",
                "I stubbed my toe. **** it hurts!"
            },
            new[]
            {
                "The midrate on the USD -> EUR forex trade has soured my day. Drat!",
                "The midrate on the USD -> EUR forex trade has soured my day. ****!"
            },
            new[]
            {
                "Gosh darnit, my shoe laces are undone.fuck you ass hole.",
                "**** ******, my shoe laces are undone.**** you ********."
            }
        };

        // Act + assert, in the listed order; the first mismatch fails the test.
        foreach (string[] sample in samples)
        {
            string input = sample[0];
            string expected = sample[1];

            Assert.AreEqual(expected, censor.CensorText(input));
        }
    }

    关于那个censoredWords,你可以从一个文本文件读出(File.ReadAllLines),或使用其它数据源,如XML、数据库等.
    随你了,希望这篇POST对您有帮助.

    Author:Petter Liu   http://wintersun.cnblogs.com

  • 相关阅读:
    线性判别分析(LDA)
    奇异值分解(SVD)
    傅里叶变换
    SVM 之非线性支持向量机
    三角函数常用公式
    协方差、样本协方差和协方差矩阵
    方差和样本方差
    SVM 之线性支持向量机
    参考资料汇总
    QObject: Cannot create children for a parent that is in a different thread
  • 原文地址:https://www.cnblogs.com/wintersun/p/1517154.html
Copyright © 2020-2023  润新知