在论坛或其他公共场合,经常需要对用户提交的文本进行过滤。我们可以使用下面的方法来实现:
/// <summary>
/// Masks censored words in free text by replacing every character of a match with <c>*</c>.
/// </summary>
/// <remarks>
/// Entries are matched as whole words, ignoring case. An entry may contain two wildcards:
/// <c>*</c> stands for any run of word characters (possibly empty) and <c>?</c> stands for
/// exactly one arbitrary character.
/// http://wintersun.cnblogs.com
/// </remarks>
public class Censor
{
    // RegexOptions.Compiled is deliberately not used: the patterns are rebuilt from
    // CensoredWords on every call, so compiling each one would add cost to every
    // pattern that is not served from the static Regex cache.
    private const RegexOptions MatchOptions = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    /// <summary>
    /// Gets the censored words (optionally containing <c>*</c> and <c>?</c> wildcards).
    /// </summary>
    /// <value>The censored words.</value>
    public IList<string> CensoredWords { get; private set; }

    /// <summary>
    /// Initializes a new instance of the <see cref="Censor"/> class.
    /// </summary>
    /// <param name="censoredWords">The censored words; the sequence is copied into a new list.</param>
    /// <exception cref="ArgumentNullException"><paramref name="censoredWords"/> is null.</exception>
    public Censor(IEnumerable<string> censoredWords)
    {
        if (censoredWords == null)
        {
            throw new ArgumentNullException("censoredWords");
        }

        CensoredWords = new List<string>(censoredWords);
    }

    /// <summary>
    /// Returns a copy of <paramref name="text"/> in which every occurrence of a censored
    /// word is replaced by asterisks of the same length.
    /// </summary>
    /// <param name="text">The text to censor.</param>
    /// <returns>The censored text; an empty input is returned unchanged.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public string CensorText(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        // An empty string is a valid (if trivial) input: there is nothing to censor.
        if (text.Length == 0)
        {
            return text;
        }

        string censoredText = text;
        foreach (string censoredWord in CensoredWords)
        {
            // Skip null, blank and wildcard-only entries (e.g. an empty line in a word
            // list, or a lone "*") so they can neither throw nor blank out every word.
            if (!HasLiteralContent(censoredWord))
            {
                continue;
            }

            censoredText = Regex.Replace(censoredText, ToRegexPattern(censoredWord), StarCensoredMatch, MatchOptions);
        }

        return censoredText;
    }

    /// <summary>
    /// Determines whether an entry contains at least one character that is neither
    /// white space nor the <c>*</c> wildcard.
    /// </summary>
    /// <param name="censoredWord">The entry to inspect; may be null.</param>
    /// <returns><c>true</c> if the entry should be used for matching; otherwise <c>false</c>.</returns>
    private static bool HasLiteralContent(string censoredWord)
    {
        if (censoredWord == null)
        {
            return false;
        }

        foreach (char c in censoredWord)
        {
            if (c != '*' && !char.IsWhiteSpace(c))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Converts a wildcard entry into a whole-word regular expression pattern.
    /// </summary>
    /// <param name="wildcardSearch">The wildcard search.</param>
    /// <returns>The regular expression pattern, anchored with <c>\b</c> on both sides.</returns>
    private static string ToRegexPattern(string wildcardSearch)
    {
        // Escape first so that everything except the two wildcards is matched literally;
        // Regex.Escape turns "*" into "\*" and "?" into "\?", which are then translated.
        string regexPattern = Regex.Escape(wildcardSearch);

        // "*" is limited to word characters so a match never spans several words, and it
        // is translated the same way at the start, middle and end of the entry.
        regexPattern = regexPattern.Replace(@"\*", @"\w*");
        regexPattern = regexPattern.Replace(@"\?", ".");

        return @"\b" + regexPattern + @"\b";
    }

    /// <summary>
    /// Produces the replacement for a censored match: one asterisk per matched character.
    /// </summary>
    /// <param name="m">The match to mask.</param>
    /// <returns>A string of asterisks as long as the match.</returns>
    private static string StarCensoredMatch(Match m)
    {
        return new string('*', m.Length);
    }
}
/// <summary>
/// Checks that <c>CensorText</c> stars out plain words, wildcard entries and a two-word phrase.
/// </summary>
/// <remarks>http://wintersun.cnblogs.com</remarks>
[Test]
public void CensorTextTest()
{
    // arrange
    Censor censor = new Censor(new List<string> { "gosh", "drat", "darn*", "*fuck*", "ass hole" });

    // Each row is { input, expected output }; rows are checked in order.
    string[,] samples =
    {
        {
            "I stubbed my toe. Gosh it hurts!",
            "I stubbed my toe. **** it hurts!"
        },
        {
            "The midrate on the USD -> EUR forex trade has soured my day. Drat!",
            "The midrate on the USD -> EUR forex trade has soured my day. ****!"
        },
        {
            "Gosh darnit, my shoe laces are undone.fuck you ass hole.",
            "**** ******, my shoe laces are undone.**** you ********."
        }
    };

    for (int row = 0; row < samples.GetLength(0); row++)
    {
        // act
        string actual = censor.CensorText(samples[row, 0]);

        // assert
        Assert.AreEqual(samples[row, 1], actual);
    }
}
至于 censoredWords,你可以从文本文件中读取(File.ReadAllLines),也可以使用其他数据源,例如 XML 或数据库。
随你了,希望这篇POST对您有帮助.
Author:Petter Liu http://wintersun.cnblogs.com