• .net2.0 非法关键字过滤算法


    偶然在网上看到这篇文章,以后有可能会用到,先暂时记下来,等用得上时再仔细研究。

    .NET 2.0 不支持 HashSet,需要自行实现此类;它比 Hashtable 速度快一些。

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Runtime.Serialization;

    /// <summary>
    /// A minimal set collection for runtimes without a built-in HashSet (the
    /// surrounding notes target .NET 2.0). Items are stored as the keys of an
    /// internal <see cref="Dictionary{TKey,TValue}"/>; the dictionary values are
    /// always null and carry no meaning. Null items are never stored.
    /// </summary>
    /// <typeparam name="T">The type of the items in the set.</typeparam>
    public class HashSet<T> : ICollection<T>, ISerializable, IDeserializationCallback
    {
        // Backing store: only the keys matter.
        private readonly Dictionary<T, object> dict;

        /// <summary>
        /// Creates an empty set.
        /// </summary>
        public HashSet()
        {
            dict = new Dictionary<T, object>();
        }

        /// <summary>
        /// Creates a set containing the distinct items of <paramref name="items"/>.
        /// </summary>
        /// <param name="items">The items to add; a null sequence yields an empty set.</param>
        /// <exception cref="ArgumentNullException">An element of <paramref name="items"/> is null.</exception>
        public HashSet(IEnumerable<T> items)
            : this()
        {
            if (items == null)
            {
                return;
            }

            foreach (T item in items)
            {
                Add(item);
            }
        }

        /// <summary>
        /// Gets a new, empty set. A fresh instance is created on every access.
        /// </summary>
        public HashSet<T> NullSet
        {
            get { return new HashSet<T>(); }
        }

        #region ICollection<T> Members

        /// <summary>
        /// Adds an item to the set. Adding an item that is already present has no effect.
        /// </summary>
        /// <param name="item">The item to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
        public void Add(T item)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            // The indexer overwrites silently, so duplicates need no prior check.
            dict[item] = null;
        }

        /// <summary>
        /// Removes all items from the set.
        /// </summary>
        public void Clear()
        {
            dict.Clear();
        }

        /// <summary>
        /// Determines whether the set contains <paramref name="item"/>.
        /// </summary>
        /// <param name="item">The item to look for.</param>
        /// <returns>true if the item is in the set; false otherwise, including when it is null.</returns>
        public bool Contains(T item)
        {
            // Null can never be stored (Add rejects it), and the dictionary would
            // throw on a null key, so answer directly.
            if (item == null)
            {
                return false;
            }

            return dict.ContainsKey(item);
        }

        /// <summary>
        /// Copies the items of the set into <paramref name="array"/>, starting at
        /// <paramref name="arrayIndex"/>.
        /// </summary>
        /// <param name="array">The destination array.</param>
        /// <param name="arrayIndex">The zero-based index in <paramref name="array"/> at which copying begins.</param>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="arrayIndex"/> is negative or greater than the length of <paramref name="array"/>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The set has more items than fit between <paramref name="arrayIndex"/> and the end of <paramref name="array"/>.
        /// </exception>
        public void CopyTo(T[] array, int arrayIndex)
        {
            if (array == null)
            {
                throw new ArgumentNullException("array");
            }

            if (arrayIndex < 0 || arrayIndex > array.Length)
            {
                throw new ArgumentOutOfRangeException("arrayIndex");
            }

            // What matters is the remaining space, not how arrayIndex compares to Count:
            // copying 3 items to index 5 of a 10-slot array is valid.
            if (array.Length - arrayIndex < dict.Count)
            {
                throw new ArgumentException(
                    "The destination array does not have enough space from arrayIndex to its end.",
                    "array");
            }

            dict.Keys.CopyTo(array, arrayIndex);
        }

        /// <summary>
        /// Removes <paramref name="item"/> from the set.
        /// </summary>
        /// <param name="item">The item to remove.</param>
        /// <returns>true if the item was present and has been removed; false otherwise, including when it is null.</returns>
        public bool Remove(T item)
        {
            if (item == null)
            {
                return false;
            }

            return dict.Remove(item);
        }

        /// <summary>
        /// Gets the number of items in the set.
        /// </summary>
        public int Count
        {
            get { return dict.Count; }
        }

        /// <summary>
        /// Gets a value indicating whether the set is read-only. Always false.
        /// </summary>
        public bool IsReadOnly
        {
            get { return false; }
        }

        #endregion

        /// <summary>
        /// Returns a new set holding every item that is in this set or in <paramref name="set"/>.
        /// </summary>
        /// <param name="set">The other set; null is treated as empty.</param>
        /// <returns>A new set; neither operand is modified.</returns>
        public HashSet<T> Union(HashSet<T> set)
        {
            HashSet<T> unionSet = new HashSet<T>(this);

            if (set != null)
            {
                foreach (T item in set)
                {
                    unionSet.Add(item);
                }
            }

            return unionSet;
        }

        /// <summary>
        /// Returns a new set holding the items of this set that are not in <paramref name="set"/>.
        /// </summary>
        /// <param name="set">The items to exclude; null is treated as empty.</param>
        /// <returns>A new set; neither operand is modified.</returns>
        public HashSet<T> Subtract(HashSet<T> set)
        {
            HashSet<T> subtractSet = new HashSet<T>(this);

            if (set != null)
            {
                foreach (T item in set)
                {
                    // Remove is a no-op for items that are not present.
                    subtractSet.dict.Remove(item);
                }
            }

            return subtractSet;
        }

        /// <summary>
        /// Determines whether every item of this set is also in <paramref name="set"/>.
        /// </summary>
        /// <param name="set">The candidate superset; null is treated as empty.</param>
        /// <returns>true if this set is a subset of <paramref name="set"/>.</returns>
        public bool IsSubsetOf(HashSet<T> set)
        {
            if (set == null)
            {
                // Only the empty set is a subset of the empty set.
                return Count == 0;
            }

            if (Count > set.Count)
            {
                return false;
            }

            foreach (T item in this)
            {
                if (!set.Contains(item))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns a new set holding the items that are in both this set and <paramref name="set"/>.
        /// </summary>
        /// <param name="set">The other set; null is treated as empty.</param>
        /// <returns>A new set; neither operand is modified.</returns>
        public HashSet<T> Intersection(HashSet<T> set)
        {
            HashSet<T> intersectionSet = new HashSet<T>();

            if (set == null)
            {
                return intersectionSet;
            }

            // One pass is enough: an item common to both sets is necessarily
            // reached while enumerating this set.
            foreach (T item in this)
            {
                if (set.Contains(item))
                {
                    intersectionSet.Add(item);
                }
            }

            return intersectionSet;
        }

        /// <summary>
        /// Determines whether this set is a subset of <paramref name="set"/> and the two sets differ.
        /// </summary>
        /// <param name="set">The candidate superset; null is treated as empty.</param>
        /// <returns>true if this set is a proper subset of <paramref name="set"/>.</returns>
        public bool IsProperSubsetOf(HashSet<T> set)
        {
            // A is a proper subset of B when A is a subset of B and B has at least one extra item.
            return set != null && Count < set.Count && IsSubsetOf(set);
        }

        /// <summary>
        /// Determines whether every item of <paramref name="set"/> is also in this set.
        /// </summary>
        /// <param name="set">The candidate subset; null is treated as empty.</param>
        /// <returns>true if this set is a superset of <paramref name="set"/>.</returns>
        public bool IsSupersetOf(HashSet<T> set)
        {
            if (set == null)
            {
                // Every set is a superset of the empty set.
                return true;
            }

            return set.IsSubsetOf(this);
        }

        /// <summary>
        /// Determines whether this set is a superset of <paramref name="set"/> and the two sets differ.
        /// </summary>
        /// <param name="set">The candidate subset; null is treated as empty.</param>
        /// <returns>true if this set is a proper superset of <paramref name="set"/>.</returns>
        public bool IsProperSupersetOf(HashSet<T> set)
        {
            // B is a proper superset of A when B is a superset of A and B has at least one extra item.
            int otherCount = (set == null) ? 0 : set.Count;

            return Count > otherCount && IsSupersetOf(set);
        }

        /// <summary>
        /// Copies the items of the set into a new list.
        /// </summary>
        /// <returns>A new <see cref="List{T}"/> containing the items of the set.</returns>
        public List<T> ToList()
        {
            return new List<T>(this);
        }

        #region Implementation of ISerializable

        /// <summary>
        /// Populates <paramref name="info"/> by delegating to the backing dictionary.
        /// </summary>
        /// <param name="info">The serialization store to populate.</param>
        /// <param name="context">The destination of the serialization.</param>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
        /// <remarks>
        /// NOTE(review): this class declares no (SerializationInfo, StreamingContext)
        /// constructor, so formatter-based deserialization probably cannot rebuild
        /// an instance. Confirm before relying on serialization.
        /// </remarks>
        public void GetObjectData(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            dict.GetObjectData(info, context);
        }

        #endregion

        #region Implementation of IDeserializationCallback

        /// <summary>
        /// Forwards the deserialization-complete callback to the backing dictionary.
        /// </summary>
        /// <param name="sender">The object that initiated the callback.</param>
        public void OnDeserialization(object sender)
        {
            dict.OnDeserialization(sender);
        }

        #endregion

        #region Implementation of IEnumerable

        /// <summary>
        /// Returns an enumerator over the items of the set.
        /// </summary>
        /// <returns>An enumerator over the keys of the backing dictionary.</returns>
        public IEnumerator<T> GetEnumerator()
        {
            return dict.Keys.GetEnumerator();
        }

        /// <summary>
        /// Returns a non-generic enumerator over the items of the set.
        /// </summary>
        /// <returns>The same enumerator as <see cref="GetEnumerator"/>.</returns>
        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        #endregion
    }

    以下为过滤算法

    /// <summary>
    /// Checks whether a text contains any word from a configured list of bad words.
    /// Per-character lookup tables reject most positions cheaply, so a substring is
    /// only built and looked up in the word set when every table agrees that a
    /// match is possible. Matching is exact (case-sensitive, ordinal).
    /// </summary>
    public class BadWordsFilter
    {
        // One table slot per possible char value (0x0000..0xFFFF inclusive).
        private const int CharTableSize = char.MaxValue + 1;

        // All registered words of two or more characters.
        private readonly HashSet<string> hash = new HashSet<string>();

        // fastCheck[c]: bit i (0..6) is set when c occurs at position i of some word;
        // bit 7 is set when c occurs at position 7 or later.
        private readonly byte[] fastCheck = new byte[CharTableSize];

        // fastLength[c]: for every multi-character word starting with c,
        // bit min(7, length - 2) is set.
        private readonly byte[] fastLength = new byte[CharTableSize];

        // charCheck[c]: c on its own is a registered single-character word.
        private readonly BitArray charCheck = new BitArray(CharTableSize);

        // endCheck[c]: c is the last character of some multi-character word.
        private readonly BitArray endCheck = new BitArray(CharTableSize);

        private int maxWordLength = 0;
        private int minWordLength = int.MaxValue;

        /// <summary>
        /// Creates a filter with no registered words; call <see cref="Init"/> to add some.
        /// </summary>
        public BadWordsFilter()
        {
        }

        /// <summary>
        /// Registers the given words. Calling this more than once accumulates words.
        /// </summary>
        /// <param name="badwords">The words to detect; null and empty entries are ignored.</param>
        /// <exception cref="ArgumentNullException"><paramref name="badwords"/> is null.</exception>
        public void Init(string[] badwords)
        {
            if (badwords == null)
            {
                throw new ArgumentNullException("badwords");
            }

            foreach (string word in badwords)
            {
                // An empty word has no first character to index the tables with
                // and would drag minWordLength down to zero.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                maxWordLength = Math.Max(maxWordLength, word.Length);
                minWordLength = Math.Min(minWordLength, word.Length);

                for (int i = 0; i < word.Length; i++)
                {
                    // Positions 7 and beyond share the top bit.
                    fastCheck[word[i]] |= (byte)(1 << Math.Min(i, 7));
                }

                if (word.Length == 1)
                {
                    charCheck[word[0]] = true;
                }
                else
                {
                    fastLength[word[0]] |= (byte)(1 << Math.Min(7, word.Length - 2));
                    endCheck[word[word.Length - 1]] = true;
                    hash.Add(word);
                }
            }
        }

        /// <summary>
        /// Not implemented.
        /// </summary>
        /// <param name="text">Unused.</param>
        /// <param name="mask">Unused.</param>
        /// <returns>Never returns.</returns>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public string Filter(string text, string mask)
        {
            throw new NotImplementedException();
        }

        /// <summary>
        /// Determines whether <paramref name="text"/> contains any registered word.
        /// </summary>
        /// <param name="text">The text to scan; null or empty text contains no bad word.</param>
        /// <returns>true if at least one registered word occurs in <paramref name="text"/>.</returns>
        public bool HasBadWord(string text)
        {
            if (string.IsNullOrEmpty(text) || maxWordLength == 0)
            {
                return false;
            }

            // Every position is considered as a potential word start. Skipping ahead
            // based on characters seen in the inner loop is unsafe: it can jump over
            // a valid start character (e.g. the second 'a' of "aab" for the word "ab").
            for (int index = 0; index < text.Length; index++)
            {
                char begin = text[index];

                // Bit 0 set means some word starts with this character.
                if ((fastCheck[begin] & 1) == 0)
                {
                    continue;
                }

                if (charCheck[begin])
                {
                    return true;
                }

                // j is the offset of the candidate's last character, so the
                // candidate length is j + 1 and must not exceed maxWordLength.
                int lastOffset = Math.Min(maxWordLength - 1, text.Length - index - 1);

                for (int j = 1; j <= lastOffset; j++)
                {
                    char current = text[index + j];

                    // No word has this character at position j: nothing longer can match either.
                    if ((fastCheck[current] & (1 << Math.Min(j, 7))) == 0)
                    {
                        break;
                    }

                    if (j + 1 < minWordLength)
                    {
                        continue;
                    }

                    // Only pay for Substring + hash lookup when a word starting with
                    // 'begin' has this length class and some word ends with 'current'.
                    bool lengthPossible = (fastLength[begin] & (1 << Math.Min(j - 1, 7))) != 0;

                    if (lengthPossible && endCheck[current] && hash.Contains(text.Substring(index, j + 1)))
                    {
                        return true;
                    }
                }
            }

            return false;
        }
    }
    }

  • 相关阅读:
    学习人人都是产品经理03
    Excel 自增
    axios中get/post请求方式
    vue本地跨域实现与原理
    于无声处听惊雷Deepin成长小记
    .NET桌面程序混合开发之三:WebView2与JS的深度应用
    跨平台系统的开发方案整理
    .NET桌面程序混合开发之二:在原生WinFrom程序中使用WebView2
    双系统升级Win11小记.md
    .NET桌面程序混合开发之一:Winform+H5,WebView2概览
  • 原文地址:https://www.cnblogs.com/qingyi/p/1977494.html
Copyright © 2020-2023  润新知