• .net2.0 非法关键字过滤算法


    偶然在网上看到这篇文章,以后有可能会用到,先记录下来,等用得上时再仔细研究。

    .NET 2.0 不支持 HashSet,需要自行实现此类;它比 Hashtable 速度快一些。

    using System;
    using System.Collections;
    using System.Collections.Generic;
    using System.Runtime.Serialization;

    /// <summary>
    /// A minimal set collection for runtimes that lack a built-in hash set.
    /// Items are stored as the keys of a <see cref="Dictionary{TKey,TValue}"/>;
    /// the dictionary values are always <c>null</c> and carry no meaning.
    /// </summary>
    /// <typeparam name="T">The type of the items held in the set.</typeparam>
    public class HashSet<T> : ICollection<T>, ISerializable, IDeserializationCallback
    {
        // Backing store: membership is "key present"; the value slot is unused.
        private readonly Dictionary<T, object> dict;

        /// <summary>
        /// Creates an empty set.
        /// </summary>
        public HashSet()
        {
            dict = new Dictionary<T, object>();
        }

        /// <summary>
        /// Creates a set containing every item of <paramref name="items"/>.
        /// A <c>null</c> sequence produces an empty set.
        /// </summary>
        /// <param name="items">The items to add; may be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException">An element of <paramref name="items"/> is <c>null</c>.</exception>
        public HashSet(IEnumerable<T> items)
            : this()
        {
            if (items == null)
            {
                return;
            }

            foreach (T item in items)
            {
                Add(item);
            }
        }

        /// <summary>
        /// Gets a new, empty set. A fresh instance is created on every access.
        /// </summary>
        public HashSet<T> NullSet
        {
            get { return new HashSet<T>(); }
        }

        #region ICollection<T> Members

        /// <summary>
        /// Adds <paramref name="item"/> to the set. Adding an item that is already present is a no-op.
        /// </summary>
        /// <param name="item">The item to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="item"/> is <c>null</c>.</exception>
        public void Add(T item)
        {
            if (item == null)
            {
                throw new ArgumentNullException("item");
            }

            // The indexer overwrites silently, which gives set semantics for duplicates.
            dict[item] = null;
        }

        /// <summary>
        /// Removes all items from the set.
        /// </summary>
        public void Clear()
        {
            dict.Clear();
        }

        /// <summary>
        /// Determines whether the set contains <paramref name="item"/>.
        /// </summary>
        /// <param name="item">The item to look for.</param>
        /// <returns>true if the item is present; false otherwise, including when <paramref name="item"/> is <c>null</c>.</returns>
        public bool Contains(T item)
        {
            // Add() never stores null, and the dictionary would throw on a null key.
            if (item == null)
            {
                return false;
            }

            return dict.ContainsKey(item);
        }

        /// <summary>
        /// Copies the items of the set into <paramref name="array"/>, starting at <paramref name="arrayIndex"/>.
        /// </summary>
        /// <param name="array">The destination array.</param>
        /// <param name="arrayIndex">The zero-based index in <paramref name="array"/> at which copying begins.</param>
        /// <exception cref="ArgumentNullException"><paramref name="array"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative or greater than the array length.</exception>
        /// <exception cref="ArgumentException">There is not enough room from <paramref name="arrayIndex"/> to the end of <paramref name="array"/>.</exception>
        public void CopyTo(T[] array, int arrayIndex)
        {
            if (array == null)
            {
                throw new ArgumentNullException("array");
            }

            if (arrayIndex < 0 || arrayIndex > array.Length)
            {
                throw new ArgumentOutOfRangeException("arrayIndex");
            }

            // The offset is unrelated to Count; only the remaining room matters.
            if (array.Length - arrayIndex < Count)
            {
                throw new ArgumentException("Destination array is not long enough to hold the items of the set.", "array");
            }

            dict.Keys.CopyTo(array, arrayIndex);
        }

        /// <summary>
        /// Removes <paramref name="item"/> from the set.
        /// </summary>
        /// <param name="item">The item to remove.</param>
        /// <returns>true if the item was present and has been removed; false otherwise, including when <paramref name="item"/> is <c>null</c>.</returns>
        public bool Remove(T item)
        {
            if (item == null)
            {
                return false;
            }

            return dict.Remove(item);
        }

        /// <summary>
        /// Gets the number of items in the set.
        /// </summary>
        public int Count
        {
            get { return dict.Count; }
        }

        /// <summary>
        /// Gets a value indicating whether the set is read-only. Always false.
        /// </summary>
        public bool IsReadOnly
        {
            get { return false; }
        }

        #endregion

        /// <summary>
        /// Returns a new set holding every item of this set and of <paramref name="set"/>.
        /// Neither operand is modified.
        /// </summary>
        /// <param name="set">The other operand; <c>null</c> is treated as an empty set.</param>
        public HashSet<T> Union(HashSet<T> set)
        {
            HashSet<T> result = new HashSet<T>(this);

            if (set != null)
            {
                foreach (T item in set)
                {
                    // Add() is idempotent, so no membership test is needed first.
                    result.Add(item);
                }
            }

            return result;
        }

        /// <summary>
        /// Returns a new set holding the items of this set that are not in <paramref name="set"/>.
        /// Neither operand is modified.
        /// </summary>
        /// <param name="set">The items to exclude; <c>null</c> is treated as an empty set.</param>
        public HashSet<T> Subtract(HashSet<T> set)
        {
            HashSet<T> result = new HashSet<T>(this);

            if (set != null)
            {
                foreach (T item in set)
                {
                    // Removing an absent item simply returns false.
                    result.Remove(item);
                }
            }

            return result;
        }

        /// <summary>
        /// Determines whether every item of this set is also in <paramref name="set"/>.
        /// </summary>
        /// <param name="set">The candidate superset; <c>null</c> is treated as an empty set.</param>
        public bool IsSubsetOf(HashSet<T> set)
        {
            HashSet<T> other = set ?? NullSet;

            foreach (T item in this)
            {
                if (!other.Contains(item))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns a new set holding the items present in both this set and <paramref name="set"/>.
        /// Neither operand is modified.
        /// </summary>
        /// <param name="set">The other operand; <c>null</c> yields an empty result.</param>
        public HashSet<T> Intersection(HashSet<T> set)
        {
            HashSet<T> result = NullSet;

            if (set == null)
            {
                return result;
            }

            // One pass is complete: any common item is, by definition, an item of this set.
            foreach (T item in this)
            {
                if (set.Contains(item))
                {
                    result.Add(item);
                }
            }

            return result;
        }

        /// <summary>
        /// Determines whether this set is a subset of <paramref name="set"/> and the two sets differ.
        /// </summary>
        /// <param name="set">The candidate superset; <c>null</c> is treated as an empty set.</param>
        public bool IsProperSubsetOf(HashSet<T> set)
        {
            HashSet<T> other = set ?? NullSet;

            // A is a proper subset of B when A is a subset of B and B is not a subset of A.
            return IsSubsetOf(other) && !other.IsSubsetOf(this);
        }

        /// <summary>
        /// Determines whether every item of <paramref name="set"/> is also in this set.
        /// </summary>
        /// <param name="set">The candidate subset; <c>null</c> is treated as an empty set.</param>
        public bool IsSupersetOf(HashSet<T> set)
        {
            HashSet<T> other = set ?? NullSet;

            foreach (T item in other)
            {
                if (!Contains(item))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Determines whether this set is a superset of <paramref name="set"/> and the two sets differ.
        /// </summary>
        /// <param name="set">The candidate subset; <c>null</c> is treated as an empty set.</param>
        public bool IsProperSupersetOf(HashSet<T> set)
        {
            HashSet<T> other = set ?? NullSet;

            // A is a proper superset of B when A is a superset of B and B is not a superset of A.
            return IsSupersetOf(other) && !other.IsSupersetOf(this);
        }

        /// <summary>
        /// Returns the items of the set as a new <see cref="List{T}"/>.
        /// </summary>
        public List<T> ToList()
        {
            return new List<T>(this);
        }

        #region Implementation of ISerializable

        /// <summary>
        /// Populates <paramref name="info"/> by delegating to the backing dictionary.
        /// </summary>
        /// <param name="info">The serialization store to populate.</param>
        /// <param name="context">The destination of the serialization.</param>
        /// <exception cref="ArgumentNullException"><paramref name="info"/> is <c>null</c>.</exception>
        /// <remarks>
        /// NOTE(review): this class declares neither a [Serializable] attribute nor a
        /// (SerializationInfo, StreamingContext) constructor, so a full formatter
        /// round-trip probably needs more work - confirm before relying on it.
        /// </remarks>
        public void GetObjectData(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
            {
                throw new ArgumentNullException("info");
            }

            dict.GetObjectData(info, context);
        }

        #endregion

        #region Implementation of IDeserializationCallback

        /// <summary>
        /// Forwards the deserialization-complete callback to the backing dictionary.
        /// </summary>
        /// <param name="sender">The object that initiated the callback.</param>
        public void OnDeserialization(object sender)
        {
            dict.OnDeserialization(sender);
        }

        #endregion

        #region Implementation of IEnumerable

        /// <summary>
        /// Returns an enumerator over the items of the set (the keys of the backing dictionary).
        /// </summary>
        public IEnumerator<T> GetEnumerator()
        {
            return dict.Keys.GetEnumerator();
        }

        /// <summary>
        /// Returns a non-generic enumerator over the items of the set.
        /// </summary>
        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        #endregion
    }

    以下为过滤算法

    /// <summary>
    /// Detects whether a text contains any word from a configured list.
    /// Per-character lookup tables reject most positions cheaply, so a substring
    /// is only built and looked up in the word set when every table agrees.
    /// </summary>
    public class BadWordsFilter
    {
        // One slot per possible char value, including '\uFFFF'.
        private const int TableSize = char.MaxValue + 1;

        // Words of two or more characters; one-character words live in charCheck only.
        private HashSet<string> hash = new HashSet<string>();

        // Bit i (0..6): the character occurs at position i of some word.
        // Bit 7: the character occurs at position 7 or later of some word.
        private byte[] fastCheck = new byte[TableSize];

        // Indexed by a word's first character: bit min(7, length - 2) is set for
        // each multi-character word starting with that character.
        private byte[] fastLength = new byte[TableSize];

        // True for characters that are a complete one-character word.
        private BitArray charCheck = new BitArray(TableSize);

        // True for characters that end some multi-character word.
        private BitArray endCheck = new BitArray(TableSize);

        private int maxWordLength = 0;
        private int minWordLength = int.MaxValue;

        /// <summary>
        /// Creates a filter with no words registered; call <see cref="Init"/> to add some.
        /// </summary>
        public BadWordsFilter()
        {
        }

        /// <summary>
        /// Registers the given words. Calls accumulate: words from earlier calls stay registered.
        /// Null and empty entries are ignored.
        /// </summary>
        /// <param name="badwords">The words to detect.</param>
        /// <exception cref="ArgumentNullException"><paramref name="badwords"/> is <c>null</c>.</exception>
        public void Init(string[] badwords)
        {
            if (badwords == null)
            {
                throw new ArgumentNullException("badwords");
            }

            foreach (string word in badwords)
            {
                // An empty entry has no first character and would corrupt minWordLength.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                maxWordLength = Math.Max(maxWordLength, word.Length);
                minWordLength = Math.Min(minWordLength, word.Length);

                // Positions 0..6 each get their own bit.
                for (int i = 0; i < 7 && i < word.Length; i++)
                {
                    fastCheck[word[i]] |= (byte)(1 << i);
                }

                // Positions 7 and beyond share the top bit.
                for (int i = 7; i < word.Length; i++)
                {
                    fastCheck[word[i]] |= 0x80;
                }

                if (word.Length == 1)
                {
                    charCheck[word[0]] = true;
                }
                else
                {
                    fastLength[word[0]] |= (byte)(1 << Math.Min(7, word.Length - 2));
                    endCheck[word[word.Length - 1]] = true;
                    hash.Add(word);
                }
            }
        }

        /// <summary>
        /// Not implemented; always throws.
        /// </summary>
        /// <param name="text">The text to filter.</param>
        /// <param name="mask">The replacement mask.</param>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public string Filter(string text, string mask)
        {
            throw new NotImplementedException();
        }

        /// <summary>
        /// Determines whether <paramref name="text"/> contains any registered word as a substring.
        /// </summary>
        /// <param name="text">The text to scan; <c>null</c> or empty text contains no words.</param>
        /// <returns>true if at least one registered word occurs in <paramref name="text"/>; otherwise false.</returns>
        public bool HasBadWord(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            // Every position is a candidate start, so overlapping candidates
            // such as "ab" inside "aab" are not skipped.
            for (int index = 0; index < text.Length; index++)
            {
                char begin = text[index];

                // No registered word starts with this character.
                if ((fastCheck[begin] & 1) == 0)
                {
                    continue;
                }

                if (charCheck[begin])
                {
                    return true;
                }

                // Largest offset worth inspecting: limited by the longest word and the text end.
                int lastOffset = Math.Min(maxWordLength, text.Length - index) - 1;

                for (int j = 1; j <= lastOffset; j++)
                {
                    char current = text[index + j];

                    // No word has this character at offset j, so no longer match can start here either.
                    if ((fastCheck[current] & (1 << Math.Min(j, 7))) == 0)
                    {
                        break;
                    }

                    if (j + 1 < minWordLength)
                    {
                        continue;
                    }

                    // Only build the substring when a word of this length starts with
                    // 'begin' and some word ends with 'current'.
                    bool lengthMatches = (fastLength[begin] & (1 << Math.Min(j - 1, 7))) != 0;

                    if (lengthMatches && endCheck[current] && hash.Contains(text.Substring(index, j + 1)))
                    {
                        return true;
                    }
                }
            }

            return false;
        }
    }
    }

  • 相关阅读:
    geoserver发布地图服务WMTS
    geoserver发布地图服务WMS
    geoserver安装部署步骤
    arcgis api 3.x for js 入门开发系列十四最近设施点路径分析(附源码下载)
    arcgis api 3.x for js 入门开发系列十三地图最短路径分析(附源码下载)
    cesium 之自定义气泡窗口 infoWindow 后续优化篇(附源码下载)
    arcgis api 3.x for js 入门开发系列十二地图打印GP服务(附源码下载)
    arcgis api 3.x for js 入门开发系列十一地图统计图(附源码下载)
    arcgis api 3.x for js 入门开发系列十叠加 SHP 图层(附源码下载)
    arcgis api 3.x for js入门开发系列九热力图效果(附源码下载)
  • 原文地址:https://www.cnblogs.com/qingyi/p/1977494.html
Copyright © 2020-2023  润新知