/// <summary>
/// Bloom filter over strings: a fixed-size bit array combined with several
/// seeded hash functions. <see cref="contains"/> never returns false for a
/// value that was passed to <see cref="add"/>, but it may return true for a
/// value that was never added (a false positive).
/// </summary>
public class BloomFilter
{
    // Number of hash functions (seeds) actually in use.
    private readonly int _seedcount;
    // Size of the bit array, in bits.
    private readonly int _memsize;
    // Seeds for the individual hash functions; these should generally be primes.
    private static readonly List<int> seeds = new List<int> { 5, 37, 73, 31, 17, 47, 67, 29, 13, 19, 89, 43, 7, 53, 41, 61, 23, 71, 11, 79, 83, 59, 97 };
    private readonly BitArray bits;
    // One hash function object per seed in use.
    private readonly List<SimpleHash> func = new List<SimpleHash>();

    /// <summary>
    /// Creates an empty filter sized from the expected item count and the
    /// target false-positive rate.
    /// </summary>
    /// <param name="ncount">Expected number of stored items; must be positive.</param>
    /// <param name="correction">Target false-positive rate; must be greater than 0 and less than 1.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="ncount"/> is not positive, or <paramref name="correction"/>
    /// is not strictly between 0 and 1.
    /// </exception>
    /// <exception cref="OverflowException">
    /// The computed bit-array size does not fit in an <see cref="int"/>.
    /// </exception>
    public BloomFilter(int ncount, double correction)
    {
        if (ncount <= 0)
        {
            throw new ArgumentOutOfRangeException("ncount", "The expected item count must be positive.");
        }
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(correction > 0.0 && correction < 1.0))
        {
            throw new ArgumentOutOfRangeException("correction", "The false-positive rate must be greater than 0 and less than 1.");
        }

        // Bits needed per stored item: -ln(p) / (ln 2)^2.
        double ln2 = Math.Log(2);
        double bitsPerItem = -Math.Log(correction) / (ln2 * ln2);

        // Hash-function count is 0.7 * bits-per-item, clamped so that at least
        // one function is used (otherwise contains() would accept everything)
        // and no more seeds are requested than the seed table provides.
        int wantedSeeds = Convert.ToInt32(0.7 * bitsPerItem);
        _seedcount = Math.Min(Math.Max(wantedSeeds, 1), seeds.Count);

        // Never allocate an empty bit array: the hash functions need size >= 1.
        _memsize = Math.Max(Convert.ToInt32(ncount * bitsPerItem), 1);

        bits = new BitArray(_memsize);
        for (int i = 0; i < _seedcount; i++)
        {
            func.Add(new SimpleHash(_memsize, seeds[i]));
        }
    }

    /// <summary>
    /// Marks a string in the bit array by setting the bit chosen by each hash function.
    /// </summary>
    /// <param name="value">The string to record; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public void add(String value)
    {
        if (value == null)
        {
            throw new ArgumentNullException("value");
        }
        foreach (SimpleHash f in func)
        {
            bits.Set(f.hash(value), true);
        }
    }

    /// <summary>
    /// Tests whether a string has been marked in the bit array.
    /// </summary>
    /// <param name="value">The string to look up.</param>
    /// <returns>
    /// False if <paramref name="value"/> is null or if any of its hash positions
    /// is unset; true if every hash position is set.
    /// </returns>
    public Boolean contains(String value)
    {
        if (value == null)
        {
            return false;
        }
        foreach (SimpleHash f in func)
        {
            // A single unset bit settles the answer; no need to probe the rest.
            if (!bits.Get(f.hash(value)))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Seeded string hash that maps a string to an index in [0, size).
    /// </summary>
    public class SimpleHash
    {
        private readonly int size;
        private readonly int seed;

        /// <summary>
        /// Creates a hash function for a table of the given size.
        /// </summary>
        /// <param name="size">Number of slots the hash maps into; must be positive.</param>
        /// <param name="seed">Initial value of the hash accumulator.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not positive.</exception>
        public SimpleHash(int size, int seed)
        {
            if (size <= 0)
            {
                throw new ArgumentOutOfRangeException("size", "The table size must be positive.");
            }
            this.size = size;
            this.seed = seed;
        }

        /// <summary>
        /// Hashes a string with a simple shift-and-xor mix, alternating between
        /// two mixing steps on even and odd character positions.
        /// </summary>
        /// <param name="value">The string to hash; must not be null.</param>
        /// <returns>An index in the range [0, size).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public int hash(String value)
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }

            long h = seed;
            for (int i = 0; i < value.Length; i++)
            {
                if ((i & 1) == 0)
                {
                    h ^= ((h << 7) ^ value[i] ^ (h >> 3));
                }
                else
                {
                    h ^= (~((h << 11) ^ value[i] ^ (h >> 5)));
                }
            }

            // Reduce over the whole table. C#'s % keeps the sign of the dividend,
            // so a negative remainder is shifted back into [0, size).
            long index = h % size;
            if (index < 0)
            {
                index += size;
            }
            return (int)index;
        }
    }
}