• Hash算法


     高性能的Hash算法对我们的应用程序无疑是至关重要的。以下几种Hash的性能很不俗,记录在这里。

    1. MurMurHash算法

      MurmurHash 是一种非加密哈希函数,适用于一般的哈希检索操作。 由Austin Appleby在2008年发明,并出现了多个变种,都已经发布到了公有领域(public domain)。与其它流行的哈希函数相比,对于规律性较强的key,MurmurHash的随机分布特征表现更良好。

      以下是MurmurHash官方性能图

      

    以下是官方的算法实现,随手摘来了。

    public class Murmur3
    {
        // 128 bit output, 64 bit platform version
     
        public static ulong READ_SIZE = 16;
        private static ulong C1 = 0x87c37b91114253d5L;
        private static ulong C2 = 0x4cf5ad432745937fL;
     
        private ulong length;
        private uint seed; // if want to start with a seed, create a constructor
        ulong h1;
        ulong h2;
     
        private void MixBody(ulong k1, ulong k2)
        {
            h1 ^= MixKey1(k1);
     
            h1 = h1.RotateLeft(27);
            h1 += h2;
            h1 = h1 * 5 + 0x52dce729;
     
            h2 ^= MixKey2(k2);
     
            h2 = h2.RotateLeft(31);
            h2 += h1;
            h2 = h2 * 5 + 0x38495ab5;
        }
     
        private static ulong MixKey1(ulong k1)
        {
            k1 *= C1;
            k1 = k1.RotateLeft(31);
            k1 *= C2;
            return k1;
        }
     
        private static ulong MixKey2(ulong k2)
        {
            k2 *= C2;
            k2 = k2.RotateLeft(33);
            k2 *= C1;
            return k2;
        }
     
        private static ulong MixFinal(ulong k)
        {
            // avalanche bits
     
            k ^= k >> 33;
            k *= 0xff51afd7ed558ccdL;
            k ^= k >> 33;
            k *= 0xc4ceb9fe1a85ec53L;
            k ^= k >> 33;
            return k;
        }
     
        public byte[] ComputeHash(byte[] bb)
        {
            ProcessBytes(bb);
            return Hash;
        }
     
        private void ProcessBytes(byte[] bb)
        {
            h1 = seed;
            this.length = 0L;
     
            int pos = 0;
            ulong remaining = (ulong)bb.Length;
     
            // read 128 bits, 16 bytes, 2 longs in eacy cycle
            while (remaining >= READ_SIZE)
            {
                ulong k1 = bb.GetUInt64(pos);
                pos += 8;
     
                ulong k2 = bb.GetUInt64(pos);
                pos += 8;
     
                length += READ_SIZE;
                remaining -= READ_SIZE;
     
                MixBody(k1, k2);
            }
     
            // if the input MOD 16 != 0
            if (remaining > 0)
                ProcessBytesRemaining(bb, remaining, pos);
        }
     
        private void ProcessBytesRemaining(byte[] bb, ulong remaining, int pos)
        {
            ulong k1 = 0;
            ulong k2 = 0;
            length += remaining;
     
            // little endian (x86) processing
            switch (remaining)
            {
                case 15:
                    k2 ^= (ulong)bb[pos + 14] << 48; // fall through
                    goto case 14;
                case 14:
                    k2 ^= (ulong)bb[pos + 13] << 40; // fall through
                    goto case 13;
                case 13:
                    k2 ^= (ulong)bb[pos + 12] << 32; // fall through
                    goto case 12;
                case 12:
                    k2 ^= (ulong)bb[pos + 11] << 24; // fall through
                    goto case 11;
                case 11:
                    k2 ^= (ulong)bb[pos + 10] << 16; // fall through
                    goto case 10;
                case 10:
                    k2 ^= (ulong)bb[pos + 9] << 8; // fall through
                    goto case 9;
                case 9:
                    k2 ^= (ulong)bb[pos + 8]; // fall through
                    goto case 8;
                case 8:
                    k1 ^= bb.GetUInt64(pos);
                    break;
                case 7:
                    k1 ^= (ulong)bb[pos + 6] << 48; // fall through
                    goto case 6;
                case 6:
                    k1 ^= (ulong)bb[pos + 5] << 40; // fall through
                    goto case 5;
                case 5:
                    k1 ^= (ulong)bb[pos + 4] << 32; // fall through
                    goto case 4;
                case 4:
                    k1 ^= (ulong)bb[pos + 3] << 24; // fall through
                    goto case 3;
                case 3:
                    k1 ^= (ulong)bb[pos + 2] << 16; // fall through
                    goto case 2;
                case 2:
                    k1 ^= (ulong)bb[pos + 1] << 8; // fall through
                    goto case 1;
                case 1:
                    k1 ^= (ulong)bb[pos]; // fall through
                    break;
                default:
                    throw new Exception("Something went wrong with remaining bytes calculation.");
            }
     
            h1 ^= MixKey1(k1);
            h2 ^= MixKey2(k2);
        }
     
        public byte[] Hash
        {
            get
            {
                h1 ^= length;
                h2 ^= length;
     
                h1 += h2;
                h2 += h1;
     
                h1 = Murmur3.MixFinal(h1);
                h2 = Murmur3.MixFinal(h2);
     
                h1 += h2;
                h2 += h1;
     
                var hash = new byte[Murmur3.READ_SIZE];
     
                Array.Copy(BitConverter.GetBytes(h1), 0, hash, 0, 8);
                Array.Copy(BitConverter.GetBytes(h2), 0, hash, 8, 8);
     
                return hash;
            }
        }
    }
    

     使用方法:

    public static class IntHelpers
    {
        public static ulong RotateLeft(this ulong original, int bits)
        {
            return (original << bits) | (original >> (64 - bits));
        }
     
        public static ulong RotateRight(this ulong original, int bits)
        {
            return (original >> bits) | (original << (64 - bits));
        }
     
        unsafe public static ulong GetUInt64(this byte[] bb, int pos)
        {
            // we only read aligned longs, so a simple casting is enough
            fixed (byte* pbyte = &bb[pos])
            {
                return *((ulong*)pbyte);
            }
        }
    }
    
  • 相关阅读:
    03_02_leetcode_24_删除有序数组的重复项
    03_02_leetcode_141_环形链表
    03_02_leetcode_24_两两交换链表中的结点
    Solution -「ZJOI 2014」力
    Solution -「GXOI / GZOI 2019」宝牌一大堆
    Solution -「CSP 2019」Centroid
    Solution -「CSP 2019」Partition
    Note -「Suffix Automaton」SAM
    Solution -「BalticOI 2004」Sequence
    Solution -「BZOJ 3779」重组病毒
  • 原文地址:https://www.cnblogs.com/rainnight/p/3475514.html
Copyright © 2020-2023  润新知