• 不可小瞧的GetHashCode函数


    要实现对象的相等比较,需要实现IEquatable<T>,或单独写一个类实现IEqualityComparer<T>接口。

    像List<T>的Contains这样的函数,如果我们自己定义的对象不实现IEquatable<T>接口,这个函数会默认调用object的Equals来比较对象(对引用类型而言就是比较引用),得出非预期的结果。

    先自定义一个类:

            /// <summary>
            /// Composite key made of <see cref="ID"/> and <see cref="SubID"/>.
            /// This version declares no equality members of its own, so any
            /// comparison falls back to the implementation inherited from
            /// <see cref="object"/>.
            /// </summary>
            public class DaichoKey 
            {
                /// <summary>First component of the key.</summary>
                public int ID { get; set; }
                /// <summary>Second component of the key.</summary>
                public int SubID { get; set; }
            }
    
    
                // Two keys that share ID = 1 and differ only in SubID.
                var lst = new List<DaichoKey>
                {
                    new DaichoKey { ID = 1, SubID = 2 },
                    new DaichoKey { ID = 1, SubID = 3 },
                };

                // A separate instance carrying the same values as the first element.
                DaichoKey newItem = new DaichoKey { ID = 1, SubID = 2 };

                // false: without equality members on DaichoKey, Contains does not
                // treat the new instance as equal to the stored one.
                bool isContains = lst.Contains(newItem);
    
    

     上面的代码调用Contains后得到false,而我们的预期是:ID为1、SubID为2的对象已经存在于列表中,应该得到true才对呀。

    要实现这个效果,需要实现IEquatable<T>接口。

            /// <summary>
            /// Composite key made of <see cref="ID"/> and <see cref="SubID"/>.
            /// Implements <see cref="IEquatable{T}"/> so that two instances with the
            /// same ID and SubID compare as equal.
            /// </summary>
            public class DaichoKey : IEquatable<DaichoKey>
            {
                /// <summary>First component of the key.</summary>
                public int ID { get; set; }

                /// <summary>Second component of the key.</summary>
                public int SubID { get; set; }

                /// <summary>
                /// Compares this key with another key by value.
                /// </summary>
                /// <param name="other">The key to compare with; may be null.</param>
                /// <returns>
                /// true when <paramref name="other"/> is not null and has the same
                /// ID and SubID; otherwise false.
                /// </returns>
                public bool Equals(DaichoKey other)
                {
                    // A null argument can never equal an existing instance; without
                    // this guard the property reads below would throw.
                    if (ReferenceEquals(other, null))
                    {
                        return false;
                    }

                    return this.ID == other.ID && this.SubID == other.SubID;
                }
            }
    

    经过上面的改良,结果如我们预期了,但是还不够完善,微软建议我们同时重写object的Equals方法和GetHashCode方法,以保持语义的一致性,于是有了下面的代码:

            /// <summary>
            /// Composite key made of <see cref="ID"/> and <see cref="SubID"/> with
            /// value-based Equals. GetHashCode still returns object's hash code in
            /// this version.
            /// </summary>
            public class DaichoKey : IEquatable<DaichoKey>
            {
                /// <summary>First component of the key.</summary>
                public int ID { get; set; }

                /// <summary>Second component of the key.</summary>
                public int SubID { get; set; }

                /// <summary>
                /// Compares this key with another key by value.
                /// </summary>
                /// <param name="other">The key to compare with; may be null.</param>
                /// <returns>
                /// true when <paramref name="other"/> is not null and has the same
                /// ID and SubID; otherwise false.
                /// </returns>
                public bool Equals(DaichoKey other)
                {
                    // A null argument can never equal an existing instance.
                    if (ReferenceEquals(other, null))
                    {
                        return false;
                    }

                    return this.ID == other.ID && this.SubID == other.SubID;
                }

                /// <summary>
                /// Compares this key with an arbitrary object. Never throws: null
                /// and objects of other types simply compare as unequal.
                /// </summary>
                /// <param name="obj">The object to compare with; may be null.</param>
                /// <returns>true when <paramref name="obj"/> is a DaichoKey with the same ID and SubID.</returns>
                public override bool Equals(object obj)
                {
                    // `as` yields null both for null and for foreign types, and the
                    // typed overload answers false for null.
                    return Equals(obj as DaichoKey);
                }

                /// <summary>
                /// Returns the hash code inherited from <see cref="object"/>.
                /// </summary>
                /// <returns>The value of <c>base.GetHashCode()</c>.</returns>
                public override int GetHashCode()
                {
                    // NOTE(review): deliberately left unchanged. Two keys that are
                    // Equal can still report different hash codes here; the article
                    // uses exactly this version to show Distinct failing.
                    return base.GetHashCode();
                }
            }
    

     上面的代码依然还有缺陷,没重写==和!=运算符,但这不是本文讨论的重点。绕了一大圈,终于来到了GetHashCode函数身上,貌似它对我们的Contains函数没有啥影响呀,不重写又何妨?我们再来试试LINQ为IEnumerable<T>提供的扩展函数Distinct(List<T>同样可以调用): 

                // Seed list, then append a third element that duplicates the
                // values of the first one.
                var lst = new List<DaichoKey>
                {
                    new DaichoKey { ID = 1, SubID = 2 },
                    new DaichoKey { ID = 1, SubID = 3 },
                };
                DaichoKey newItem = new DaichoKey { ID = 1, SubID = 2 };
                lst.Add(newItem);

                // lst was assigned a few lines up and is never null at this point,
                // so Distinct can be applied directly; the element type is inferred.
                lst = lst.Distinct().ToList();

                // Result reported by the article:
                // 1 2
                // 1 3
                // 1 2
    

     悲剧发生了,数据1,2的重复数据没有被去掉呀,我们不是实现了IEquatable<T>接口吗。在园子上找到了一篇文章(c# 扩展方法奇思妙用基础篇八:Distinct 扩展),在回复中提到要将GetHashCode返回固定值,以强制调用IEquatable<T>的Equals方法。如下:

            /// <summary>
            /// Composite key made of <see cref="ID"/> and <see cref="SubID"/> with
            /// value-based Equals and a constant hash code.
            /// </summary>
            public class DaichoKey : IEquatable<DaichoKey>
            {
                /// <summary>First component of the key.</summary>
                public int ID { get; set; }

                /// <summary>Second component of the key.</summary>
                public int SubID { get; set; }

                /// <summary>
                /// Compares this key with another key by value.
                /// </summary>
                /// <param name="other">The key to compare with; may be null.</param>
                /// <returns>
                /// true when <paramref name="other"/> is not null and has the same
                /// ID and SubID; otherwise false.
                /// </returns>
                public bool Equals(DaichoKey other)
                {
                    // A null argument can never equal an existing instance.
                    if (ReferenceEquals(other, null))
                    {
                        return false;
                    }

                    return this.ID == other.ID && this.SubID == other.SubID;
                }

                /// <summary>
                /// Compares this key with an arbitrary object. Never throws: null
                /// and objects of other types simply compare as unequal.
                /// </summary>
                /// <param name="obj">The object to compare with; may be null.</param>
                /// <returns>true when <paramref name="obj"/> is a DaichoKey with the same ID and SubID.</returns>
                public override bool Equals(object obj)
                {
                    // `as` yields null both for null and for foreign types, and the
                    // typed overload answers false for null.
                    return Equals(obj as DaichoKey);
                }

                /// <summary>
                /// Returns the same hash code for every instance.
                /// </summary>
                /// <returns>Always 0.</returns>
                public override int GetHashCode()
                {
                    // A constant keeps Equals and GetHashCode consistent (equal keys
                    // always share a hash code), which is what makes Distinct work.
                    // The price: every key lands in the same hash bucket, so a
                    // hash-based lookup has to call Equals on each stored key.
                    return 0;
                }
            }
    

     结果立马就对了,难道是这个Distinct函数在比较时,先比较的HashCode值?

    带着这个疑问,反编译了下Distinct的代码,确实如我所猜测的那样。下面是源代码,有兴趣的同学,可以往下看看:

    // Decompiled entry point of the Distinct extension method.
    // Throws (via Error.ArgumentNull) when source is null; otherwise hands the
    // sequence to DistinctIterator with a null comparer.
    public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
    {
        if (source == null) throw Error.ArgumentNull("source");
        return DistinctIterator<TSource>(source, null);
    }
    
     // Decompiled helper behind Distinct. It does not read any element itself:
     // it creates the iterator object d__81 with initial state -2, stores the
     // source and comparer arguments in its <>3__ fields, and returns it.
     private static IEnumerable<TSource> DistinctIterator<TSource>(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
    {
        <DistinctIterator>d__81<TSource> d__ = new <DistinctIterator>d__81<TSource>(-2);
        d__.<>3__source = source;
        d__.<>3__comparer = comparer;
        return d__;
    }
    
     // Decompiler listing (member signatures only, no bodies) of the iterator
     // class that DistinctIterator instantiates. It implements both the
     // enumerable and the enumerator interfaces. The field <set>5__82 is the
     // Set<TSource> that MoveNext fills while walking the source.
     private sealed class <DistinctIterator>d__81<TSource> : IEnumerable<TSource>, IEnumerable, IEnumerator<TSource>, IEnumerator, IDisposable
    {
        // Fields
        private int <>1__state;
        private TSource <>2__current;
        public IEqualityComparer<TSource> <>3__comparer;
        public IEnumerable<TSource> <>3__source;
        public IEnumerator<TSource> <>7__wrap84;
        private int <>l__initialThreadId;
        public TSource <element>5__83;
        public Set<TSource> <set>5__82;
        public IEqualityComparer<TSource> comparer;
        public IEnumerable<TSource> source;
    
        // Methods
        [DebuggerHidden]
        public <DistinctIterator>d__81(int <>1__state);
        private void <>m__Finally85();
        private bool MoveNext();
        [DebuggerHidden]
        IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator();
        [DebuggerHidden, TargetedPatchingOptOut("Performance critical to inline this type of method across NGen image boundaries")]
        IEnumerator IEnumerable.GetEnumerator();
        [DebuggerHidden]
        void IEnumerator.Reset();
        void IDisposable.Dispose();
    
        // Properties
        TSource IEnumerator<TSource>.Current { [DebuggerHidden] get; }
        object IEnumerator.Current { [DebuggerHidden] get; }
    }
    
    // NOTE(review): this listing repeats, member for member, the declaration of
    // <DistinctIterator>d__81<TSource> that appears immediately before it.
    private sealed class <DistinctIterator>d__81<TSource> : IEnumerable<TSource>, IEnumerable, IEnumerator<TSource>, IEnumerator, IDisposable
    {
        // Fields
        private int <>1__state;
        private TSource <>2__current;
        public IEqualityComparer<TSource> <>3__comparer;
        public IEnumerable<TSource> <>3__source;
        public IEnumerator<TSource> <>7__wrap84;
        private int <>l__initialThreadId;
        public TSource <element>5__83;
        public Set<TSource> <set>5__82;
        public IEqualityComparer<TSource> comparer;
        public IEnumerable<TSource> source;
    
        // Methods
        [DebuggerHidden]
        public <DistinctIterator>d__81(int <>1__state);
        private void <>m__Finally85();
        private bool MoveNext();
        [DebuggerHidden]
        IEnumerator<TSource> IEnumerable<TSource>.GetEnumerator();
        [DebuggerHidden, TargetedPatchingOptOut("Performance critical to inline this type of method across NGen image boundaries")]
        IEnumerator IEnumerable.GetEnumerator();
        [DebuggerHidden]
        void IEnumerator.Reset();
        void IDisposable.Dispose();
    
        // Properties
        TSource IEnumerator<TSource>.Current { [DebuggerHidden] get; }
        object IEnumerator.Current { [DebuggerHidden] get; }
    }
    
    // Decompiled MoveNext of the Distinct iterator, driven by <>1__state.
    // Returns true after storing the next not-yet-seen element in <>2__current,
    // and false once the source enumerator has no more elements (or when the
    // state is neither 0 nor 2).
    private bool MoveNext()
    {
        bool flag;
        try
        {
            switch (this.<>1__state)
            {
                case 0:
                    // First call: create the Set that records the elements seen
                    // so far and open the enumerator over the source.
                    this.<>1__state = -1;
                    this.<set>5__82 = new Set<TSource>(this.comparer);
                    this.<>7__wrap84 = this.source.GetEnumerator();
                    this.<>1__state = 1;
                    goto Label_0092;
    
                case 2:
                    // Resuming after an element was handed out: keep scanning.
                    this.<>1__state = 1;
                    goto Label_0092;
    
                default:
                    goto Label_00A5;
            }
        Label_0050:
            this.<element>5__83 = this.<>7__wrap84.Current;
            // Set.Add returns true only for a value it did not already hold, so
            // only the first occurrence of each value is produced.
            if (this.<set>5__82.Add(this.<element>5__83))
            {
                this.<>2__current = this.<element>5__83;
                this.<>1__state = 2;
                return true;
            }
        Label_0092:
            // Advance the source; when it is exhausted run the cleanup helper.
            if (this.<>7__wrap84.MoveNext()) goto Label_0050;
            this.<>m__Finally85();
        Label_00A5:
            flag = false;
        }
        // NOTE(review): `fault` is decompiler output rather than C# syntax;
        // presumably it marks a handler that runs only when an exception leaves
        // the try block - confirm against the IL.
        fault
        {
            this.System.IDisposable.Dispose();
        }
        return flag;
    }
    
    // Decompiler listing (signatures only) of the internal hash set that the
    // Distinct iterator uses to remember the elements it has already produced.
    // The fields below are the storage that Find reads and updates.
    internal class Set<TElement>
    {
        // Fields
        private int[] buckets;
        private IEqualityComparer<TElement> comparer;
        private int count;
        private int freeList;
        private Slot<TElement>[] slots;
    
        // Methods
        [TargetedPatchingOptOut("Performance critical to inline this type of method across NGen image boundaries")]
        public Set();
        public Set(IEqualityComparer<TElement> comparer);
        public bool Add(TElement value);
        [TargetedPatchingOptOut("Performance critical to inline this type of method across NGen image boundaries")]
        public bool Contains(TElement value);
        private bool Find(TElement value, bool add);
        internal int InternalGetHashCode(TElement value);
        public bool Remove(TElement value);
        private void Resize();
    
        // Nested Types
        // One stored element: its cached hash code, the value itself, and the
        // index of the next slot in the same chain (Find follows `next`).
        [StructLayout(LayoutKind.Sequential)]
        internal struct Slot
        {
            internal int hashCode;
            internal TElement value;
            internal int next;
        }
    }
    // Stores value unless Find locates an equal element already in the set.
    // Returns true when the value was newly stored, false when it was a duplicate.
    public bool Add(TElement value)
    {
        return !this.Find(value, true);
    }
     
    // Reports whether an equal element is stored; the set is not modified
    // because Find is called with add = false.
    public bool Contains(TElement value)
    {
        return this.Find(value, false);
    }
    
    // Looks value up in the hash table and, when add is true and no match
    // exists, inserts it.
    // Returns true when an equal element was already stored; false otherwise
    // (including the case where the value has just been inserted).
    private bool Find(TElement value, bool add)
    {
        int hashCode = this.InternalGetHashCode(value);
        // buckets[] holds "slot index + 1", so an empty bucket (0) yields -1 and
        // the loop is skipped; otherwise the chain is followed through `next`.
        for (int i = this.buckets[hashCode % this.buckets.Length] - 1; i >= 0; i = this.slots[i].next)
        {
            // This is the decisive line: because of the short-circuit &&, the
            // comparer's Equals is consulted only when the stored hash code
            // matches the hash code of the value being looked up.
            if (this.slots[i].hashCode == hashCode && this.comparer.Equals(this.slots[i].value, value)) return true;
        }
        if (add)
        {
            int freeList;
            if (this.freeList >= 0)
            {
                // Reuse a slot from the free list and advance the list head.
                freeList = this.freeList;
                this.freeList = this.slots[freeList].next;
            }
            else
            {
                // No free slot: grow when the slot array is full, then append.
                if (this.count == this.slots.Length) this.Resize();
                freeList = this.count;
                this.count++;
            }
            // The bucket index is computed again here - presumably because
            // Resize may have changed buckets.Length.
            int index = hashCode % this.buckets.Length;
            this.slots[freeList].hashCode = hashCode;
            this.slots[freeList].value = value;
            // Link the new slot in front of the bucket's existing chain.
            this.slots[freeList].next = this.buckets[index] - 1;
            this.buckets[index] = freeList + 1;
        }
        return false;
    }
    
    

     在这段代码中可以看出,扩展函数Distinct在内部使用了一个Set<T>的类来帮助剔除重复数据,而这个内部类使用的是hash表的方式存储数据,所以会调用到我们自定义类的GetHashCode函数,如果返回的hashcode值不等,它就不会再调用Equals方法进行比较了。

    原因已经一目了然了,得出的结论就是:

    1,重写Equals方法的时候,尽量重写GetHashCode函数,并且不要简单的调用object的GetHashCode函数,返回一个设计合理的hash值,以保证结果如我们的预期。上面的做法直接返回了0,虽然解决了问题,但明显不是每个对象的hash值都是0,做法欠妥。

    2,List<T>的Contains,IndexOf方法,不会用到GetHashCode函数。

    3,扩展函数Distinct,Except用到了GetHashCode函数,必须重写这个函数。其他还有哪些函数用到了GetHashCode函数,以后再做补充,使用时多加注意就是了。

    4,如果对象要作为字典类(Dictionary)的主键,必须重写GetHashCode函数。

    2014/07/08 补充

    5,HashSet等容器的Add方法内部,也是先判断GetHashCode,如果GetHashCode值相等,进一步判断Equals方法是否相等来确定对象的相等性。

    所以,Equals是相等的,那么GetHashCode也必须要保证相等。相反却不一定,GetHashCode相等,Equals方法可以不等。

    6,改变影响GetHashCode返回值的字段值,会造成对象的HashCode值变化,如果对象已经存入了HashSet等容器中,将会使HashSet找不到这个对象,从而使得Remove等方法失败。

                // Two separate Point values holding the same coordinates.
                var a = new Point(1, 2);
                var b = new Point(1, 2);

                // The set starts out containing only a.
                var hashSet = new HashSet<Point> { a };

                // Can removing b delete a? Yes, it can.
                hashSet.Remove(b);

                // The set's Count drops to 0 because Point overrides Equals,
                // so a and b are considered equal.

    7,记录一个自定义值类型重写GetHashCode等方法的完整实现,作为参考。

     /// <summary>
     /// Immutable two-dimensional point with value equality: two points are
     /// equal when both coordinates match.
     /// </summary>
     public struct Point : IEquatable<Point>
     {
         // Assigned only in the constructor, so the value can never change
         // after it has been placed in a hash-based container.
         private readonly int x;
         private readonly int y;

         /// <summary>Creates a point from its two coordinates.</summary>
         /// <param name="x">The X coordinate.</param>
         /// <param name="y">The Y coordinate.</param>
         public Point(int x, int y)
         {
             this.x = x;
             this.y = y;
         }

         /// <summary>The X coordinate.</summary>
         public int X
         {
             get { return x; }
         }

         /// <summary>The Y coordinate.</summary>
         public int Y
         {
             get { return y; }
         }

         /// <summary>Returns true when both points have the same coordinates.</summary>
         public static bool operator ==(Point left, Point right)
         {
             // A struct operand is never null, so no reference check is needed;
             // the typed Equals also avoids boxing.
             return left.Equals(right);
         }

         /// <summary>Returns true when the points differ in at least one coordinate.</summary>
         public static bool operator !=(Point left, Point right)
         {
             return !left.Equals(right);
         }

         /// <summary>Compares this point with another point by value.</summary>
         /// <param name="other">The point to compare with.</param>
         /// <returns>true when both coordinates are equal.</returns>
         public bool Equals(Point other)
         {
             return this.x == other.x && this.y == other.y;
         }

         /// <summary>
         /// Compares this point with an arbitrary object. Never throws: null and
         /// objects of other types compare as unequal.
         /// </summary>
         /// <param name="obj">The object to compare with; may be null.</param>
         /// <returns>true when <paramref name="obj"/> is a Point with the same coordinates.</returns>
         public override bool Equals(object obj)
         {
             // `is` is false for null, so no dereference of a null obj can occur.
             return obj is Point && Equals((Point)obj);
         }

         /// <summary>
         /// Returns a hash code derived from both coordinates. Equal points
         /// always return the same value.
         /// </summary>
         public override int GetHashCode()
         {
             // Multiplying x by a prime before mixing in y makes the result
             // order-sensitive, so (1, 2) and (2, 1) no longer collide as they
             // do with a plain x ^ y. Overflow is intentional and harmless.
             unchecked
             {
                 return (x * 397) ^ y;
             }
         }
     }
    View Code
  • 相关阅读:
    分解质因数算法
    js 的 Math 对象
    字符串操作
    简化求质数算法
    数值类型小数点后是否可以接零问题
    新博第一篇,思考的重要性与求质数算法
    一、制作屏幕录像
    四、同步线程
    常见问题
    jni数据处理
  • 原文地址:https://www.cnblogs.com/xiashengwang/p/2942555.html
Copyright © 2020-2023  润新知