在.NET中,GetHashCode方法在整个.NET基类库的许多地方都使用。正确执行它对于在集合中或确定相等时快速查找项目尤为重要。

对于如何为自定义类实现GetHashCode,是否有标准算法或最佳实践,以便不会降低性能?


当前回答

如果我们的财产不超过8处(希望如此),这里还有另一种选择。

ValueTuple是一个结构,似乎有一个可靠的GetHashCode实现。

这意味着我们可以简单地这样做:

// Yay, no allocations and no custom implementations!
public override int GetHashCode() => (this.PropA, this.PropB).GetHashCode();

让我们来看看.NETCore当前对ValueTuple的GetHashCode的实现。

这来自ValueTuple:

    internal static int CombineHashCodes(int h1, int h2)
    {
        return HashHelpers.Combine(HashHelpers.Combine(HashHelpers.RandomSeed, h1), h2);
    }

    internal static int CombineHashCodes(int h1, int h2, int h3)
    {
        return HashHelpers.Combine(CombineHashCodes(h1, h2), h3);
    }

这来自HashHelper:

    public static readonly int RandomSeed = Guid.NewGuid().GetHashCode();

    public static int Combine(int h1, int h2)
    {
        unchecked
        {
            // RyuJIT optimizes this to use the ROL instruction
            // Related GitHub pull request: dotnet/coreclr#1830
            uint rol5 = ((uint)h1 << 5) | ((uint)h1 >> 27);
            return ((int)rol5 + h1) ^ h2;
        }
    }

英语:

向左旋转(循环移位)h1 5个位置。将结果和h1相加。将结果与h2进行异或运算。首先对{static random seed,h1}执行上述操作。对于每个其他项目,对上一个结果和下一个项目(例如h2)执行操作。

如果能更多地了解这个ROL-5散列代码算法的财产,那就太好了。

遗憾的是,为我们自己的GetHashCode延迟ValueTuple可能不像我们希望的那样快。相关讨论中的这条评论说明了直接调用HashHelpers.Combine更具性能。另一方面,这是内部的,所以我们必须复制代码,牺牲了我们在这里获得的大部分。此外,我们将负责记住首先与随机种子结合。我不知道如果我们跳过这一步会有什么后果。

其他回答

与夜编码器的解决方案非常相似,只是如果你想提高素数更容易。

PS:这是你嘴里吐出一点东西的时候之一,因为你知道这可以用9个默认值重构成一个方法,但它会更慢,所以你闭上眼睛,试着忘掉它。

/// <summary>
/// Try not to look at the source code. It works. Just rely on it.
/// </summary>
public static class HashHelper
{
    private const int PrimeOne = 17;
    private const int PrimeTwo = 23;

    public static int GetHashCode<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();
            hash = hash * PrimeTwo + arg5.GetHashCode();
            hash = hash * PrimeTwo + arg6.GetHashCode();
            hash = hash * PrimeTwo + arg7.GetHashCode();
            hash = hash * PrimeTwo + arg8.GetHashCode();
            hash = hash * PrimeTwo + arg9.GetHashCode();
            hash = hash * PrimeTwo + arg10.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3, T4, T5, T6, T7, T8, T9>(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();
            hash = hash * PrimeTwo + arg5.GetHashCode();
            hash = hash * PrimeTwo + arg6.GetHashCode();
            hash = hash * PrimeTwo + arg7.GetHashCode();
            hash = hash * PrimeTwo + arg8.GetHashCode();
            hash = hash * PrimeTwo + arg9.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3, T4, T5, T6, T7, T8>(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();
            hash = hash * PrimeTwo + arg5.GetHashCode();
            hash = hash * PrimeTwo + arg6.GetHashCode();
            hash = hash * PrimeTwo + arg7.GetHashCode();
            hash = hash * PrimeTwo + arg8.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3, T4, T5, T6, T7>(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();
            hash = hash * PrimeTwo + arg5.GetHashCode();
            hash = hash * PrimeTwo + arg6.GetHashCode();
            hash = hash * PrimeTwo + arg7.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3, T4, T5, T6>(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();
            hash = hash * PrimeTwo + arg5.GetHashCode();
            hash = hash * PrimeTwo + arg6.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3, T4, T5>(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();
            hash = hash * PrimeTwo + arg5.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3, T4>(T1 arg1, T2 arg2, T3 arg3, T4 arg4)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();
            hash = hash * PrimeTwo + arg4.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2, T3>(T1 arg1, T2 arg2, T3 arg3)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();
            hash = hash * PrimeTwo + arg3.GetHashCode();

            return hash;
        }
    }

    public static int GetHashCode<T1, T2>(T1 arg1, T2 arg2)
    {
        unchecked
        {
            int hash = PrimeOne;
            hash = hash * PrimeTwo + arg1.GetHashCode();
            hash = hash * PrimeTwo + arg2.GetHashCode();

            return hash;
        }
    }
}

这是我的哈希码助手。它的优点是它使用泛型类型参数,因此不会导致装箱:

public static class HashHelper
{
    public static int GetHashCode<T1, T2>(T1 arg1, T2 arg2)
    {
         unchecked
         {
             return 31 * arg1.GetHashCode() + arg2.GetHashCode();
         }
    }

    public static int GetHashCode<T1, T2, T3>(T1 arg1, T2 arg2, T3 arg3)
    {
        unchecked
        {
            int hash = arg1.GetHashCode();
            hash = 31 * hash + arg2.GetHashCode();
            return 31 * hash + arg3.GetHashCode();
        }
    }

    public static int GetHashCode<T1, T2, T3, T4>(T1 arg1, T2 arg2, T3 arg3, 
        T4 arg4)
    {
        unchecked
        {
            int hash = arg1.GetHashCode();
            hash = 31 * hash + arg2.GetHashCode();
            hash = 31 * hash + arg3.GetHashCode();
            return 31 * hash + arg4.GetHashCode();
        }
    }

    public static int GetHashCode<T>(T[] list)
    {
        unchecked
        {
            int hash = 0;
            foreach (var item in list)
            {
                hash = 31 * hash + item.GetHashCode();
            }
            return hash;
        }
    }

    public static int GetHashCode<T>(IEnumerable<T> list)
    {
        unchecked
        {
            int hash = 0;
            foreach (var item in list)
            {
                hash = 31 * hash + item.GetHashCode();
            }
            return hash;
        }
    }

    /// <summary>
    /// Gets a hashcode for a collection for that the order of items 
    /// does not matter.
    /// So {1, 2, 3} and {3, 2, 1} will get same hash code.
    /// </summary>
    public static int GetHashCodeForOrderNoMatterCollection<T>(
        IEnumerable<T> list)
    {
        unchecked
        {
            int hash = 0;
            int count = 0;
            foreach (var item in list)
            {
                hash += item.GetHashCode();
                count++;
            }
            return 31 * hash + count.GetHashCode();
        }
    }

    /// <summary>
    /// Alternative way to get a hashcode is to use a fluent 
    /// interface like this:<br />
    /// return 0.CombineHashCode(field1).CombineHashCode(field2).
    ///     CombineHashCode(field3);
    /// </summary>
    public static int CombineHashCode<T>(this int hashCode, T arg)
    {
        unchecked
        {
            return 31 * hashCode + arg.GetHashCode();   
        }
    }

此外,它还具有扩展方法来提供流畅的界面,因此您可以这样使用它:

public override int GetHashCode()
{
    return HashHelper.GetHashCode(Manufacturer, PartN, Quantity);
}

或者像这样:

public override int GetHashCode()
{
    return 0.CombineHashCode(Manufacturer)
        .CombineHashCode(PartN)
        .CombineHashCode(Quantity);
}

我想把我的最新发现添加到我经常提到的这个主题中。

我当前的visual studio/项目设置提供了将元组自动重构为结构的功能。这将生成如下GetHashCode函数:

        public override int GetHashCode()
        {
            int hashCode = -2088324004;
            hashCode = hashCode * -1521134295 + AuftragGesperrt.GetHashCode();
            hashCode = hashCode * -1521134295 + Auftrag_gesperrt_von.GetHashCode();
            hashCode = hashCode * -1521134295 + Auftrag_gesperrt_am.GetHashCode();
            return hashCode;
        }

编辑:为了澄清AuftragGesperrt、Auftrag _gesperrt_von和Auftrag-gesperrt _am是财产。如果微软的开发人员使用这个功能,这可能是一个不错的解决方案。

ValueTuple-C#7更新

正如@cactuaroid在评论中提到的,可以使用值元组。这节省了一些击键,更重要的是纯粹在堆栈上执行(无垃圾):

(PropA, PropB, PropC, PropD).GetHashCode();

(注意:使用匿名类型的原始技术似乎在堆上创建了一个对象,即垃圾,因为匿名类型被实现为类,尽管编译器可能会对此进行优化。对这些选项进行基准测试会很有趣,但元组选项应该更优。)

匿名类型(原始答案)

Microsoft已经提供了一个很好的通用HashCode生成器:只需将属性/字段值复制到匿名类型并对其进行哈希:

new { PropA, PropB, PropC, PropD }.GetHashCode();

这适用于任何数量的财产。它不使用拳击。它只是使用了框架中已经实现的匿名类型的算法。

这是我的简单方法。我使用的是经典的生成器模式。它是类型安全的(无装箱/拆箱),并且与.NET 2.0兼容(无扩展方法等)。

它的用法如下:

public override int GetHashCode()
{
    HashBuilder b = new HashBuilder();
    b.AddItems(this.member1, this.member2, this.member3);
    return b.Result;
} 

这里是实际的生成器类:

internal class HashBuilder
{
    private const int Prime1 = 17;
    private const int Prime2 = 23;
    private int result = Prime1;

    public HashBuilder()
    {
    }

    public HashBuilder(int startHash)
    {
        this.result = startHash;
    }

    public int Result
    {
        get
        {
            return this.result;
        }
    }

    public void AddItem<T>(T item)
    {
        unchecked
        {
            this.result = this.result * Prime2 + item.GetHashCode();
        }
    }

    public void AddItems<T1, T2>(T1 item1, T2 item2)
    {
        this.AddItem(item1);
        this.AddItem(item2);
    }

    public void AddItems<T1, T2, T3>(T1 item1, T2 item2, T3 item3)
    {
        this.AddItem(item1);
        this.AddItem(item2);
        this.AddItem(item3);
    }

    public void AddItems<T1, T2, T3, T4>(T1 item1, T2 item2, T3 item3, 
        T4 item4)
    {
        this.AddItem(item1);
        this.AddItem(item2);
        this.AddItem(item3);
        this.AddItem(item4);
    }

    public void AddItems<T1, T2, T3, T4, T5>(T1 item1, T2 item2, T3 item3, 
        T4 item4, T5 item5)
    {
        this.AddItem(item1);
        this.AddItem(item2);
        this.AddItem(item3);
        this.AddItem(item4);
        this.AddItem(item5);
    }        

    public void AddItems<T>(params T[] items)
    {
        foreach (T item in items)
        {
            this.AddItem(item);
        }
    }
}