我怎样才能做得快呢?

当然我可以这样做:

static bool ByteArrayCompare(byte[] a1, byte[] a2)
{
    if (a1.Length != a2.Length)
        return false;

    for (int i=0; i<a1.Length; i++)
        if (a1[i]!=a2[i])
            return false;

    return true;
}

但我正在寻找一个BCL函数或一些高度优化的已证明的方法来做到这一点。

java.util.Arrays.equals((sbyte[])(Array)a1, (sbyte[])(Array)a2);

工作得很好,但这似乎不适用于x64。

注意我的快速回答。


当前回答

我开发了一个方法,稍微击败memcmp() (plinth的答案)和非常轻微击败EqualBytesLongUnrolled() (Arek Bulski的答案)在我的PC上。基本上,它以4而不是8展开循环。

2019年3月30日更新:

从。net核心3.0开始,我们有了SIMD支持!

这个解决方案在我的PC上是最快的:

#if NETCOREAPP3_0
using System.Runtime.Intrinsics.X86;
#endif
…

public static unsafe bool Compare(byte[] arr0, byte[] arr1)
{
    if (arr0 == arr1)
    {
        return true;
    }
    if (arr0 == null || arr1 == null)
    {
        return false;
    }
    if (arr0.Length != arr1.Length)
    {
        return false;
    }
    if (arr0.Length == 0)
    {
        return true;
    }
    fixed (byte* b0 = arr0, b1 = arr1)
    {
#if NETCOREAPP3_0
        if (Avx2.IsSupported)
        {
            return Compare256(b0, b1, arr0.Length);
        }
        else if (Sse2.IsSupported)
        {
            return Compare128(b0, b1, arr0.Length);
        }
        else
#endif
        {
            return Compare64(b0, b1, arr0.Length);
        }
    }
}
#if NETCOREAPP3_0
public static unsafe bool Compare256(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus128 = lastAddr - 128;
    const int mask = -1;
    while (b0 < lastAddrMinus128) // unroll the loop so that we are comparing 128 bytes at a time.
    {
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 32), Avx.LoadVector256(b1 + 32))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 64), Avx.LoadVector256(b1 + 64))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 96), Avx.LoadVector256(b1 + 96))) != mask)
        {
            return false;
        }
        b0 += 128;
        b1 += 128;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
public static unsafe bool Compare128(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus64 = lastAddr - 64;
    const int mask = 0xFFFF;
    while (b0 < lastAddrMinus64) // unroll the loop so that we are comparing 64 bytes at a time.
    {
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 16), Sse2.LoadVector128(b1 + 16))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 32), Sse2.LoadVector128(b1 + 32))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 48), Sse2.LoadVector128(b1 + 48))) != mask)
        {
            return false;
        }
        b0 += 64;
        b1 += 64;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
#endif
public static unsafe bool Compare64(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus32 = lastAddr - 32;
    while (b0 < lastAddrMinus32) // unroll the loop so that we are comparing 32 bytes at a time.
    {
        if (*(ulong*)b0 != *(ulong*)b1) return false;
        if (*(ulong*)(b0 + 8) != *(ulong*)(b1 + 8)) return false;
        if (*(ulong*)(b0 + 16) != *(ulong*)(b1 + 16)) return false;
        if (*(ulong*)(b0 + 24) != *(ulong*)(b1 + 24)) return false;
        b0 += 32;
        b1 += 32;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}

其他回答

. net 3.5及更新版本有一个新的公共类型System.Data.Linq.Binary,它封装了byte[]。它实现了IEquatable<Binary>,(实际上)比较两个字节数组。注意System.Data.Linq.Binary也有来自byte[]的隐式转换运算符。

MSDN文档:System.Data.Linq.Binary

Equals方法的反射器反编译:

private bool EqualsTo(Binary binary)
{
    if (this != binary)
    {
        if (binary == null)
        {
            return false;
        }
        if (this.bytes.Length != binary.bytes.Length)
        {
            return false;
        }
        if (this.hashCode != binary.hashCode)
        {
            return false;
        }
        int index = 0;
        int length = this.bytes.Length;
        while (index < length)
        {
            if (this.bytes[index] != binary.bytes[index])
            {
                return false;
            }
            index++;
        }
    }
    return true;
}

有趣的是,只有当两个Binary对象的哈希值相同时,它们才会进行逐字节比较循环。然而,这是以在二进制对象的构造函数中计算哈希值为代价的(通过使用for loop:-)遍历数组)。

上述实现意味着,在最坏的情况下,您可能必须遍历数组三次:首先计算array1的哈希值,然后计算array2的哈希值,最后(因为这是最坏的情况,长度和哈希值相等)比较array1中的字节和数组2中的字节。

总的来说,即使System.Data.Linq.Binary被内置到BCL中,我不认为这是比较两个字节数组的最快方法:-|。

看看。net是如何处理字符串的。Equals,你可以看到它使用了一个叫做EqualsHelper的私有方法,它有一个“不安全”的指针实现。net Reflector是你的朋友,可以看到内部是如何完成的。

这可以用作字节数组比较的模板,我在博客文章中用c#快速字节数组比较中做了一个实现。我还做了一些基本的基准测试,看看什么时候安全的实现比不安全的实现更快。

也就是说,除非你真的需要杀手级的性能,否则我会选择简单的fr循环比较。

简单的回答是:

    public bool Compare(byte[] b1, byte[] b2)
    {
        return Encoding.ASCII.GetString(b1) == Encoding.ASCII.GetString(b2);
    }

通过这种方式,您可以使用优化的. net字符串比较来进行字节数组比较,而不需要编写不安全的代码。这是它如何在后台完成的:

private unsafe static bool EqualsHelper(String strA, String strB)
{
    Contract.Requires(strA != null);
    Contract.Requires(strB != null);
    Contract.Requires(strA.Length == strB.Length);

    int length = strA.Length;

    fixed (char* ap = &strA.m_firstChar) fixed (char* bp = &strB.m_firstChar)
    {
        char* a = ap;
        char* b = bp;

        // Unroll the loop

        #if AMD64
            // For the AMD64 bit platform we unroll by 12 and
            // check three qwords at a time. This is less code
            // than the 32 bit case and is shorter
            // pathlength.

            while (length >= 12)
            {
                if (*(long*)a     != *(long*)b)     return false;
                if (*(long*)(a+4) != *(long*)(b+4)) return false;
                if (*(long*)(a+8) != *(long*)(b+8)) return false;
                a += 12; b += 12; length -= 12;
            }
       #else
           while (length >= 10)
           {
               if (*(int*)a != *(int*)b) return false;
               if (*(int*)(a+2) != *(int*)(b+2)) return false;
               if (*(int*)(a+4) != *(int*)(b+4)) return false;
               if (*(int*)(a+6) != *(int*)(b+6)) return false;
               if (*(int*)(a+8) != *(int*)(b+8)) return false;
               a += 10; b += 10; length -= 10;
           }
       #endif

        // This depends on the fact that the String objects are
        // always zero terminated and that the terminating zero is not included
        // in the length. For odd string sizes, the last compare will include
        // the zero terminator.
        while (length > 0)
        {
            if (*(int*)a != *(int*)b) break;
            a += 2; b += 2; length -= 2;
        }

        return (length <= 0);
    }
}

net 4中有一个新的内置解决方案——IStructuralEquatable

static bool ByteArrayCompare(byte[] a1, byte[] a2) 
{
    return StructuralComparisons.StructuralEqualityComparer.Equals(a1, a2);
}

似乎EqualBytesLongUnrolled是上述建议中最好的。

被跳过的方法(Enumerable.SequenceEqual,StructuralComparisons.StructuralEqualityComparer.Equals)不是慢速的。在265MB的数组上,我测量了这个:

Host Process Environment Information:
BenchmarkDotNet.Core=v0.9.9.0
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-3770 CPU 3.40GHz, ProcessorCount=8
Frequency=3323582 ticks, Resolution=300.8802 ns, Timer=TSC
CLR=MS.NET 4.0.30319.42000, Arch=64-bit RELEASE [RyuJIT]
GC=Concurrent Workstation
JitModules=clrjit-v4.6.1590.0

Type=CompareMemoriesBenchmarks  Mode=Throughput  

                 Method |      Median |    StdDev | Scaled | Scaled-SD |
----------------------- |------------ |---------- |------- |---------- |
             NewMemCopy |  30.0443 ms | 1.1880 ms |   1.00 |      0.00 |
 EqualBytesLongUnrolled |  29.9917 ms | 0.7480 ms |   0.99 |      0.04 |
          msvcrt_memcmp |  30.0930 ms | 0.2964 ms |   1.00 |      0.03 |
          UnsafeCompare |  31.0520 ms | 0.7072 ms |   1.03 |      0.04 |
       ByteArrayCompare | 212.9980 ms | 2.0776 ms |   7.06 |      0.25 |

OS=Windows
Processor=?, ProcessorCount=8
Frequency=3323582 ticks, Resolution=300.8802 ns, Timer=TSC
CLR=CORE, Arch=64-bit ? [RyuJIT]
GC=Concurrent Workstation
dotnet cli version: 1.0.0-preview2-003131

Type=CompareMemoriesBenchmarks  Mode=Throughput  

                 Method |      Median |    StdDev | Scaled | Scaled-SD |
----------------------- |------------ |---------- |------- |---------- |
             NewMemCopy |  30.1789 ms | 0.0437 ms |   1.00 |      0.00 |
 EqualBytesLongUnrolled |  30.1985 ms | 0.1782 ms |   1.00 |      0.01 |
          msvcrt_memcmp |  30.1084 ms | 0.0660 ms |   1.00 |      0.00 |
          UnsafeCompare |  31.1845 ms | 0.4051 ms |   1.03 |      0.01 |
       ByteArrayCompare | 212.0213 ms | 0.1694 ms |   7.03 |      0.01 |