我怎样才能做得快呢?

当然我可以这样做:

static bool ByteArrayCompare(byte[] a1, byte[] a2)
{
    if (a1.Length != a2.Length)
        return false;

    for (int i=0; i<a1.Length; i++)
        if (a1[i]!=a2[i])
            return false;

    return true;
}

但我正在寻找一个BCL函数或一些高度优化的已证明的方法来做到这一点。

java.util.Arrays.equals((sbyte[])(Array)a1, (sbyte[])(Array)a2);

工作得很好,但这似乎不适用于x64。

注意我的快速回答。


当前回答

我开发了一个方法,稍微击败memcmp() (plinth的答案)和非常轻微击败EqualBytesLongUnrolled() (Arek Bulski的答案)在我的PC上。基本上,它以4而不是8展开循环。

2019年3月30日更新:

从。net核心3.0开始,我们有了SIMD支持!

这个解决方案在我的PC上是最快的:

#if NETCOREAPP3_0
using System.Runtime.Intrinsics.X86;
#endif
…

public static unsafe bool Compare(byte[] arr0, byte[] arr1)
{
    if (arr0 == arr1)
    {
        return true;
    }
    if (arr0 == null || arr1 == null)
    {
        return false;
    }
    if (arr0.Length != arr1.Length)
    {
        return false;
    }
    if (arr0.Length == 0)
    {
        return true;
    }
    fixed (byte* b0 = arr0, b1 = arr1)
    {
#if NETCOREAPP3_0
        if (Avx2.IsSupported)
        {
            return Compare256(b0, b1, arr0.Length);
        }
        else if (Sse2.IsSupported)
        {
            return Compare128(b0, b1, arr0.Length);
        }
        else
#endif
        {
            return Compare64(b0, b1, arr0.Length);
        }
    }
}
#if NETCOREAPP3_0
public static unsafe bool Compare256(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus128 = lastAddr - 128;
    const int mask = -1;
    while (b0 < lastAddrMinus128) // unroll the loop so that we are comparing 128 bytes at a time.
    {
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 32), Avx.LoadVector256(b1 + 32))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 64), Avx.LoadVector256(b1 + 64))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 96), Avx.LoadVector256(b1 + 96))) != mask)
        {
            return false;
        }
        b0 += 128;
        b1 += 128;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
public static unsafe bool Compare128(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus64 = lastAddr - 64;
    const int mask = 0xFFFF;
    while (b0 < lastAddrMinus64) // unroll the loop so that we are comparing 64 bytes at a time.
    {
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 16), Sse2.LoadVector128(b1 + 16))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 32), Sse2.LoadVector128(b1 + 32))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 48), Sse2.LoadVector128(b1 + 48))) != mask)
        {
            return false;
        }
        b0 += 64;
        b1 += 64;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
#endif
public static unsafe bool Compare64(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus32 = lastAddr - 32;
    while (b0 < lastAddrMinus32) // unroll the loop so that we are comparing 32 bytes at a time.
    {
        if (*(ulong*)b0 != *(ulong*)b1) return false;
        if (*(ulong*)(b0 + 8) != *(ulong*)(b1 + 8)) return false;
        if (*(ulong*)(b0 + 16) != *(ulong*)(b1 + 16)) return false;
        if (*(ulong*)(b0 + 24) != *(ulong*)(b1 + 24)) return false;
        b0 += 32;
        b1 += 32;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}

其他回答

我想到了许多显卡内置的块传输加速方法。但是这样你就必须按字节复制所有的数据,所以如果你不想在非托管和依赖硬件的代码中实现你的整个逻辑,这对你没有多大帮助……

Another way of optimization similar to the approach shown above would be to store as much of your data as possible in a long[] rather than a byte[] right from the start, for example if you are reading it sequentially from a binary file, or if you use a memory mapped file, read in data as long[] or single long values. Then, your comparison loop will only need 1/8th of the number of iterations it would have to do for a byte[] containing the same amount of data. It is a matter of when and how often you need to compare vs. when and how often you need to access the data in a byte-by-byte manner, e.g. to use it in an API call as a parameter in a method that expects a byte[]. In the end, you only can tell if you really know the use case...

看看。net是如何处理字符串的。Equals,你可以看到它使用了一个叫做EqualsHelper的私有方法,它有一个“不安全”的指针实现。net Reflector是你的朋友,可以看到内部是如何完成的。

这可以用作字节数组比较的模板,我在博客文章中用c#快速字节数组比较中做了一个实现。我还做了一些基本的基准测试,看看什么时候安全的实现比不安全的实现更快。

也就是说,除非你真的需要杀手级的性能,否则我会选择简单的fr循环比较。

简单的回答是:

    public bool Compare(byte[] b1, byte[] b2)
    {
        return Encoding.ASCII.GetString(b1) == Encoding.ASCII.GetString(b2);
    }

通过这种方式,您可以使用优化的. net字符串比较来进行字节数组比较,而不需要编写不安全的代码。这是它如何在后台完成的:

private unsafe static bool EqualsHelper(String strA, String strB)
{
    Contract.Requires(strA != null);
    Contract.Requires(strB != null);
    Contract.Requires(strA.Length == strB.Length);

    int length = strA.Length;

    fixed (char* ap = &strA.m_firstChar) fixed (char* bp = &strB.m_firstChar)
    {
        char* a = ap;
        char* b = bp;

        // Unroll the loop

        #if AMD64
            // For the AMD64 bit platform we unroll by 12 and
            // check three qwords at a time. This is less code
            // than the 32 bit case and is shorter
            // pathlength.

            while (length >= 12)
            {
                if (*(long*)a     != *(long*)b)     return false;
                if (*(long*)(a+4) != *(long*)(b+4)) return false;
                if (*(long*)(a+8) != *(long*)(b+8)) return false;
                a += 12; b += 12; length -= 12;
            }
       #else
           while (length >= 10)
           {
               if (*(int*)a != *(int*)b) return false;
               if (*(int*)(a+2) != *(int*)(b+2)) return false;
               if (*(int*)(a+4) != *(int*)(b+4)) return false;
               if (*(int*)(a+6) != *(int*)(b+6)) return false;
               if (*(int*)(a+8) != *(int*)(b+8)) return false;
               a += 10; b += 10; length -= 10;
           }
       #endif

        // This depends on the fact that the String objects are
        // always zero terminated and that the terminating zero is not included
        // in the length. For odd string sizes, the last compare will include
        // the zero terminator.
        while (length > 0)
        {
            if (*(int*)a != *(int*)b) break;
            a += 2; b += 2; length -= 2;
        }

        return (length <= 0);
    }
}

因为上面的许多花哨的解决方案都不能与UWP一起工作,而且因为我喜欢Linq和函数方法,所以我向您介绍我对这个问题的版本。 为了在出现第一个差异时避免比较,我选择了.FirstOrDefault()

public static bool CompareByteArrays(byte[] ba0, byte[] ba1) =>
    !(ba0.Length != ba1.Length || Enumerable.Range(1,ba0.Length)
        .FirstOrDefault(n => ba0[n] != ba1[n]) > 0);

让我们再加一个!

最近微软发布了一个特殊的NuGet包System.Runtime.CompilerServices.Unsafe。它的特殊之处在于它是用IL编写的,并且提供了c#中无法直接使用的低级功能。

它的一个方法unsafety . as <T>(object)允许将任何引用类型转换为另一个引用类型,跳过任何安全检查。这通常是一个非常糟糕的主意,但如果两种类型具有相同的结构,它就可以工作。因此,我们可以使用这个函数将字节[]转换为长[]:

bool CompareWithUnsafeLibrary(byte[] a1, byte[] a2)
{
    if (a1.Length != a2.Length) return false;

    var longSize = (int)Math.Floor(a1.Length / 8.0);
    var long1 = Unsafe.As<long[]>(a1);
    var long2 = Unsafe.As<long[]>(a2);

    for (var i = 0; i < longSize; i++)
    {
        if (long1[i] != long2[i]) return false;
    }

    for (var i = longSize * 8; i < a1.Length; i++)
    {
        if (a1[i] != a2[i]) return false;
    }

    return true;
}

注意long1。Length仍然会返回原始数组的长度,因为它存储在数组内存结构中的字段中。

这个方法没有这里演示的其他方法那么快,但它比朴素方法快得多,不使用不安全的代码或P/Invoke或固定,实现非常简单(IMO)。以下是我的机器上的一些BenchmarkDotNet结果:

BenchmarkDotNet=v0.10.3.0, OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-4870HQ CPU 2.50GHz, ProcessorCount=8
Frequency=2435775 Hz, Resolution=410.5470 ns, Timer=TSC
  [Host]     : Clr 4.0.30319.42000, 64bit RyuJIT-v4.6.1637.0
  DefaultJob : Clr 4.0.30319.42000, 64bit RyuJIT-v4.6.1637.0

                 Method |          Mean |    StdDev |
----------------------- |-------------- |---------- |
          UnsafeLibrary |   125.8229 ns | 0.3588 ns |
          UnsafeCompare |    89.9036 ns | 0.8243 ns |
           JSharpEquals | 1,432.1717 ns | 1.3161 ns |
 EqualBytesLongUnrolled |    43.7863 ns | 0.8923 ns |
              NewMemCmp |    65.4108 ns | 0.2202 ns |
            ArraysEqual |   910.8372 ns | 2.6082 ns |
          PInvokeMemcmp |    52.7201 ns | 0.1105 ns |

我还为所有测试创建了一个要点。