我怎样才能做得快呢?

当然我可以这样做:

static bool ByteArrayCompare(byte[] a1, byte[] a2)
{
    if (a1.Length != a2.Length)
        return false;

    for (int i=0; i<a1.Length; i++)
        if (a1[i]!=a2[i])
            return false;

    return true;
}

但我正在寻找一个BCL函数或一些高度优化的已证明的方法来做到这一点。

java.util.Arrays.equals((sbyte[])(Array)a1, (sbyte[])(Array)a2);

工作得很好,但这似乎不适用于x64。

注意我的快速回答。


当前回答

我开发了一个方法,稍微击败memcmp() (plinth的答案)和非常轻微击败EqualBytesLongUnrolled() (Arek Bulski的答案)在我的PC上。基本上,它以4而不是8展开循环。

2019年3月30日更新:

从。net核心3.0开始,我们有了SIMD支持!

这个解决方案在我的PC上是最快的:

#if NETCOREAPP3_0
using System.Runtime.Intrinsics.X86;
#endif
…

public static unsafe bool Compare(byte[] arr0, byte[] arr1)
{
    if (arr0 == arr1)
    {
        return true;
    }
    if (arr0 == null || arr1 == null)
    {
        return false;
    }
    if (arr0.Length != arr1.Length)
    {
        return false;
    }
    if (arr0.Length == 0)
    {
        return true;
    }
    fixed (byte* b0 = arr0, b1 = arr1)
    {
#if NETCOREAPP3_0
        if (Avx2.IsSupported)
        {
            return Compare256(b0, b1, arr0.Length);
        }
        else if (Sse2.IsSupported)
        {
            return Compare128(b0, b1, arr0.Length);
        }
        else
#endif
        {
            return Compare64(b0, b1, arr0.Length);
        }
    }
}
#if NETCOREAPP3_0
public static unsafe bool Compare256(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus128 = lastAddr - 128;
    const int mask = -1;
    while (b0 < lastAddrMinus128) // unroll the loop so that we are comparing 128 bytes at a time.
    {
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 32), Avx.LoadVector256(b1 + 32))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 64), Avx.LoadVector256(b1 + 64))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 96), Avx.LoadVector256(b1 + 96))) != mask)
        {
            return false;
        }
        b0 += 128;
        b1 += 128;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
public static unsafe bool Compare128(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus64 = lastAddr - 64;
    const int mask = 0xFFFF;
    while (b0 < lastAddrMinus64) // unroll the loop so that we are comparing 64 bytes at a time.
    {
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 16), Sse2.LoadVector128(b1 + 16))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 32), Sse2.LoadVector128(b1 + 32))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 48), Sse2.LoadVector128(b1 + 48))) != mask)
        {
            return false;
        }
        b0 += 64;
        b1 += 64;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
#endif
public static unsafe bool Compare64(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus32 = lastAddr - 32;
    while (b0 < lastAddrMinus32) // unroll the loop so that we are comparing 32 bytes at a time.
    {
        if (*(ulong*)b0 != *(ulong*)b1) return false;
        if (*(ulong*)(b0 + 8) != *(ulong*)(b1 + 8)) return false;
        if (*(ulong*)(b0 + 16) != *(ulong*)(b1 + 16)) return false;
        if (*(ulong*)(b0 + 24) != *(ulong*)(b1 + 24)) return false;
        b0 += 32;
        b1 += 32;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}

其他回答

抱歉,如果你正在寻找一种管理的方式,你已经正确地做了,据我所知,在BCL中没有内置的方法来做这个。

你应该添加一些初始的空检查,然后重用它,就好像它在BCL。

这与其他方法类似,但这里的不同之处在于,不存在我可以一次检查的下一个最高字节数,例如,如果我有63个字节(在我的SIMD示例中),我可以检查前32个字节的相等性,然后是后32个字节,这比检查32个字节、16个字节、8个字节等等要快。您输入的第一个检查是比较所有字节所需要的唯一检查。

这确实在我的测试中名列前茅,但仅以微弱之差。

下面的代码正是我在airbreather/ArrayComparePerf.cs中测试它的方式。

public unsafe bool SIMDNoFallThrough()    #requires  System.Runtime.Intrinsics.X86
{
    if (a1 == null || a2 == null)
        return false;

    int length0 = a1.Length;

    if (length0 != a2.Length) return false;

    fixed (byte* b00 = a1, b01 = a2)
    {
        byte* b0 = b00, b1 = b01, last0 = b0 + length0, last1 = b1 + length0, last32 = last0 - 31;

        if (length0 > 31)
        {
            while (b0 < last32)
            {
                if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != -1)
                    return false;
                b0 += 32;
                b1 += 32;
            }
            return Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(last0 - 32), Avx.LoadVector256(last1 - 32))) == -1;
        }

        if (length0 > 15)
        {
            if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != 65535)
                return false;
            return Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(last0 - 16), Sse2.LoadVector128(last1 - 16))) == 65535;
        }

        if (length0 > 7)
        {
            if (*(ulong*)b0 != *(ulong*)b1)
                return false;
            return *(ulong*)(last0 - 8) == *(ulong*)(last1 - 8);
        }

        if (length0 > 3)
        {
            if (*(uint*)b0 != *(uint*)b1)
                return false;
            return *(uint*)(last0 - 4) == *(uint*)(last1 - 4);
        }

        if (length0 > 1)
        {
            if (*(ushort*)b0 != *(ushort*)b1)
                return false;
            return *(ushort*)(last0 - 2) == *(ushort*)(last1 - 2);
        }

        return *b0 == *b1;
    }
}

如果没有首选的SIMD,与现有的longpointer算法相同的方法:

public unsafe bool LongPointersNoFallThrough()
{
    if (a1 == null || a2 == null || a1.Length != a2.Length)
        return false;
    fixed (byte* p1 = a1, p2 = a2)
    {
        byte* x1 = p1, x2 = p2;
        int l = a1.Length;
        if ((l & 8) != 0)
        {
            for (int i = 0; i < l / 8; i++, x1 += 8, x2 += 8)
                if (*(long*)x1 != *(long*)x2) return false;
            return *(long*)(x1 + (l - 8)) == *(long*)(x2 + (l - 8));
        }
        if ((l & 4) != 0)
        {
            if (*(int*)x1 != *(int*)x2) return false; x1 += 4; x2 += 4;
            return *(int*)(x1 + (l - 4)) == *(int*)(x2 + (l - 4));
        }
        if ((l & 2) != 0)
        {
            if (*(short*)x1 != *(short*)x2) return false; x1 += 2; x2 += 2;
            return *(short*)(x1 + (l - 2)) == *(short*)(x2 + (l - 2));
        }
        return *x1 == *x2;
    }
}

编辑:现代的快速方法是使用a1.SequenceEquals(a2)

用户gil提出了不安全的代码,产生了这个解决方案:

// Copyright (c) 2008-2013 Hafthor Stefansson
// Distributed under the MIT/X11 software license
// Ref: http://www.opensource.org/licenses/mit-license.php.
static unsafe bool UnsafeCompare(byte[] a1, byte[] a2) {
  unchecked {
    if(a1==a2) return true;
    if(a1==null || a2==null || a1.Length!=a2.Length)
      return false;
    fixed (byte* p1=a1, p2=a2) {
      byte* x1=p1, x2=p2;
      int l = a1.Length;
      for (int i=0; i < l/8; i++, x1+=8, x2+=8)
        if (*((long*)x1) != *((long*)x2)) return false;
      if ((l & 4)!=0) { if (*((int*)x1)!=*((int*)x2)) return false; x1+=4; x2+=4; }
      if ((l & 2)!=0) { if (*((short*)x1)!=*((short*)x2)) return false; x1+=2; x2+=2; }
      if ((l & 1)!=0) if (*((byte*)x1) != *((byte*)x2)) return false;
      return true;
    }
  }
}

它对尽可能多的数组进行基于64位的比较。这依赖于数组以qword对齐开始的事实。它会工作,如果不是qword对齐,只是没有那么快,如果它是。

它比简单的“for”循环快了大约7个计时器。使用j#库执行相当于原来的' for '循环。使用.SequenceEqual会慢7倍左右;我想只是因为它使用了ienumerator。movenext。我认为基于linq的解决方案至少会这么慢,甚至更糟。

我开发了一个方法,稍微击败memcmp() (plinth的答案)和非常轻微击败EqualBytesLongUnrolled() (Arek Bulski的答案)在我的PC上。基本上,它以4而不是8展开循环。

2019年3月30日更新:

从。net核心3.0开始,我们有了SIMD支持!

这个解决方案在我的PC上是最快的:

#if NETCOREAPP3_0
using System.Runtime.Intrinsics.X86;
#endif
…

public static unsafe bool Compare(byte[] arr0, byte[] arr1)
{
    if (arr0 == arr1)
    {
        return true;
    }
    if (arr0 == null || arr1 == null)
    {
        return false;
    }
    if (arr0.Length != arr1.Length)
    {
        return false;
    }
    if (arr0.Length == 0)
    {
        return true;
    }
    fixed (byte* b0 = arr0, b1 = arr1)
    {
#if NETCOREAPP3_0
        if (Avx2.IsSupported)
        {
            return Compare256(b0, b1, arr0.Length);
        }
        else if (Sse2.IsSupported)
        {
            return Compare128(b0, b1, arr0.Length);
        }
        else
#endif
        {
            return Compare64(b0, b1, arr0.Length);
        }
    }
}
#if NETCOREAPP3_0
public static unsafe bool Compare256(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus128 = lastAddr - 128;
    const int mask = -1;
    while (b0 < lastAddrMinus128) // unroll the loop so that we are comparing 128 bytes at a time.
    {
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 32), Avx.LoadVector256(b1 + 32))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 64), Avx.LoadVector256(b1 + 64))) != mask)
        {
            return false;
        }
        if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 96), Avx.LoadVector256(b1 + 96))) != mask)
        {
            return false;
        }
        b0 += 128;
        b1 += 128;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
public static unsafe bool Compare128(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus64 = lastAddr - 64;
    const int mask = 0xFFFF;
    while (b0 < lastAddrMinus64) // unroll the loop so that we are comparing 64 bytes at a time.
    {
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 16), Sse2.LoadVector128(b1 + 16))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 32), Sse2.LoadVector128(b1 + 32))) != mask)
        {
            return false;
        }
        if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 48), Sse2.LoadVector128(b1 + 48))) != mask)
        {
            return false;
        }
        b0 += 64;
        b1 += 64;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}
#endif
public static unsafe bool Compare64(byte* b0, byte* b1, int length)
{
    byte* lastAddr = b0 + length;
    byte* lastAddrMinus32 = lastAddr - 32;
    while (b0 < lastAddrMinus32) // unroll the loop so that we are comparing 32 bytes at a time.
    {
        if (*(ulong*)b0 != *(ulong*)b1) return false;
        if (*(ulong*)(b0 + 8) != *(ulong*)(b1 + 8)) return false;
        if (*(ulong*)(b0 + 16) != *(ulong*)(b1 + 16)) return false;
        if (*(ulong*)(b0 + 24) != *(ulong*)(b1 + 24)) return false;
        b0 += 32;
        b1 += 32;
    }
    while (b0 < lastAddr)
    {
        if (*b0 != *b1) return false;
        b0++;
        b1++;
    }
    return true;
}

net 4中有一个新的内置解决方案——IStructuralEquatable

static bool ByteArrayCompare(byte[] a1, byte[] a2) 
{
    return StructuralComparisons.StructuralEqualityComparer.Equals(a1, a2);
}