我怎样才能做得快呢?
当然我可以这样做:
static bool ByteArrayCompare(byte[] a1, byte[] a2)
{
if (a1.Length != a2.Length)
return false;
for (int i=0; i<a1.Length; i++)
if (a1[i]!=a2[i])
return false;
return true;
}
但我正在寻找一个BCL函数或一些高度优化的已证明的方法来做到这一点。
java.util.Arrays.equals((sbyte[])(Array)a1, (sbyte[])(Array)a2);
工作得很好,但这似乎不适用于x64。
注意我的快速回答。
找不到一个我完全满意的解决方案(合理的性能,但没有不安全的代码/pinvoke),所以我想出了这个,没有真正的原创,但工作:
/// <summary>
///
/// </summary>
/// <param name="array1"></param>
/// <param name="array2"></param>
/// <param name="bytesToCompare"> 0 means compare entire arrays</param>
/// <returns></returns>
public static bool ArraysEqual(byte[] array1, byte[] array2, int bytesToCompare = 0)
{
if (array1.Length != array2.Length) return false;
var length = (bytesToCompare == 0) ? array1.Length : bytesToCompare;
var tailIdx = length - length % sizeof(Int64);
//check in 8 byte chunks
for (var i = 0; i < tailIdx; i += sizeof(Int64))
{
if (BitConverter.ToInt64(array1, i) != BitConverter.ToInt64(array2, i)) return false;
}
//check the remainder of the array, always shorter than 8 bytes
for (var i = tailIdx; i < length; i++)
{
if (array1[i] != array2[i]) return false;
}
return true;
}
与本页上的其他解决方案相比,性能:
简单循环:19837滴答,1.00
*位收敛器:4886 ticks, 4.06
unsafcompare: 1636 ticks, 12.12
EqualBytesLongUnrolled: 637 tick, 31.09
P/Invoke memcmp: 369 ticks, 53.67
在linqpad上测试,1000000字节的相同数组(最坏的情况),每个数组500次迭代。
你可以使用Enumerable。SequenceEqual方法。
using System;
using System.Linq;
...
var a1 = new int[] { 1, 2, 3};
var a2 = new int[] { 1, 2, 3};
var a3 = new int[] { 1, 2, 4};
var x = a1.SequenceEqual(a2); // true
var y = a1.SequenceEqual(a3); // false
如果你因为某些原因不能使用. net 3.5,你的方法是可以的。
编译器运行时环境会优化你的循环,所以你不需要担心性能。
我发布了一个类似的关于检查byte[]是否全是0的问题。(SIMD代码被打败了,所以我从这个答案中删除了它。)下面是我比较过的最快的代码:
static unsafe bool EqualBytesLongUnrolled (byte[] data1, byte[] data2)
{
if (data1 == data2)
return true;
if (data1.Length != data2.Length)
return false;
fixed (byte* bytes1 = data1, bytes2 = data2) {
int len = data1.Length;
int rem = len % (sizeof(long) * 16);
long* b1 = (long*)bytes1;
long* b2 = (long*)bytes2;
long* e1 = (long*)(bytes1 + len - rem);
while (b1 < e1) {
if (*(b1) != *(b2) || *(b1 + 1) != *(b2 + 1) ||
*(b1 + 2) != *(b2 + 2) || *(b1 + 3) != *(b2 + 3) ||
*(b1 + 4) != *(b2 + 4) || *(b1 + 5) != *(b2 + 5) ||
*(b1 + 6) != *(b2 + 6) || *(b1 + 7) != *(b2 + 7) ||
*(b1 + 8) != *(b2 + 8) || *(b1 + 9) != *(b2 + 9) ||
*(b1 + 10) != *(b2 + 10) || *(b1 + 11) != *(b2 + 11) ||
*(b1 + 12) != *(b2 + 12) || *(b1 + 13) != *(b2 + 13) ||
*(b1 + 14) != *(b2 + 14) || *(b1 + 15) != *(b2 + 15))
return false;
b1 += 16;
b2 += 16;
}
for (int i = 0; i < rem; i++)
if (data1 [len - 1 - i] != data2 [len - 1 - i])
return false;
return true;
}
}
测量两个256MB字节数组:
UnsafeCompare : 86,8784 ms
EqualBytesSimd : 71,5125 ms
EqualBytesSimdUnrolled : 73,1917 ms
EqualBytesLongUnrolled : 39,8623 ms
我开发了一个方法,稍微击败memcmp() (plinth的答案)和非常轻微击败EqualBytesLongUnrolled() (Arek Bulski的答案)在我的PC上。基本上,它以4而不是8展开循环。
2019年3月30日更新:
从。net核心3.0开始,我们有了SIMD支持!
这个解决方案在我的PC上是最快的:
#if NETCOREAPP3_0
using System.Runtime.Intrinsics.X86;
#endif
…
public static unsafe bool Compare(byte[] arr0, byte[] arr1)
{
if (arr0 == arr1)
{
return true;
}
if (arr0 == null || arr1 == null)
{
return false;
}
if (arr0.Length != arr1.Length)
{
return false;
}
if (arr0.Length == 0)
{
return true;
}
fixed (byte* b0 = arr0, b1 = arr1)
{
#if NETCOREAPP3_0
if (Avx2.IsSupported)
{
return Compare256(b0, b1, arr0.Length);
}
else if (Sse2.IsSupported)
{
return Compare128(b0, b1, arr0.Length);
}
else
#endif
{
return Compare64(b0, b1, arr0.Length);
}
}
}
#if NETCOREAPP3_0
public static unsafe bool Compare256(byte* b0, byte* b1, int length)
{
byte* lastAddr = b0 + length;
byte* lastAddrMinus128 = lastAddr - 128;
const int mask = -1;
while (b0 < lastAddrMinus128) // unroll the loop so that we are comparing 128 bytes at a time.
{
if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != mask)
{
return false;
}
if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 32), Avx.LoadVector256(b1 + 32))) != mask)
{
return false;
}
if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 64), Avx.LoadVector256(b1 + 64))) != mask)
{
return false;
}
if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0 + 96), Avx.LoadVector256(b1 + 96))) != mask)
{
return false;
}
b0 += 128;
b1 += 128;
}
while (b0 < lastAddr)
{
if (*b0 != *b1) return false;
b0++;
b1++;
}
return true;
}
public static unsafe bool Compare128(byte* b0, byte* b1, int length)
{
byte* lastAddr = b0 + length;
byte* lastAddrMinus64 = lastAddr - 64;
const int mask = 0xFFFF;
while (b0 < lastAddrMinus64) // unroll the loop so that we are comparing 64 bytes at a time.
{
if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != mask)
{
return false;
}
if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 16), Sse2.LoadVector128(b1 + 16))) != mask)
{
return false;
}
if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 32), Sse2.LoadVector128(b1 + 32))) != mask)
{
return false;
}
if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0 + 48), Sse2.LoadVector128(b1 + 48))) != mask)
{
return false;
}
b0 += 64;
b1 += 64;
}
while (b0 < lastAddr)
{
if (*b0 != *b1) return false;
b0++;
b1++;
}
return true;
}
#endif
public static unsafe bool Compare64(byte* b0, byte* b1, int length)
{
byte* lastAddr = b0 + length;
byte* lastAddrMinus32 = lastAddr - 32;
while (b0 < lastAddrMinus32) // unroll the loop so that we are comparing 32 bytes at a time.
{
if (*(ulong*)b0 != *(ulong*)b1) return false;
if (*(ulong*)(b0 + 8) != *(ulong*)(b1 + 8)) return false;
if (*(ulong*)(b0 + 16) != *(ulong*)(b1 + 16)) return false;
if (*(ulong*)(b0 + 24) != *(ulong*)(b1 + 24)) return false;
b0 += 32;
b1 += 32;
}
while (b0 < lastAddr)
{
if (*b0 != *b1) return false;
b0++;
b1++;
}
return true;
}