我怎样才能做得快呢?
当然我可以这样做:
static bool ByteArrayCompare(byte[] a1, byte[] a2)
{
if (a1.Length != a2.Length)
return false;
for (int i=0; i<a1.Length; i++)
if (a1[i]!=a2[i])
return false;
return true;
}
但我正在寻找一个BCL函数或一些高度优化的已证明的方法来做到这一点。
java.util.Arrays.equals((sbyte[])(Array)a1, (sbyte[])(Array)a2);
工作得很好,但这似乎不适用于x64。
注意我的快速回答。
找不到一个我完全满意的解决方案(合理的性能,但没有不安全的代码/pinvoke),所以我想出了这个,没有真正的原创,但工作:
/// <summary>
///
/// </summary>
/// <param name="array1"></param>
/// <param name="array2"></param>
/// <param name="bytesToCompare"> 0 means compare entire arrays</param>
/// <returns></returns>
public static bool ArraysEqual(byte[] array1, byte[] array2, int bytesToCompare = 0)
{
if (array1.Length != array2.Length) return false;
var length = (bytesToCompare == 0) ? array1.Length : bytesToCompare;
var tailIdx = length - length % sizeof(Int64);
//check in 8 byte chunks
for (var i = 0; i < tailIdx; i += sizeof(Int64))
{
if (BitConverter.ToInt64(array1, i) != BitConverter.ToInt64(array2, i)) return false;
}
//check the remainder of the array, always shorter than 8 bytes
for (var i = tailIdx; i < length; i++)
{
if (array1[i] != array2[i]) return false;
}
return true;
}
与本页上的其他解决方案相比,性能:
简单循环:19837滴答,1.00
*位收敛器:4886 ticks, 4.06
unsafcompare: 1636 ticks, 12.12
EqualBytesLongUnrolled: 637 tick, 31.09
P/Invoke memcmp: 369 ticks, 53.67
在linqpad上测试,1000000字节的相同数组(最坏的情况),每个数组500次迭代。
受到ArekBulski发布的EqualBytesLongUnrolled方法的启发,我确定了一个附加优化的解决方案。在我的实例中,数组中的数组差异往往在数组的尾部附近。在测试中,我发现当这种情况发生在大型数组中时,能够以相反的顺序比较数组元素使这种解决方案比基于memcmp的解决方案获得了巨大的性能提升。下面是解决方案:
public enum CompareDirection { Forward, Backward }
private static unsafe bool UnsafeEquals(byte[] a, byte[] b, CompareDirection direction = CompareDirection.Forward)
{
// returns when a and b are same array or both null
if (a == b) return true;
// if either is null or different lengths, can't be equal
if (a == null || b == null || a.Length != b.Length)
return false;
const int UNROLLED = 16; // count of longs 'unrolled' in optimization
int size = sizeof(long) * UNROLLED; // 128 bytes (min size for 'unrolled' optimization)
int len = a.Length;
int n = len / size; // count of full 128 byte segments
int r = len % size; // count of remaining 'unoptimized' bytes
// pin the arrays and access them via pointers
fixed (byte* pb_a = a, pb_b = b)
{
if (r > 0 && direction == CompareDirection.Backward)
{
byte* pa = pb_a + len - 1;
byte* pb = pb_b + len - 1;
byte* phead = pb_a + len - r;
while(pa >= phead)
{
if (*pa != *pb) return false;
pa--;
pb--;
}
}
if (n > 0)
{
int nOffset = n * size;
if (direction == CompareDirection.Forward)
{
long* pa = (long*)pb_a;
long* pb = (long*)pb_b;
long* ptail = (long*)(pb_a + nOffset);
while (pa < ptail)
{
if (*(pa + 0) != *(pb + 0) || *(pa + 1) != *(pb + 1) ||
*(pa + 2) != *(pb + 2) || *(pa + 3) != *(pb + 3) ||
*(pa + 4) != *(pb + 4) || *(pa + 5) != *(pb + 5) ||
*(pa + 6) != *(pb + 6) || *(pa + 7) != *(pb + 7) ||
*(pa + 8) != *(pb + 8) || *(pa + 9) != *(pb + 9) ||
*(pa + 10) != *(pb + 10) || *(pa + 11) != *(pb + 11) ||
*(pa + 12) != *(pb + 12) || *(pa + 13) != *(pb + 13) ||
*(pa + 14) != *(pb + 14) || *(pa + 15) != *(pb + 15)
)
{
return false;
}
pa += UNROLLED;
pb += UNROLLED;
}
}
else
{
long* pa = (long*)(pb_a + nOffset);
long* pb = (long*)(pb_b + nOffset);
long* phead = (long*)pb_a;
while (phead < pa)
{
if (*(pa - 1) != *(pb - 1) || *(pa - 2) != *(pb - 2) ||
*(pa - 3) != *(pb - 3) || *(pa - 4) != *(pb - 4) ||
*(pa - 5) != *(pb - 5) || *(pa - 6) != *(pb - 6) ||
*(pa - 7) != *(pb - 7) || *(pa - 8) != *(pb - 8) ||
*(pa - 9) != *(pb - 9) || *(pa - 10) != *(pb - 10) ||
*(pa - 11) != *(pb - 11) || *(pa - 12) != *(pb - 12) ||
*(pa - 13) != *(pb - 13) || *(pa - 14) != *(pb - 14) ||
*(pa - 15) != *(pb - 15) || *(pa - 16) != *(pb - 16)
)
{
return false;
}
pa -= UNROLLED;
pb -= UNROLLED;
}
}
}
if (r > 0 && direction == CompareDirection.Forward)
{
byte* pa = pb_a + len - r;
byte* pb = pb_b + len - r;
byte* ptail = pb_a + len;
while(pa < ptail)
{
if (*pa != *pb) return false;
pa++;
pb++;
}
}
}
return true;
}
这与其他方法类似,但这里的不同之处在于,不存在我可以一次检查的下一个最高字节数,例如,如果我有63个字节(在我的SIMD示例中),我可以检查前32个字节的相等性,然后是后32个字节,这比检查32个字节、16个字节、8个字节等等要快。您输入的第一个检查是比较所有字节所需要的唯一检查。
这确实在我的测试中名列前茅,但仅以微弱之差。
下面的代码正是我在airbreather/ArrayComparePerf.cs中测试它的方式。
public unsafe bool SIMDNoFallThrough() #requires System.Runtime.Intrinsics.X86
{
if (a1 == null || a2 == null)
return false;
int length0 = a1.Length;
if (length0 != a2.Length) return false;
fixed (byte* b00 = a1, b01 = a2)
{
byte* b0 = b00, b1 = b01, last0 = b0 + length0, last1 = b1 + length0, last32 = last0 - 31;
if (length0 > 31)
{
while (b0 < last32)
{
if (Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(b0), Avx.LoadVector256(b1))) != -1)
return false;
b0 += 32;
b1 += 32;
}
return Avx2.MoveMask(Avx2.CompareEqual(Avx.LoadVector256(last0 - 32), Avx.LoadVector256(last1 - 32))) == -1;
}
if (length0 > 15)
{
if (Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(b0), Sse2.LoadVector128(b1))) != 65535)
return false;
return Sse2.MoveMask(Sse2.CompareEqual(Sse2.LoadVector128(last0 - 16), Sse2.LoadVector128(last1 - 16))) == 65535;
}
if (length0 > 7)
{
if (*(ulong*)b0 != *(ulong*)b1)
return false;
return *(ulong*)(last0 - 8) == *(ulong*)(last1 - 8);
}
if (length0 > 3)
{
if (*(uint*)b0 != *(uint*)b1)
return false;
return *(uint*)(last0 - 4) == *(uint*)(last1 - 4);
}
if (length0 > 1)
{
if (*(ushort*)b0 != *(ushort*)b1)
return false;
return *(ushort*)(last0 - 2) == *(ushort*)(last1 - 2);
}
return *b0 == *b1;
}
}
如果没有首选的SIMD,与现有的longpointer算法相同的方法:
public unsafe bool LongPointersNoFallThrough()
{
if (a1 == null || a2 == null || a1.Length != a2.Length)
return false;
fixed (byte* p1 = a1, p2 = a2)
{
byte* x1 = p1, x2 = p2;
int l = a1.Length;
if ((l & 8) != 0)
{
for (int i = 0; i < l / 8; i++, x1 += 8, x2 += 8)
if (*(long*)x1 != *(long*)x2) return false;
return *(long*)(x1 + (l - 8)) == *(long*)(x2 + (l - 8));
}
if ((l & 4) != 0)
{
if (*(int*)x1 != *(int*)x2) return false; x1 += 4; x2 += 4;
return *(int*)(x1 + (l - 4)) == *(int*)(x2 + (l - 4));
}
if ((l & 2) != 0)
{
if (*(short*)x1 != *(short*)x2) return false; x1 += 2; x2 += 2;
return *(short*)(x1 + (l - 2)) == *(short*)(x2 + (l - 2));
}
return *x1 == *x2;
}
}
. net 3.5及更新版本有一个新的公共类型System.Data.Linq.Binary,它封装了byte[]。它实现了IEquatable<Binary>,(实际上)比较两个字节数组。注意System.Data.Linq.Binary也有来自byte[]的隐式转换运算符。
MSDN文档:System.Data.Linq.Binary
Equals方法的反射器反编译:
private bool EqualsTo(Binary binary)
{
if (this != binary)
{
if (binary == null)
{
return false;
}
if (this.bytes.Length != binary.bytes.Length)
{
return false;
}
if (this.hashCode != binary.hashCode)
{
return false;
}
int index = 0;
int length = this.bytes.Length;
while (index < length)
{
if (this.bytes[index] != binary.bytes[index])
{
return false;
}
index++;
}
}
return true;
}
有趣的是,只有当两个Binary对象的哈希值相同时,它们才会进行逐字节比较循环。然而,这是以在二进制对象的构造函数中计算哈希值为代价的(通过使用for loop:-)遍历数组)。
上述实现意味着,在最坏的情况下,您可能必须遍历数组三次:首先计算array1的哈希值,然后计算array2的哈希值,最后(因为这是最坏的情况,长度和哈希值相等)比较array1中的字节和数组2中的字节。
总的来说,即使System.Data.Linq.Binary被内置到BCL中,我不认为这是比较两个字节数组的最快方法:-|。