我怎样才能做得快呢?
当然我可以这样做:
static bool ByteArrayCompare(byte[] a1, byte[] a2)
{
if (a1.Length != a2.Length)
return false;
for (int i=0; i<a1.Length; i++)
if (a1[i]!=a2[i])
return false;
return true;
}
但我正在寻找一个BCL函数或一些高度优化的已证明的方法来做到这一点。
java.util.Arrays.equals((sbyte[])(Array)a1, (sbyte[])(Array)a2);
工作得很好,但这似乎不适用于x64。
注意我的快速回答。
受到ArekBulski发布的EqualBytesLongUnrolled方法的启发,我确定了一个附加优化的解决方案。在我的实例中,数组中的数组差异往往在数组的尾部附近。在测试中,我发现当这种情况发生在大型数组中时,能够以相反的顺序比较数组元素使这种解决方案比基于memcmp的解决方案获得了巨大的性能提升。下面是解决方案:
public enum CompareDirection { Forward, Backward }
private static unsafe bool UnsafeEquals(byte[] a, byte[] b, CompareDirection direction = CompareDirection.Forward)
{
// returns when a and b are same array or both null
if (a == b) return true;
// if either is null or different lengths, can't be equal
if (a == null || b == null || a.Length != b.Length)
return false;
const int UNROLLED = 16; // count of longs 'unrolled' in optimization
int size = sizeof(long) * UNROLLED; // 128 bytes (min size for 'unrolled' optimization)
int len = a.Length;
int n = len / size; // count of full 128 byte segments
int r = len % size; // count of remaining 'unoptimized' bytes
// pin the arrays and access them via pointers
fixed (byte* pb_a = a, pb_b = b)
{
if (r > 0 && direction == CompareDirection.Backward)
{
byte* pa = pb_a + len - 1;
byte* pb = pb_b + len - 1;
byte* phead = pb_a + len - r;
while(pa >= phead)
{
if (*pa != *pb) return false;
pa--;
pb--;
}
}
if (n > 0)
{
int nOffset = n * size;
if (direction == CompareDirection.Forward)
{
long* pa = (long*)pb_a;
long* pb = (long*)pb_b;
long* ptail = (long*)(pb_a + nOffset);
while (pa < ptail)
{
if (*(pa + 0) != *(pb + 0) || *(pa + 1) != *(pb + 1) ||
*(pa + 2) != *(pb + 2) || *(pa + 3) != *(pb + 3) ||
*(pa + 4) != *(pb + 4) || *(pa + 5) != *(pb + 5) ||
*(pa + 6) != *(pb + 6) || *(pa + 7) != *(pb + 7) ||
*(pa + 8) != *(pb + 8) || *(pa + 9) != *(pb + 9) ||
*(pa + 10) != *(pb + 10) || *(pa + 11) != *(pb + 11) ||
*(pa + 12) != *(pb + 12) || *(pa + 13) != *(pb + 13) ||
*(pa + 14) != *(pb + 14) || *(pa + 15) != *(pb + 15)
)
{
return false;
}
pa += UNROLLED;
pb += UNROLLED;
}
}
else
{
long* pa = (long*)(pb_a + nOffset);
long* pb = (long*)(pb_b + nOffset);
long* phead = (long*)pb_a;
while (phead < pa)
{
if (*(pa - 1) != *(pb - 1) || *(pa - 2) != *(pb - 2) ||
*(pa - 3) != *(pb - 3) || *(pa - 4) != *(pb - 4) ||
*(pa - 5) != *(pb - 5) || *(pa - 6) != *(pb - 6) ||
*(pa - 7) != *(pb - 7) || *(pa - 8) != *(pb - 8) ||
*(pa - 9) != *(pb - 9) || *(pa - 10) != *(pb - 10) ||
*(pa - 11) != *(pb - 11) || *(pa - 12) != *(pb - 12) ||
*(pa - 13) != *(pb - 13) || *(pa - 14) != *(pb - 14) ||
*(pa - 15) != *(pb - 15) || *(pa - 16) != *(pb - 16)
)
{
return false;
}
pa -= UNROLLED;
pb -= UNROLLED;
}
}
}
if (r > 0 && direction == CompareDirection.Forward)
{
byte* pa = pb_a + len - r;
byte* pb = pb_b + len - r;
byte* ptail = pb_a + len;
while(pa < ptail)
{
if (*pa != *pb) return false;
pa++;
pb++;
}
}
}
return true;
}
对于那些关心顺序的人(即希望你的memcmp返回一个int而不是什么都没有),. net Core 3.0(以及。net Standard 2.1也就是。net 5.0)将包括一个Span.SequenceCompareTo(…)扩展方法(加上一个Span.SequenceEqualTo),可以用来比较两个ReadOnlySpan<T>实例(其中T: IComparable<T>)。
在最初的GitHub提案中,讨论了与跳转表计算的方法比较,将字节[]读为长[],SIMD使用,以及对CLR实现的memcmp的p/调用。
继续向前,这应该是您比较字节数组或字节范围的首选方法(对于. net Standard 2.1 api,应该使用Span<byte>而不是byte[]),并且它足够快,您应该不再关心优化它(不,尽管在名称上有相似之处,但它的性能不像可怕的Enumerable.SequenceEqual那样糟糕)。
#if NETCOREAPP3_0_OR_GREATER
// Using the platform-native Span<T>.SequenceEqual<T>(..)
public static int Compare(byte[] range1, int offset1, byte[] range2, int offset2, int count)
{
var span1 = range1.AsSpan(offset1, count);
var span2 = range2.AsSpan(offset2, count);
return span1.SequenceCompareTo(span2);
// or, if you don't care about ordering
// return span1.SequenceEqual(span2);
}
#else
// The most basic implementation, in platform-agnostic, safe C#
public static bool Compare(byte[] range1, int offset1, byte[] range2, int offset2, int count)
{
// Working backwards lets the compiler optimize away bound checking after the first loop
for (int i = count - 1; i >= 0; --i)
{
if (range1[offset1 + i] != range2[offset2 + i])
{
return false;
}
}
return true;
}
#endif
受到ArekBulski发布的EqualBytesLongUnrolled方法的启发,我确定了一个附加优化的解决方案。在我的实例中,数组中的数组差异往往在数组的尾部附近。在测试中,我发现当这种情况发生在大型数组中时,能够以相反的顺序比较数组元素使这种解决方案比基于memcmp的解决方案获得了巨大的性能提升。下面是解决方案:
public enum CompareDirection { Forward, Backward }
private static unsafe bool UnsafeEquals(byte[] a, byte[] b, CompareDirection direction = CompareDirection.Forward)
{
// returns when a and b are same array or both null
if (a == b) return true;
// if either is null or different lengths, can't be equal
if (a == null || b == null || a.Length != b.Length)
return false;
const int UNROLLED = 16; // count of longs 'unrolled' in optimization
int size = sizeof(long) * UNROLLED; // 128 bytes (min size for 'unrolled' optimization)
int len = a.Length;
int n = len / size; // count of full 128 byte segments
int r = len % size; // count of remaining 'unoptimized' bytes
// pin the arrays and access them via pointers
fixed (byte* pb_a = a, pb_b = b)
{
if (r > 0 && direction == CompareDirection.Backward)
{
byte* pa = pb_a + len - 1;
byte* pb = pb_b + len - 1;
byte* phead = pb_a + len - r;
while(pa >= phead)
{
if (*pa != *pb) return false;
pa--;
pb--;
}
}
if (n > 0)
{
int nOffset = n * size;
if (direction == CompareDirection.Forward)
{
long* pa = (long*)pb_a;
long* pb = (long*)pb_b;
long* ptail = (long*)(pb_a + nOffset);
while (pa < ptail)
{
if (*(pa + 0) != *(pb + 0) || *(pa + 1) != *(pb + 1) ||
*(pa + 2) != *(pb + 2) || *(pa + 3) != *(pb + 3) ||
*(pa + 4) != *(pb + 4) || *(pa + 5) != *(pb + 5) ||
*(pa + 6) != *(pb + 6) || *(pa + 7) != *(pb + 7) ||
*(pa + 8) != *(pb + 8) || *(pa + 9) != *(pb + 9) ||
*(pa + 10) != *(pb + 10) || *(pa + 11) != *(pb + 11) ||
*(pa + 12) != *(pb + 12) || *(pa + 13) != *(pb + 13) ||
*(pa + 14) != *(pb + 14) || *(pa + 15) != *(pb + 15)
)
{
return false;
}
pa += UNROLLED;
pb += UNROLLED;
}
}
else
{
long* pa = (long*)(pb_a + nOffset);
long* pb = (long*)(pb_b + nOffset);
long* phead = (long*)pb_a;
while (phead < pa)
{
if (*(pa - 1) != *(pb - 1) || *(pa - 2) != *(pb - 2) ||
*(pa - 3) != *(pb - 3) || *(pa - 4) != *(pb - 4) ||
*(pa - 5) != *(pb - 5) || *(pa - 6) != *(pb - 6) ||
*(pa - 7) != *(pb - 7) || *(pa - 8) != *(pb - 8) ||
*(pa - 9) != *(pb - 9) || *(pa - 10) != *(pb - 10) ||
*(pa - 11) != *(pb - 11) || *(pa - 12) != *(pb - 12) ||
*(pa - 13) != *(pb - 13) || *(pa - 14) != *(pb - 14) ||
*(pa - 15) != *(pb - 15) || *(pa - 16) != *(pb - 16)
)
{
return false;
}
pa -= UNROLLED;
pb -= UNROLLED;
}
}
}
if (r > 0 && direction == CompareDirection.Forward)
{
byte* pa = pb_a + len - r;
byte* pb = pb_b + len - r;
byte* ptail = pb_a + len;
while(pa < ptail)
{
if (*pa != *pb) return false;
pa++;
pb++;
}
}
}
return true;
}
我发布了一个类似的关于检查byte[]是否全是0的问题。(SIMD代码被打败了,所以我从这个答案中删除了它。)下面是我比较过的最快的代码:
static unsafe bool EqualBytesLongUnrolled (byte[] data1, byte[] data2)
{
if (data1 == data2)
return true;
if (data1.Length != data2.Length)
return false;
fixed (byte* bytes1 = data1, bytes2 = data2) {
int len = data1.Length;
int rem = len % (sizeof(long) * 16);
long* b1 = (long*)bytes1;
long* b2 = (long*)bytes2;
long* e1 = (long*)(bytes1 + len - rem);
while (b1 < e1) {
if (*(b1) != *(b2) || *(b1 + 1) != *(b2 + 1) ||
*(b1 + 2) != *(b2 + 2) || *(b1 + 3) != *(b2 + 3) ||
*(b1 + 4) != *(b2 + 4) || *(b1 + 5) != *(b2 + 5) ||
*(b1 + 6) != *(b2 + 6) || *(b1 + 7) != *(b2 + 7) ||
*(b1 + 8) != *(b2 + 8) || *(b1 + 9) != *(b2 + 9) ||
*(b1 + 10) != *(b2 + 10) || *(b1 + 11) != *(b2 + 11) ||
*(b1 + 12) != *(b2 + 12) || *(b1 + 13) != *(b2 + 13) ||
*(b1 + 14) != *(b2 + 14) || *(b1 + 15) != *(b2 + 15))
return false;
b1 += 16;
b2 += 16;
}
for (int i = 0; i < rem; i++)
if (data1 [len - 1 - i] != data2 [len - 1 - i])
return false;
return true;
}
}
测量两个256MB字节数组:
UnsafeCompare : 86,8784 ms
EqualBytesSimd : 71,5125 ms
EqualBytesSimdUnrolled : 73,1917 ms
EqualBytesLongUnrolled : 39,8623 ms