如何将字节数组转换为十六进制字符串,反之亦然?


当前回答

这是我的尝试。我创建了一对扩展类来扩展字符串和字节。在大文件测试中,性能与Byte Manipulation 2相当。

下面的ToHexString代码是查找和移位算法的优化实现。它与Behrooz的方法几乎相同,但使用foreach进行迭代,计数器比显式索引更快。

在我的机器上,它排在Byte Manipulation 2之后,排在第二位,是非常可读的代码。以下测试结果也值得关注:

ToHexStringCharArrayWithCharArrayLookup:41589.69平均刻度(超过1000次),1.5倍ToHexStringCharArrayWithStringLookup:50764.06平均滴答(超过1000次),1.2XToHexStringStringBuilderWithCharArrayLookup:62812.87平均滴答(超过1000次),1.0X

根据上述结果,可以得出以下结论:

索引到字符串以执行查找与char数组在大型文件测试中非常重要。使用已知容量的StringBuilder与使用字符的惩罚创建字符串的已知大小的数组甚至更重要。

代码如下:

using System;

namespace ConversionExtensions
{
    public static class ByteArrayExtensions
    {
        private readonly static char[] digits = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

        public static string ToHexString(this byte[] bytes)
        {
            char[] hex = new char[bytes.Length * 2];
            int index = 0;

            foreach (byte b in bytes)
            {
                hex[index++] = digits[b >> 4];
                hex[index++] = digits[b & 0x0F];
            }

            return new string(hex);
        }
    }
}


using System;
using System.IO;

namespace ConversionExtensions
{
    public static class StringExtensions
    {
        public static byte[] ToBytes(this string hexString)
        {
            if (!string.IsNullOrEmpty(hexString) && hexString.Length % 2 != 0)
            {
                throw new FormatException("Hexadecimal string must not be empty and must contain an even number of digits to be valid.");
            }

            hexString = hexString.ToUpperInvariant();
            byte[] data = new byte[hexString.Length / 2];

            for (int index = 0; index < hexString.Length; index += 2)
            {
                int highDigitValue = hexString[index] <= '9' ? hexString[index] - '0' : hexString[index] - 'A' + 10;
                int lowDigitValue = hexString[index + 1] <= '9' ? hexString[index + 1] - '0' : hexString[index + 1] - 'A' + 10;

                if (highDigitValue < 0 || lowDigitValue < 0 || highDigitValue > 15 || lowDigitValue > 15)
                {
                    throw new FormatException("An invalid digit was encountered. Valid hexadecimal digits are 0-9 and A-F.");
                }
                else
                {
                    byte value = (byte)((highDigitValue << 4) | (lowDigitValue & 0x0F));
                    data[index / 2] = value;
                }
            }

            return data;
        }
    }
}

下面是当我将代码放在我机器上的@patridge测试项目中时得到的测试结果。我还添加了一个从十六进制转换为字节数组的测试。使用我的代码的测试运行是ByteArrayToHexViaOptimizedLookupAndShift和HexToByteArrayViaByteManipulation。HexToByteArrayViaConvertToByte取自XXXX。HexToByteArrayViaSoapHexBinary是@Mykroft的答案。

Intel Pentium III Xeon处理器核心:4<br/>当前时钟速度:1576<br/>最大时钟速度:3092<br/>将字节数组转换为十六进制字符串表示ByteArrayToHexViaByteManipulation2:39366.64平均滴答(超过1000次),22.4倍ByteArrayToHexViaOptimizedLookupAndShift:41588.64平均刻度(超过1000次),21.2倍ByteArrayToHexViaLookup:55509.56次平均点击(超过1000次),15.9倍ByteArrayToHexViaByteManipulation:65349.12平均刻度(超过1000次),13.5XByteArrayToHexViaLookupAndShift:86926.87平均刻度(超过1000运行),10.2XByteArrayToHexStringViaBitConverter:平均139353.73滴答声(超过1000次),6.3XByteArrayToHexViaSoapHexBinary:314598.77平均刻度(超过1000次),2.8XByteArrayToHexStringViaStringBuilderForEachByteToString:344264.63平均刻度(超过1000次),2.6XByteArrayToHexStringViaStringBuilderAggregateByteToString:382623.44平均滴答声(超过1000次),2.3XByteArrayToHexStringViaStringBuilderForEachAppend格式:818111.95平均滴答声(超过1000次),1.1倍ByteArrayToHexStringViaStringConcatArray ConvertAll:839424.84平均值滴答声(超过1000次),1.1XByteArrayToHexStringViaStringBuilderAggregateAppend格式:867303.98平均刻度(超过1000次),1.0XByteArrayToHexStringViaStringJoinArray ConvertAll:平均882710.28滴答声(超过1000次),1.0X

其他回答

另一个快速功能。。。

private static readonly byte[] HexNibble = new byte[] {
    0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
    0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF
};

public static byte[] HexStringToByteArray( string str )
{
    int byteCount = str.Length >> 1;
    byte[] result = new byte[byteCount + (str.Length & 1)];
    for( int i = 0; i < byteCount; i++ )
        result[i] = (byte) (HexNibble[str[i << 1] - 48] << 4 | HexNibble[str[(i << 1) + 1] - 48]);
    if( (str.Length & 1) != 0 )
        result[byteCount] = (byte) HexNibble[str[str.Length - 1] - 48];
    return result;
}

多样性的另一种变化:

public static byte[] FromHexString(string src)
{
    if (String.IsNullOrEmpty(src))
        return null;

    int index = src.Length;
    int sz = index / 2;
    if (sz <= 0)
        return null;

    byte[] rc = new byte[sz];

    while (--sz >= 0)
    {
        char lo = src[--index];
        char hi = src[--index];

        rc[sz] = (byte)(
            (
                (hi >= '0' && hi <= '9') ? hi - '0' :
                (hi >= 'a' && hi <= 'f') ? hi - 'a' + 10 :
                (hi >= 'A' && hi <= 'F') ? hi - 'A' + 10 :
                0
            )
            << 4 | 
            (
                (lo >= '0' && lo <= '9') ? lo - '0' :
                (lo >= 'a' && lo <= 'f') ? lo - 'a' + 10 :
                (lo >= 'A' && lo <= 'F') ? lo - 'A' + 10 :
                0
            )
        );
    }

    return rc;          
}

下面还通过允许本机小写选项扩展了这里的优秀答案,并且还处理null或空输入,并使其成为扩展方法。

    /// <summary>
    /// Converts the byte array to a hex string very fast. Excellent job
    /// with code lightly adapted from 'community wiki' here: https://stackoverflow.com/a/14333437/264031
    /// (the function was originally named: ByteToHexBitFiddle). Now allows a native lowerCase option
    /// to be input and allows null or empty inputs (null returns null, empty returns empty).
    /// </summary>
    public static string ToHexString(this byte[] bytes, bool lowerCase = false)
    {
        if (bytes == null)
            return null;
        else if (bytes.Length == 0)
            return "";

        char[] c = new char[bytes.Length * 2];

        int b;
        int xAddToAlpha = lowerCase ? 87 : 55;
        int xAddToDigit = lowerCase ? -39 : -7;

        for (int i = 0; i < bytes.Length; i++) {

            b = bytes[i] >> 4;
            c[i * 2] = (char)(xAddToAlpha + b + (((b - 10) >> 31) & xAddToDigit));

            b = bytes[i] & 0xF;
            c[i * 2 + 1] = (char)(xAddToAlpha + b + (((b - 10) >> 31) & xAddToDigit));
        }

        string val = new string(c);
        return val;
    }

    public static string ToHexString(this IEnumerable<byte> bytes, bool lowerCase = false)
    {
        if (bytes == null)
            return null;
        byte[] arr = bytes.ToArray();
        return arr.ToHexString(lowerCase);
    }

安全版本:

public static class HexHelper
{
    [System.Diagnostics.Contracts.Pure]
    public static string ToHex(this byte[] value)
    {
        if (value == null)
            throw new ArgumentNullException("value");

        const string hexAlphabet = @"0123456789ABCDEF";

        var chars = new char[checked(value.Length * 2)];
        unchecked
        {
            for (int i = 0; i < value.Length; i++)
            {
                chars[i * 2] = hexAlphabet[value[i] >> 4];
                chars[i * 2 + 1] = hexAlphabet[value[i] & 0xF];
            }
        }
        return new string(chars);
    }

    [System.Diagnostics.Contracts.Pure]
    public static byte[] FromHex(this string value)
    {
        if (value == null)
            throw new ArgumentNullException("value");
        if (value.Length % 2 != 0)
            throw new ArgumentException("Hexadecimal value length must be even.", "value");

        unchecked
        {
            byte[] result = new byte[value.Length / 2];
            for (int i = 0; i < result.Length; i++)
            {
                // 0(48) - 9(57) -> 0 - 9
                // A(65) - F(70) -> 10 - 15
                int b = value[i * 2]; // High 4 bits.
                int val = ((b - '0') + ((('9' - b) >> 31) & -7)) << 4;
                b = value[i * 2 + 1]; // Low 4 bits.
                val += (b - '0') + ((('9' - b) >> 31) & -7);
                result[i] = checked((byte)val);
            }
            return result;
        }
    }
}

不安全版本适用于那些喜欢性能且不怕不安全的人。ToHex快35%,FromHex快10%。

public static class HexUnsafeHelper
{
    [System.Diagnostics.Contracts.Pure]
    public static unsafe string ToHex(this byte[] value)
    {
        if (value == null)
            throw new ArgumentNullException("value");

        const string alphabet = @"0123456789ABCDEF";

        string result = new string(' ', checked(value.Length * 2));
        fixed (char* alphabetPtr = alphabet)
        fixed (char* resultPtr = result)
        {
            char* ptr = resultPtr;
            unchecked
            {
                for (int i = 0; i < value.Length; i++)
                {
                    *ptr++ = *(alphabetPtr + (value[i] >> 4));
                    *ptr++ = *(alphabetPtr + (value[i] & 0xF));
                }
            }
        }
        return result;
    }

    [System.Diagnostics.Contracts.Pure]
    public static unsafe byte[] FromHex(this string value)
    {
        if (value == null)
            throw new ArgumentNullException("value");
        if (value.Length % 2 != 0)
            throw new ArgumentException("Hexadecimal value length must be even.", "value");

        unchecked
        {
            byte[] result = new byte[value.Length / 2];
            fixed (char* valuePtr = value)
            {
                char* valPtr = valuePtr;
                for (int i = 0; i < result.Length; i++)
                {
                    // 0(48) - 9(57) -> 0 - 9
                    // A(65) - F(70) -> 10 - 15
                    int b = *valPtr++; // High 4 bits.
                    int val = ((b - '0') + ((('9' - b) >> 31) & -7)) << 4;
                    b = *valPtr++; // Low 4 bits.
                    val += (b - '0') + ((('9' - b) >> 31) & -7);
                    result[i] = checked((byte)val);
                }
            }
            return result;
        }
    }
}

顺便提一下对于每次调用的转换函数错误时初始化字母表的基准测试,字母表必须是常量(对于字符串)或静态只读(对于字符[])。然后,基于字母表的字节[]到字符串的转换变得和字节操作版本一样快。

当然,测试必须在Release中编译(带有优化),并关闭调试选项“抑制JIT优化”(如果代码必须可调试,则“仅启用我的代码”也是如此)。

我将参加这个比特拨弄比赛,因为我有一个同样使用比特拨弄来解码十六进制的答案。请注意,使用字符数组可能会更快,因为调用StringBuilder方法也需要时间。

public static String ToHex (byte[] data)
{
    int dataLength = data.Length;
    // pre-create the stringbuilder using the length of the data * 2, precisely enough
    StringBuilder sb = new StringBuilder (dataLength * 2);
    for (int i = 0; i < dataLength; i++) {
        int b = data [i];

        // check using calculation over bits to see if first tuple is a letter
        // isLetter is zero if it is a digit, 1 if it is a letter
        int isLetter = (b >> 7) & ((b >> 6) | (b >> 5)) & 1;

        // calculate the code using a multiplication to make up the difference between
        // a digit character and an alphanumerical character
        int code = '0' + ((b >> 4) & 0xF) + isLetter * ('A' - '9' - 1);
        // now append the result, after casting the code point to a character
        sb.Append ((Char)code);

        // do the same with the lower (less significant) tuple
        isLetter = (b >> 3) & ((b >> 2) | (b >> 1)) & 1;
        code = '0' + (b & 0xF) + isLetter * ('A' - '9' - 1);
        sb.Append ((Char)code);
    }
    return sb.ToString ();
}

public static byte[] FromHex (String hex)
{

    // pre-create the array
    int resultLength = hex.Length / 2;
    byte[] result = new byte[resultLength];
    // set validity = 0 (0 = valid, anything else is not valid)
    int validity = 0;
    int c, isLetter, value, validDigitStruct, validDigit, validLetterStruct, validLetter;
    for (int i = 0, hexOffset = 0; i < resultLength; i++, hexOffset += 2) {
        c = hex [hexOffset];

        // check using calculation over bits to see if first char is a letter
        // isLetter is zero if it is a digit, 1 if it is a letter (upper & lowercase)
        isLetter = (c >> 6) & 1;

        // calculate the tuple value using a multiplication to make up the difference between
        // a digit character and an alphanumerical character
        // minus 1 for the fact that the letters are not zero based
        value = ((c & 0xF) + isLetter * (-1 + 10)) << 4;

        // check validity of all the other bits
        validity |= c >> 7; // changed to >>, maybe not OK, use UInt?

        validDigitStruct = (c & 0x30) ^ 0x30;
        validDigit = ((c & 0x8) >> 3) * (c & 0x6);
        validity |= (isLetter ^ 1) * (validDigitStruct | validDigit);

        validLetterStruct = c & 0x18;
        validLetter = (((c - 1) & 0x4) >> 2) * ((c - 1) & 0x2);
        validity |= isLetter * (validLetterStruct | validLetter);

        // do the same with the lower (less significant) tuple
        c = hex [hexOffset + 1];
        isLetter = (c >> 6) & 1;
        value ^= (c & 0xF) + isLetter * (-1 + 10);
        result [i] = (byte)value;

        // check validity of all the other bits
        validity |= c >> 7; // changed to >>, maybe not OK, use UInt?

        validDigitStruct = (c & 0x30) ^ 0x30;
        validDigit = ((c & 0x8) >> 3) * (c & 0x6);
        validity |= (isLetter ^ 1) * (validDigitStruct | validDigit);

        validLetterStruct = c & 0x18;
        validLetter = (((c - 1) & 0x4) >> 2) * ((c - 1) & 0x2);
        validity |= isLetter * (validLetterStruct | validLetter);
    }

    if (validity != 0) {
        throw new ArgumentException ("Hexadecimal encoding incorrect for input " + hex);
    }

    return result;
}

从Java代码转换而来。