如何在c++中转换大端值和小端值?

为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。


注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。


当前回答

查找位移位,因为这基本上是所有你需要做的交换从小->大端dian。然后根据位的大小,改变位移位的方式。

其他回答

如果您这样做是为了在不同平台之间传输数据,请查看ntoh和hton函数。

从大端序到小端序的过程与从小端序到大端序的过程是一样的。

下面是一些示例代码:

void swapByteOrder(unsigned short& us)
{
    us = (us >> 8) |
         (us << 8);
}

void swapByteOrder(unsigned int& ui)
{
    ui = (ui >> 24) |
         ((ui<<8) & 0x00FF0000) |
         ((ui>>8) & 0x0000FF00) |
         (ui << 24);
}

void swapByteOrder(unsigned long long& ull)
{
    ull = (ull >> 56) |
          ((ull<<40) & 0x00FF000000000000) |
          ((ull<<24) & 0x0000FF0000000000) |
          ((ull<<8) & 0x000000FF00000000) |
          ((ull>>8) & 0x00000000FF000000) |
          ((ull>>24) & 0x0000000000FF0000) |
          ((ull>>40) & 0x000000000000FF00) |
          (ull << 56);
}

摘自Rob Pike的《字节顺序谬误》:

假设数据流有一个小端编码的32位整数。下面是如何提取它(假设无符号字节):

i = (data[0]<<0) | (data[1]<<8) | (data[2]<<16) | ((unsigned)data[3]<<24);

如果它是big-endian,下面是如何提取它:

i = (data[3]<<0) | (data[2]<<8) | (data[1]<<16) | ((unsigned)data[0]<<24);

TL;DR:不要担心你的平台原生顺序,重要的是你从中读取的流的字节顺序,你最好希望它是定义良好的。

注1:这里int和unsigned int是32位,否则类型可能需要调整。

注2:最后一个字节必须在移位前显式转换为unsigned,因为默认情况下它被提升为int,移位24位意味着操作符号位,这是未定义行为。

虽然没有使用固有函数有效,但肯定是可移植的。我的回答:

#include <cstdint>
#include <type_traits>

/**
 * Perform an endian swap of bytes against a templatized unsigned word.
 *
 * @tparam value_type The data type to perform the endian swap against.
 * @param value       The data value to swap.
 *
 * @return value_type The resulting swapped word.
 */
template <typename value_type>
constexpr inline auto endian_swap(value_type value) -> value_type
{
    using half_type = typename std::conditional<
        sizeof(value_type) == 8u,
        uint32_t,
        typename std::conditional<sizeof(value_type) == 4u, uint16_t, uint8_t>::
            type>::type;

    size_t const    half_bits  = sizeof(value_type) * 8u / 2u;
    half_type const upper_half = static_cast<half_type>(value >> half_bits);
    half_type const lower_half = static_cast<half_type>(value);

    if (sizeof(value_type) == 2u)
    {
        return (static_cast<value_type>(lower_half) << half_bits) | upper_half;
    }

    return ((static_cast<value_type>(endian_swap(lower_half)) << half_bits) |
            endian_swap(upper_half));
}

实现优化器友好的未对齐非就地末端访问器的可移植技术。它们处理每个编译器、每个边界对齐和每个字节排序。这些未对齐的例程被补充或讨论,取决于本机的端序和对齐方式。部分列出,但你懂的。BO*是基于本机字节排序的常数值。

uint32_t sw_get_uint32_1234(pu32)
uint32_1234 *pu32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32_1234[0] = (*pu32)[BO32_0];
  bou32.u32_1234[1] = (*pu32)[BO32_1];
  bou32.u32_1234[2] = (*pu32)[BO32_2];
  bou32.u32_1234[3] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_1234(pu32, u32)
uint32_1234 *pu32;
uint32_t u32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_1234[0];
  (*pu32)[BO32_1] = bou32.u32_1234[1];
  (*pu32)[BO32_2] = bou32.u32_1234[2];
  (*pu32)[BO32_3] = bou32.u32_1234[3];
}

#if HAS_SW_INT64
int64 sw_get_int64_12345678(pi64)
int64_12345678 *pi64;
{
  union {
    int64_12345678 i64_12345678;
    int64 i64;
  } boi64;
  boi64.i64_12345678[0] = (*pi64)[BO64_0];
  boi64.i64_12345678[1] = (*pi64)[BO64_1];
  boi64.i64_12345678[2] = (*pi64)[BO64_2];
  boi64.i64_12345678[3] = (*pi64)[BO64_3];
  boi64.i64_12345678[4] = (*pi64)[BO64_4];
  boi64.i64_12345678[5] = (*pi64)[BO64_5];
  boi64.i64_12345678[6] = (*pi64)[BO64_6];
  boi64.i64_12345678[7] = (*pi64)[BO64_7];
  return(boi64.i64);
}
#endif

int32_t sw_get_int32_3412(pi32)
int32_3412 *pi32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32_3412[2] = (*pi32)[BO32_0];
  boi32.i32_3412[3] = (*pi32)[BO32_1];
  boi32.i32_3412[0] = (*pi32)[BO32_2];
  boi32.i32_3412[1] = (*pi32)[BO32_3];
  return(boi32.i32);
}

void sw_set_int32_3412(pi32, i32)
int32_3412 *pi32;
int32_t i32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32 = i32;
  (*pi32)[BO32_0] = boi32.i32_3412[2];
  (*pi32)[BO32_1] = boi32.i32_3412[3];
  (*pi32)[BO32_2] = boi32.i32_3412[0];
  (*pi32)[BO32_3] = boi32.i32_3412[1];
}

uint32_t sw_get_uint32_3412(pu32)
uint32_3412 *pu32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32_3412[2] = (*pu32)[BO32_0];
  bou32.u32_3412[3] = (*pu32)[BO32_1];
  bou32.u32_3412[0] = (*pu32)[BO32_2];
  bou32.u32_3412[1] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_3412(pu32, u32)
uint32_3412 *pu32;
uint32_t u32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_3412[2];
  (*pu32)[BO32_1] = bou32.u32_3412[3];
  (*pu32)[BO32_2] = bou32.u32_3412[0];
  (*pu32)[BO32_3] = bou32.u32_3412[1];
}

float sw_get_float_1234(pf)
float_1234 *pf;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f_1234[0] = (*pf)[BO32_0];
  bof.f_1234[1] = (*pf)[BO32_1];
  bof.f_1234[2] = (*pf)[BO32_2];
  bof.f_1234[3] = (*pf)[BO32_3];
  return(bof.f);
}

void sw_set_float_1234(pf, f)
float_1234 *pf;
float f;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f = (float)f;
  (*pf)[BO32_0] = bof.f_1234[0];
  (*pf)[BO32_1] = bof.f_1234[1];
  (*pf)[BO32_2] = bof.f_1234[2];
  (*pf)[BO32_3] = bof.f_1234[3];
}

double sw_get_double_12345678(pd)
double_12345678 *pd;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d_12345678[0] = (*pd)[BO64_0];
  bod.d_12345678[1] = (*pd)[BO64_1];
  bod.d_12345678[2] = (*pd)[BO64_2];
  bod.d_12345678[3] = (*pd)[BO64_3];
  bod.d_12345678[4] = (*pd)[BO64_4];
  bod.d_12345678[5] = (*pd)[BO64_5];
  bod.d_12345678[6] = (*pd)[BO64_6];
  bod.d_12345678[7] = (*pd)[BO64_7];
  return(bod.d);
}

void sw_set_double_12345678(pd, d)
double_12345678 *pd;
double d;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d = d;
  (*pd)[BO64_0] = bod.d_12345678[0];
  (*pd)[BO64_1] = bod.d_12345678[1];
  (*pd)[BO64_2] = bod.d_12345678[2];
  (*pd)[BO64_3] = bod.d_12345678[3];
  (*pd)[BO64_4] = bod.d_12345678[4];
  (*pd)[BO64_5] = bod.d_12345678[5];
  (*pd)[BO64_6] = bod.d_12345678[6];
  (*pd)[BO64_7] = bod.d_12345678[7];
}

如果不与访问器一起使用,这些类型def的好处是会引发编译器错误,从而减少被遗忘的访问器错误。

typedef char int8_1[1], uint8_1[1];

typedef char int16_12[2], uint16_12[2]; /* little endian */
typedef char int16_21[2], uint16_21[2]; /* big endian */

typedef char int24_321[3], uint24_321[3]; /* Alpha Micro, PDP-11 */

typedef char int32_1234[4], uint32_1234[4]; /* little endian */
typedef char int32_3412[4], uint32_3412[4]; /* Alpha Micro, PDP-11 */
typedef char int32_4321[4], uint32_4321[4]; /* big endian */

typedef char int64_12345678[8], uint64_12345678[8]; /* little endian */
typedef char int64_34128756[8], uint64_34128756[8]; /* Alpha Micro, PDP-11 */
typedef char int64_87654321[8], uint64_87654321[8]; /* big endian */

typedef char float_1234[4]; /* little endian */
typedef char float_3412[4]; /* Alpha Micro, PDP-11 */
typedef char float_4321[4]; /* big endian */

typedef char double_12345678[8]; /* little endian */
typedef char double_78563412[8]; /* Alpha Micro? */
typedef char double_87654321[8]; /* big endian */