如何在c++中转换大端值和小端值?

为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。


注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。


当前回答

我有这个代码,允许我从HOST_ENDIAN_ORDER(无论它是什么)转换为LITTLE_ENDIAN_ORDER或BIG_ENDIAN_ORDER。我使用一个模板,所以如果我试图从HOST_ENDIAN_ORDER转换为LITTLE_ENDIAN_ORDER,他们恰好是相同的机器为我编译,不会生成任何代码。

下面是带有注释的代码:

// We define some constant for little, big and host endianess. Here I use 
// BOOST_LITTLE_ENDIAN/BOOST_BIG_ENDIAN to check the host indianess. If you
// don't want to use boost you will have to modify this part a bit.
enum EEndian
{
  LITTLE_ENDIAN_ORDER,
  BIG_ENDIAN_ORDER,
#if defined(BOOST_LITTLE_ENDIAN)
  HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
#elif defined(BOOST_BIG_ENDIAN)
  HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
#else
#error "Impossible de determiner l'indianness du systeme cible."
#endif
};

// this function swap the bytes of values given it's size as a template
// parameter (could sizeof be used?).
template <class T, unsigned int size>
inline T SwapBytes(T value)
{
  union
  {
     T value;
     char bytes[size];
  } in, out;

  in.value = value;

  for (unsigned int i = 0; i < size / 2; ++i)
  {
     out.bytes[i] = in.bytes[size - 1 - i];
     out.bytes[size - 1 - i] = in.bytes[i];
  }

  return out.value;
}

// Here is the function you will use. Again there is two compile-time assertion
// that use the boost librarie. You could probably comment them out, but if you
// do be cautious not to use this function for anything else than integers
// types. This function need to be calles like this :
//
//     int x = someValue;
//     int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
//
template<EEndian from, EEndian to, class T>
inline T EndianSwapBytes(T value)
{
  // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
  BOOST_STATIC_ASSERT(sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);

  // A : La donnée à swapper est d'un type arithmetic
  BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);

  // Si from et to sont du même type on ne swap pas.
  if (from == to)
     return value;

  return SwapBytes<T, sizeof(T)>(value);
}

其他回答

我真的很惊讶没有人提到htobeXX和betohXX函数。它们定义在end .h中,非常类似于网络函数htonXX。

我喜欢这个,只是为了风格:-)

long swap(long i) {
    char *c = (char *) &i;
    return * (long *) (char[]) {c[3], c[2], c[1], c[0] };
}

请注意,至少对于Windows, htonl()比它们的内在对应_byteswap_ulong()慢得多。前者是对ws2_32.dll的一个DLL库调用,后者是一条BSWAP汇编指令。因此,如果你正在编写一些依赖于平台的代码,为了提高速度,最好使用intrinsic:

#define htonl(x) _byteswap_ulong(x)

这对于。png图像处理尤其重要,其中所有整数都保存在大端格式中,并说明“One can use htonl()…”{用来降低典型Windows程序的速度,如果你没有准备好}。

实现优化器友好的未对齐非就地末端访问器的可移植技术。它们处理每个编译器、每个边界对齐和每个字节排序。这些未对齐的例程被补充或讨论,取决于本机的端序和对齐方式。部分列出,但你懂的。BO*是基于本机字节排序的常数值。

uint32_t sw_get_uint32_1234(pu32)
uint32_1234 *pu32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32_1234[0] = (*pu32)[BO32_0];
  bou32.u32_1234[1] = (*pu32)[BO32_1];
  bou32.u32_1234[2] = (*pu32)[BO32_2];
  bou32.u32_1234[3] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_1234(pu32, u32)
uint32_1234 *pu32;
uint32_t u32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_1234[0];
  (*pu32)[BO32_1] = bou32.u32_1234[1];
  (*pu32)[BO32_2] = bou32.u32_1234[2];
  (*pu32)[BO32_3] = bou32.u32_1234[3];
}

#if HAS_SW_INT64
int64 sw_get_int64_12345678(pi64)
int64_12345678 *pi64;
{
  union {
    int64_12345678 i64_12345678;
    int64 i64;
  } boi64;
  boi64.i64_12345678[0] = (*pi64)[BO64_0];
  boi64.i64_12345678[1] = (*pi64)[BO64_1];
  boi64.i64_12345678[2] = (*pi64)[BO64_2];
  boi64.i64_12345678[3] = (*pi64)[BO64_3];
  boi64.i64_12345678[4] = (*pi64)[BO64_4];
  boi64.i64_12345678[5] = (*pi64)[BO64_5];
  boi64.i64_12345678[6] = (*pi64)[BO64_6];
  boi64.i64_12345678[7] = (*pi64)[BO64_7];
  return(boi64.i64);
}
#endif

int32_t sw_get_int32_3412(pi32)
int32_3412 *pi32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32_3412[2] = (*pi32)[BO32_0];
  boi32.i32_3412[3] = (*pi32)[BO32_1];
  boi32.i32_3412[0] = (*pi32)[BO32_2];
  boi32.i32_3412[1] = (*pi32)[BO32_3];
  return(boi32.i32);
}

void sw_set_int32_3412(pi32, i32)
int32_3412 *pi32;
int32_t i32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32 = i32;
  (*pi32)[BO32_0] = boi32.i32_3412[2];
  (*pi32)[BO32_1] = boi32.i32_3412[3];
  (*pi32)[BO32_2] = boi32.i32_3412[0];
  (*pi32)[BO32_3] = boi32.i32_3412[1];
}

uint32_t sw_get_uint32_3412(pu32)
uint32_3412 *pu32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32_3412[2] = (*pu32)[BO32_0];
  bou32.u32_3412[3] = (*pu32)[BO32_1];
  bou32.u32_3412[0] = (*pu32)[BO32_2];
  bou32.u32_3412[1] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_3412(pu32, u32)
uint32_3412 *pu32;
uint32_t u32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_3412[2];
  (*pu32)[BO32_1] = bou32.u32_3412[3];
  (*pu32)[BO32_2] = bou32.u32_3412[0];
  (*pu32)[BO32_3] = bou32.u32_3412[1];
}

float sw_get_float_1234(pf)
float_1234 *pf;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f_1234[0] = (*pf)[BO32_0];
  bof.f_1234[1] = (*pf)[BO32_1];
  bof.f_1234[2] = (*pf)[BO32_2];
  bof.f_1234[3] = (*pf)[BO32_3];
  return(bof.f);
}

void sw_set_float_1234(pf, f)
float_1234 *pf;
float f;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f = (float)f;
  (*pf)[BO32_0] = bof.f_1234[0];
  (*pf)[BO32_1] = bof.f_1234[1];
  (*pf)[BO32_2] = bof.f_1234[2];
  (*pf)[BO32_3] = bof.f_1234[3];
}

double sw_get_double_12345678(pd)
double_12345678 *pd;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d_12345678[0] = (*pd)[BO64_0];
  bod.d_12345678[1] = (*pd)[BO64_1];
  bod.d_12345678[2] = (*pd)[BO64_2];
  bod.d_12345678[3] = (*pd)[BO64_3];
  bod.d_12345678[4] = (*pd)[BO64_4];
  bod.d_12345678[5] = (*pd)[BO64_5];
  bod.d_12345678[6] = (*pd)[BO64_6];
  bod.d_12345678[7] = (*pd)[BO64_7];
  return(bod.d);
}

void sw_set_double_12345678(pd, d)
double_12345678 *pd;
double d;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d = d;
  (*pd)[BO64_0] = bod.d_12345678[0];
  (*pd)[BO64_1] = bod.d_12345678[1];
  (*pd)[BO64_2] = bod.d_12345678[2];
  (*pd)[BO64_3] = bod.d_12345678[3];
  (*pd)[BO64_4] = bod.d_12345678[4];
  (*pd)[BO64_5] = bod.d_12345678[5];
  (*pd)[BO64_6] = bod.d_12345678[6];
  (*pd)[BO64_7] = bod.d_12345678[7];
}

如果不与访问器一起使用,这些类型def的好处是会引发编译器错误,从而减少被遗忘的访问器错误。

typedef char int8_1[1], uint8_1[1];

typedef char int16_12[2], uint16_12[2]; /* little endian */
typedef char int16_21[2], uint16_21[2]; /* big endian */

typedef char int24_321[3], uint24_321[3]; /* Alpha Micro, PDP-11 */

typedef char int32_1234[4], uint32_1234[4]; /* little endian */
typedef char int32_3412[4], uint32_3412[4]; /* Alpha Micro, PDP-11 */
typedef char int32_4321[4], uint32_4321[4]; /* big endian */

typedef char int64_12345678[8], uint64_12345678[8]; /* little endian */
typedef char int64_34128756[8], uint64_34128756[8]; /* Alpha Micro, PDP-11 */
typedef char int64_87654321[8], uint64_87654321[8]; /* big endian */

typedef char float_1234[4]; /* little endian */
typedef char float_3412[4]; /* Alpha Micro, PDP-11 */
typedef char float_4321[4]; /* big endian */

typedef char double_12345678[8]; /* little endian */
typedef char double_78563412[8]; /* Alpha Micro? */
typedef char double_87654321[8]; /* big endian */

认真……我不明白为什么所有的解决方案都那么复杂!最简单、最通用的模板函数如何?它可以在任何操作系统的任何情况下交换任何大小的任何类型????

template <typename T>
void SwapEnd(T& var)
{
    static_assert(std::is_pod<T>::value, "Type must be POD type for safety");
    std::array<char, sizeof(T)> varArray;
    std::memcpy(varArray.data(), &var, sizeof(T));
    for(int i = 0; i < static_cast<int>(sizeof(var)/2); i++)
        std::swap(varArray[sizeof(var) - 1 - i],varArray[i]);
    std::memcpy(&var, varArray.data(), sizeof(T));
}

这是C和c++结合的神奇力量!只需逐个字符交换原始变量。

要点1:没有操作符:请记住,我没有使用简单的赋值操作符“=”,因为当反转字节序时,一些对象将被打乱,复制构造函数(或赋值操作符)将不起作用。因此,一个字符一个字符地复制它们更加可靠。

Point 2: Be aware of alignment issues: Notice that we're copying to and from an array, which is the right thing to do because the C++ compiler doesn't guarantee that we can access unaligned memory (this answer was updated from its original form for this). For example, if you allocate uint64_t, your compiler cannot guarantee that you can access the 3rd byte of that as a uint8_t. Therefore, the right thing to do is to copy this to a char array, swap it, then copy it back (so no reinterpret_cast). Notice that compilers are mostly smart enough to convert what you did back to a reinterpret_cast if they're capable of accessing individual bytes regardless of alignment.

使用此函数:

double x = 5;
SwapEnd(x);

现在x的字节序不同了。