如何在c++中转换大端值和小端值?

为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。


注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。


当前回答

实现优化器友好的未对齐非就地末端访问器的可移植技术。它们处理每个编译器、每个边界对齐和每个字节排序。这些未对齐的例程被补充或讨论,取决于本机的端序和对齐方式。部分列出,但你懂的。BO*是基于本机字节排序的常数值。

uint32_t sw_get_uint32_1234(pu32)
uint32_1234 *pu32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32_1234[0] = (*pu32)[BO32_0];
  bou32.u32_1234[1] = (*pu32)[BO32_1];
  bou32.u32_1234[2] = (*pu32)[BO32_2];
  bou32.u32_1234[3] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_1234(pu32, u32)
uint32_1234 *pu32;
uint32_t u32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_1234[0];
  (*pu32)[BO32_1] = bou32.u32_1234[1];
  (*pu32)[BO32_2] = bou32.u32_1234[2];
  (*pu32)[BO32_3] = bou32.u32_1234[3];
}

#if HAS_SW_INT64
int64 sw_get_int64_12345678(pi64)
int64_12345678 *pi64;
{
  union {
    int64_12345678 i64_12345678;
    int64 i64;
  } boi64;
  boi64.i64_12345678[0] = (*pi64)[BO64_0];
  boi64.i64_12345678[1] = (*pi64)[BO64_1];
  boi64.i64_12345678[2] = (*pi64)[BO64_2];
  boi64.i64_12345678[3] = (*pi64)[BO64_3];
  boi64.i64_12345678[4] = (*pi64)[BO64_4];
  boi64.i64_12345678[5] = (*pi64)[BO64_5];
  boi64.i64_12345678[6] = (*pi64)[BO64_6];
  boi64.i64_12345678[7] = (*pi64)[BO64_7];
  return(boi64.i64);
}
#endif

int32_t sw_get_int32_3412(pi32)
int32_3412 *pi32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32_3412[2] = (*pi32)[BO32_0];
  boi32.i32_3412[3] = (*pi32)[BO32_1];
  boi32.i32_3412[0] = (*pi32)[BO32_2];
  boi32.i32_3412[1] = (*pi32)[BO32_3];
  return(boi32.i32);
}

void sw_set_int32_3412(pi32, i32)
int32_3412 *pi32;
int32_t i32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32 = i32;
  (*pi32)[BO32_0] = boi32.i32_3412[2];
  (*pi32)[BO32_1] = boi32.i32_3412[3];
  (*pi32)[BO32_2] = boi32.i32_3412[0];
  (*pi32)[BO32_3] = boi32.i32_3412[1];
}

uint32_t sw_get_uint32_3412(pu32)
uint32_3412 *pu32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32_3412[2] = (*pu32)[BO32_0];
  bou32.u32_3412[3] = (*pu32)[BO32_1];
  bou32.u32_3412[0] = (*pu32)[BO32_2];
  bou32.u32_3412[1] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_3412(pu32, u32)
uint32_3412 *pu32;
uint32_t u32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_3412[2];
  (*pu32)[BO32_1] = bou32.u32_3412[3];
  (*pu32)[BO32_2] = bou32.u32_3412[0];
  (*pu32)[BO32_3] = bou32.u32_3412[1];
}

float sw_get_float_1234(pf)
float_1234 *pf;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f_1234[0] = (*pf)[BO32_0];
  bof.f_1234[1] = (*pf)[BO32_1];
  bof.f_1234[2] = (*pf)[BO32_2];
  bof.f_1234[3] = (*pf)[BO32_3];
  return(bof.f);
}

void sw_set_float_1234(pf, f)
float_1234 *pf;
float f;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f = (float)f;
  (*pf)[BO32_0] = bof.f_1234[0];
  (*pf)[BO32_1] = bof.f_1234[1];
  (*pf)[BO32_2] = bof.f_1234[2];
  (*pf)[BO32_3] = bof.f_1234[3];
}

double sw_get_double_12345678(pd)
double_12345678 *pd;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d_12345678[0] = (*pd)[BO64_0];
  bod.d_12345678[1] = (*pd)[BO64_1];
  bod.d_12345678[2] = (*pd)[BO64_2];
  bod.d_12345678[3] = (*pd)[BO64_3];
  bod.d_12345678[4] = (*pd)[BO64_4];
  bod.d_12345678[5] = (*pd)[BO64_5];
  bod.d_12345678[6] = (*pd)[BO64_6];
  bod.d_12345678[7] = (*pd)[BO64_7];
  return(bod.d);
}

void sw_set_double_12345678(pd, d)
double_12345678 *pd;
double d;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d = d;
  (*pd)[BO64_0] = bod.d_12345678[0];
  (*pd)[BO64_1] = bod.d_12345678[1];
  (*pd)[BO64_2] = bod.d_12345678[2];
  (*pd)[BO64_3] = bod.d_12345678[3];
  (*pd)[BO64_4] = bod.d_12345678[4];
  (*pd)[BO64_5] = bod.d_12345678[5];
  (*pd)[BO64_6] = bod.d_12345678[6];
  (*pd)[BO64_7] = bod.d_12345678[7];
}

如果不与访问器一起使用,这些类型def的好处是会引发编译器错误,从而减少被遗忘的访问器错误。

typedef char int8_1[1], uint8_1[1];

typedef char int16_12[2], uint16_12[2]; /* little endian */
typedef char int16_21[2], uint16_21[2]; /* big endian */

typedef char int24_321[3], uint24_321[3]; /* Alpha Micro, PDP-11 */

typedef char int32_1234[4], uint32_1234[4]; /* little endian */
typedef char int32_3412[4], uint32_3412[4]; /* Alpha Micro, PDP-11 */
typedef char int32_4321[4], uint32_4321[4]; /* big endian */

typedef char int64_12345678[8], uint64_12345678[8]; /* little endian */
typedef char int64_34128756[8], uint64_34128756[8]; /* Alpha Micro, PDP-11 */
typedef char int64_87654321[8], uint64_87654321[8]; /* big endian */

typedef char float_1234[4]; /* little endian */
typedef char float_3412[4]; /* Alpha Micro, PDP-11 */
typedef char float_4321[4]; /* big endian */

typedef char double_12345678[8]; /* little endian */
typedef char double_78563412[8]; /* Alpha Micro? */
typedef char double_87654321[8]; /* big endian */

其他回答

如果您这样做是为了在不同平台之间传输数据,请查看ntoh和hton函数。

请注意,至少对于Windows, htonl()比它们的内在对应_byteswap_ulong()慢得多。前者是对ws2_32.dll的一个DLL库调用,后者是一条BSWAP汇编指令。因此,如果你正在编写一些依赖于平台的代码,为了提高速度,最好使用intrinsic:

#define htonl(x) _byteswap_ulong(x)

这对于。png图像处理尤其重要,其中所有整数都保存在大端格式中,并说明“One can use htonl()…”{用来降低典型Windows程序的速度,如果你没有准备好}。

c++20无分支版本,现在std::endian已经存在,但在c++23之前增加了std::byteswap

#include <bit>
#include <type_traits>
#include <concepts>
#include <array>
#include <cstring>
#include <iostream>
#include <bitset>

template <int LEN, int OFF=LEN/2>
class do_swap
{
    // FOR 8 bytes:
    // LEN=8 (LEN/2==4)       <H><G><F><E><D><C><B><A>
    // OFF=4: FROM=0, TO=7 => [A]<G><F><E><D><C><B>[H]
    // OFF=3: FROM=1, TO=6 => [A][B]<F><E><D><C>[G][H]
    // OFF=2: FROM=2, TO=5 => [A][B][C]<E><D>[F][G][H]
    // OFF=1: FROM=3, TO=4 => [A][B][C][D][E][F][G][H]
    // OFF=0: FROM=4, TO=3 => DONE
public:
    enum consts {FROM=LEN/2-OFF, TO=(LEN-1)-FROM};
    using NXT=do_swap<LEN, OFF-1>;
// flip the first and last for the current iteration's range
    static void flip(std::array<std::byte, LEN>& b)
    {
        std::byte tmp=b[FROM];
        b[FROM]=b[TO];
        b[TO]=tmp;
        NXT::flip(b);
    }
};
template <int LEN>
class do_swap<LEN, 0> // STOP the template recursion
{
public:
    static void flip(std::array<std::byte, LEN>&)
    {
    }
};

template<std::integral T, std::endian TO, std::endian FROM=std::endian::native>
        requires ((TO==std::endian::big) || (TO==std::endian::little))
              && ((FROM==std::endian::big) || (FROM==std::endian::little))
class endian_swap
{
public:
    enum consts {BYTE_COUNT=sizeof(T)};
    static T cvt(const T integral)
    {
    // if FROM and TO are the same -- nothing to do
        if (TO==FROM)
        {
                return integral;
        }

    // endian::big --> endian::little is the same as endian::little --> endian::big
    // the bytes have to be reversed
    // memcpy seems to be the most supported way to do byte swaps in a defined way
        std::array<std::byte, BYTE_COUNT> bytes;
        std::memcpy(&bytes, &integral, BYTE_COUNT);
        do_swap<BYTE_COUNT>::flip(bytes);
        T ret;
        std::memcpy(&ret, &bytes, BYTE_COUNT);
        return ret;
    }
};

std::endian big()
{
    return std::endian::big;
}

std::endian little()
{
    return std::endian::little;
}

std::endian native()
{
    return std::endian::native;
}

long long swap_to_big(long long x)
{
    return endian_swap<long long, std::endian::big>::cvt(x);
}

long long swap_to_little(long long x)
{
    return endian_swap<long long, std::endian::little>::cvt(x);
}

void show(std::string label, long long x)
{
    std::cout << label << "\t: " << std::bitset<64>(x) << " (" << x << ")" << std::endl;
}

int main(int argv, char ** argc)
{
    long long init=0xF8FCFEFF7F3F1F0;
    long long to_big=swap_to_big(init);
    long long to_little=swap_to_little(init);
    show("Init", init);
    show(">big", to_big);
    show(">little", to_little);
}

这里有一个基本的函数来交换大小端序。它是基本的,但不需要补充库。

void endianness_swap(uint32_t& val) {
    uint8_t a, b, c;
    a = (val & 0xFF000000) >> 24;
    b = (val & 0x00FF0000) >> 16;
    c = (val & 0x0000FF00) >> 8;
    val=(val & 0x000000FF) << 24;
    val = val + (c << 16) + (b << 8) + (a);
}

我真的很惊讶没有人提到htobeXX和betohXX函数。它们定义在end .h中,非常类似于网络函数htonXX。