如何在c++中转换大端值和小端值?
为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。
注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。
如何在c++中转换大端值和小端值?
为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。
注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。
当前回答
C++20 无分支版本:适用于 std::endian 已经存在(C++20)、但 C++23 尚未加入 std::byteswap 之前的情况。
#include <bit>
#include <type_traits>
#include <concepts>
#include <array>
#include <cstring>
#include <iostream>
#include <bitset>
template <int LEN, int OFF=LEN/2>
class do_swap
{
    // Compile-time unrolled reversal of a LEN-byte array.
    // Every instantiation exchanges one mirrored pair of bytes and then
    // delegates to do_swap<LEN, OFF-1>; the OFF==0 specialization below
    // ends the chain.
    //
    // Worked example for LEN=8 (OFF starts at LEN/2==4),
    // input <H><G><F><E><D><C><B><A>:
    //   OFF=4: FROM=0, TO=7 => [A]<G><F><E><D><C><B>[H]
    //   OFF=3: FROM=1, TO=6 => [A][B]<F><E><D><C>[G][H]
    //   OFF=2: FROM=2, TO=5 => [A][B][C]<E><D>[F][G][H]
    //   OFF=1: FROM=3, TO=4 => [A][B][C][D][E][F][G][H]
    //   OFF=0: FROM=4, TO=3 => finished
public:
    // FROM walks up from index 0, TO walks down from index LEN-1.
    enum consts {FROM=LEN/2-OFF, TO=(LEN-1)-FROM};
    // The instantiation responsible for the next (inner) pair.
    using NXT=do_swap<LEN, OFF-1>;

    // Exchange b[FROM] with b[TO], then continue with the next inner pair.
    static void flip(std::array<std::byte, LEN>& b)
    {
        const std::byte held=b[TO];
        b[TO]=b[FROM];
        b[FROM]=held;
        NXT::flip(b);
    }
};

// Recursion terminator: with OFF==0 there are no pairs left to exchange.
template <int LEN>
class do_swap<LEN, 0>
{
public:
    static void flip(std::array<std::byte, LEN>&)
    {
        // Intentionally empty.
    }
};
// Converts an integral value of type T from byte order FROM to byte order TO.
//
// Template parameters:
//   T    - integral type to convert (enforced by std::integral).
//   TO   - target byte order; must be std::endian::big or std::endian::little.
//   FROM - source byte order; defaults to std::endian::native and must also be
//          big or little (so a mixed-endian native order is rejected by the
//          requires-clause).
template<std::integral T, std::endian TO, std::endian FROM=std::endian::native>
requires ((TO==std::endian::big) || (TO==std::endian::little))
&& ((FROM==std::endian::big) || (FROM==std::endian::little))
class endian_swap
{
public:
    enum consts {BYTE_COUNT=sizeof(T)};

    // Returns `integral` re-expressed in the TO byte order.
    static T cvt(const T integral)
    {
        // TO and FROM are template arguments, so the decision is made at
        // compile time; `if constexpr` discards the branch that is not needed
        // instead of relying on the optimizer to fold a runtime `if`.
        if constexpr (TO==FROM)
        {
            // Same byte order on both sides: nothing to do.
            return integral;
        }
        else
        {
            // big -> little and little -> big are the same operation: reverse
            // the bytes. memcpy is used to move the value in and out of the
            // byte array in a well-defined way.
            std::array<std::byte, BYTE_COUNT> bytes;
            std::memcpy(bytes.data(), &integral, BYTE_COUNT);
            do_swap<BYTE_COUNT>::flip(bytes);
            T ret;
            std::memcpy(&ret, bytes.data(), BYTE_COUNT);
            return ret;
        }
    }
};
// Returns the std::endian::big enumerator.
std::endian big()
{
return std::endian::big;
}
// Returns the std::endian::little enumerator.
std::endian little()
{
return std::endian::little;
}
// Returns std::endian::native, i.e. the byte order of the platform this code
// is compiled for.
std::endian native()
{
return std::endian::native;
}
// Converts x to big-endian byte order via endian_swap; the source order is
// endian_swap's default FROM argument (std::endian::native).
long long swap_to_big(long long x)
{
return endian_swap<long long, std::endian::big>::cvt(x);
}
// Converts x to little-endian byte order via endian_swap; the source order is
// endian_swap's default FROM argument (std::endian::native).
long long swap_to_little(long long x)
{
return endian_swap<long long, std::endian::little>::cvt(x);
}
// Prints one line to std::cout: the label, a tab and colon, the 64-bit binary
// pattern of x, and x in decimal inside parentheses.
void show(std::string label, long long x)
{
    const std::bitset<64> pattern(x);
    std::cout << label << "\t: ";
    std::cout << pattern << " (";
    std::cout << x << ")" << std::endl;
}
// Demo driver: converts one sample value to big- and little-endian order and
// prints all three bit patterns.
// The command-line parameters were unused (and had their conventional names
// swapped), so the parameterless form of main is used.
int main()
{
    // Note: the literal has 15 hex digits, so the top nibble of the 64-bit
    // value is zero.
    long long init=0xF8FCFEFF7F3F1F0;
    long long to_big=swap_to_big(init);
    long long to_little=swap_to_little(init);
    show("Init", init);
    show(">big", to_big);
    show(">little", to_little);
}
其他回答
哇,我简直不敢相信我在这里读到的一些答案。实际上有一条汇编指令能比其他任何方法都更快地完成这件事:bswap。你可以简单地写一个这样的函数…
// Byte-swaps a 32-bit value using the x86 `bswap` instruction.
// Declared __declspec(naked), so the body consists only of the instructions
// written below; it reads its argument straight from the stack and returns
// with the result left in eax.
// NOTE(review): the parameter type is spelled `uint32` while the return type is
// `uint32_t` - presumably a project typedef or a typo; confirm.
__declspec(naked) uint32_t EndianSwap(uint32 value)
{
__asm
{
mov eax, dword ptr[esp + 4] // load the argument from the stack into eax
bswap eax // reverse the four bytes of eax
ret // return to the caller; the swapped value is in eax
}
}
它比之前提到的内在函数要快得多。我把它们拆开看了看。上面的函数没有序言/尾声,因此实际上没有任何开销。
unsigned long _byteswap_ulong(unsigned long value);
16位同样容易,只不过要使用 xchg al, ah。bswap 仅适用于32位寄存器。
64位有点棘手,但也不过分。比上面所有带有循环和模板的例子都要好得多。
这里有一些注意事项……首先,bswap只能在80x486以上的CPU上使用。有人打算在386上运行吗?!?如果是这样,你仍然可以用…替换bswap。
// Replacement for `bswap eax` built from older instructions.
// With eax = B3 B2 B1 B0 (B0 is the lowest byte):
mov ebx, eax // ebx = B3 B2 B1 B0
shr ebx, 16 // ebx = 00 00 B3 B2
xchg al, ah // eax = B3 B2 B0 B1
xchg bl, bh // ebx = 00 00 B2 B3
shl eax, 16 // eax = B0 B1 00 00
or eax, ebx // eax = B0 B1 B2 B3 (fully byte-reversed)
内联汇编也只能在Visual Studio的x86代码中使用。裸函数不能被内联,而且在x64版本中也不可用。在这种情况下,你必须使用编译器的内在函数。
下面是实现对优化器友好的、未对齐的、非就地的字节序访问器的可移植技术。它们适用于每个编译器、每种边界对齐和每种字节序。根据本机的字节序和对齐方式,这些未对齐例程要么需要补充,要么变得多余。这里只列出了一部分,但你应该能明白意思。BO*是基于本机字节序的常量值。
/*
 * Read a uint32_t out of a uint32_1234 byte array.
 *
 * pu32 points at the 4-byte array to read. Union byte n is filled from source
 * index BO32_n (constants defined outside this excerpt; described in the
 * surrounding text as depending on the native byte order), and the union is
 * then read back through its uint32_t member.
 * Written as a pre-ANSI (K&R) definition: the parameter type is declared
 * between the parameter list and the body.
 */
uint32_t sw_get_uint32_1234(pu32)
uint32_1234 *pu32;
{
/* Byte-array view and integer view of the same four bytes. */
union {
uint32_1234 u32_1234;
uint32_t u32;
} bou32;
bou32.u32_1234[0] = (*pu32)[BO32_0];
bou32.u32_1234[1] = (*pu32)[BO32_1];
bou32.u32_1234[2] = (*pu32)[BO32_2];
bou32.u32_1234[3] = (*pu32)[BO32_3];
return(bou32.u32);
}
/*
 * Store a uint32_t into a uint32_1234 byte array.
 *
 * pu32 points at the 4-byte destination, u32 is the value to store. The value
 * is placed in a union, and union byte n is written to destination index
 * BO32_n (constants defined outside this excerpt).
 * Written as a pre-ANSI (K&R) definition.
 */
void sw_set_uint32_1234(pu32, u32)
uint32_1234 *pu32;
uint32_t u32;
{
/* Byte-array view and integer view of the same four bytes. */
union {
uint32_1234 u32_1234;
uint32_t u32;
} bou32;
bou32.u32 = u32;
(*pu32)[BO32_0] = bou32.u32_1234[0];
(*pu32)[BO32_1] = bou32.u32_1234[1];
(*pu32)[BO32_2] = bou32.u32_1234[2];
(*pu32)[BO32_3] = bou32.u32_1234[3];
}
#if HAS_SW_INT64
/*
 * Read an int64 out of an int64_12345678 byte array.
 *
 * pi64 points at the 8-byte array to read. Union byte n is filled from source
 * index BO64_n (constants defined outside this excerpt), and the union is then
 * read back through its int64 member.
 * Written as a pre-ANSI (K&R) definition.
 */
int64 sw_get_int64_12345678(pi64)
int64_12345678 *pi64;
{
/* Byte-array view and integer view of the same eight bytes. */
union {
int64_12345678 i64_12345678;
int64 i64;
} boi64;
boi64.i64_12345678[0] = (*pi64)[BO64_0];
boi64.i64_12345678[1] = (*pi64)[BO64_1];
boi64.i64_12345678[2] = (*pi64)[BO64_2];
boi64.i64_12345678[3] = (*pi64)[BO64_3];
boi64.i64_12345678[4] = (*pi64)[BO64_4];
boi64.i64_12345678[5] = (*pi64)[BO64_5];
boi64.i64_12345678[6] = (*pi64)[BO64_6];
boi64.i64_12345678[7] = (*pi64)[BO64_7];
return(boi64.i64);
}
#endif
/*
 * Read an int32_t out of an int32_3412 byte array.
 *
 * pi32 points at the 4-byte array to read. Compared with the _1234 accessors
 * the two 16-bit halves are exchanged: source indices BO32_0/BO32_1 land in
 * union bytes 2/3 and BO32_2/BO32_3 land in union bytes 0/1. The union is then
 * read back through its int32_t member.
 * BO32_* are constants defined outside this excerpt.
 * Written as a pre-ANSI (K&R) definition.
 */
int32_t sw_get_int32_3412(pi32)
int32_3412 *pi32;
{
/* Byte-array view and integer view of the same four bytes. */
union {
int32_3412 i32_3412;
int32_t i32;
} boi32;
boi32.i32_3412[2] = (*pi32)[BO32_0];
boi32.i32_3412[3] = (*pi32)[BO32_1];
boi32.i32_3412[0] = (*pi32)[BO32_2];
boi32.i32_3412[1] = (*pi32)[BO32_3];
return(boi32.i32);
}
/*
 * Store an int32_t into an int32_3412 byte array.
 *
 * pi32 points at the 4-byte destination, i32 is the value to store. The value
 * is placed in a union; union bytes 2/3 are written to destination indices
 * BO32_0/BO32_1 and union bytes 0/1 to BO32_2/BO32_3, i.e. the two 16-bit
 * halves are exchanged relative to the _1234 accessors.
 * BO32_* are constants defined outside this excerpt.
 * Written as a pre-ANSI (K&R) definition.
 */
void sw_set_int32_3412(pi32, i32)
int32_3412 *pi32;
int32_t i32;
{
/* Byte-array view and integer view of the same four bytes. */
union {
int32_3412 i32_3412;
int32_t i32;
} boi32;
boi32.i32 = i32;
(*pi32)[BO32_0] = boi32.i32_3412[2];
(*pi32)[BO32_1] = boi32.i32_3412[3];
(*pi32)[BO32_2] = boi32.i32_3412[0];
(*pi32)[BO32_3] = boi32.i32_3412[1];
}
/*
 * Read a uint32_t out of a uint32_3412 byte array.
 *
 * pu32 points at the 4-byte array to read. Source indices BO32_0/BO32_1 land
 * in union bytes 2/3 and BO32_2/BO32_3 land in union bytes 0/1 (the two 16-bit
 * halves are exchanged relative to the _1234 accessors). The union is then
 * read back through its uint32_t member.
 * BO32_* are constants defined outside this excerpt.
 * Written as a pre-ANSI (K&R) definition.
 */
uint32_t sw_get_uint32_3412(pu32)
uint32_3412 *pu32;
{
/* Byte-array view and integer view of the same four bytes. */
union {
uint32_3412 u32_3412;
uint32_t u32;
} bou32;
bou32.u32_3412[2] = (*pu32)[BO32_0];
bou32.u32_3412[3] = (*pu32)[BO32_1];
bou32.u32_3412[0] = (*pu32)[BO32_2];
bou32.u32_3412[1] = (*pu32)[BO32_3];
return(bou32.u32);
}
/*
 * Store a uint32_t into a uint32_3412 byte array.
 *
 * pu32 points at the 4-byte destination, u32 is the value to store. The value
 * is placed in a union; union bytes 2/3 are written to destination indices
 * BO32_0/BO32_1 and union bytes 0/1 to BO32_2/BO32_3.
 * BO32_* are constants defined outside this excerpt.
 * Written as a pre-ANSI (K&R) definition.
 */
void sw_set_uint32_3412(pu32, u32)
uint32_3412 *pu32;
uint32_t u32;
{
/* Byte-array view and integer view of the same four bytes. */
union {
uint32_3412 u32_3412;
uint32_t u32;
} bou32;
bou32.u32 = u32;
(*pu32)[BO32_0] = bou32.u32_3412[2];
(*pu32)[BO32_1] = bou32.u32_3412[3];
(*pu32)[BO32_2] = bou32.u32_3412[0];
(*pu32)[BO32_3] = bou32.u32_3412[1];
}
/*
 * Read a float out of a float_1234 byte array.
 *
 * pf points at the 4-byte array to read. Union byte n is filled from source
 * index BO32_n (constants defined outside this excerpt), and the union is then
 * read back through its float member, so the bytes are reinterpreted rather
 * than numerically converted.
 * Written as a pre-ANSI (K&R) definition.
 */
float sw_get_float_1234(pf)
float_1234 *pf;
{
/* Byte-array view and float view of the same four bytes. */
union {
float_1234 f_1234;
float f;
} bof;
bof.f_1234[0] = (*pf)[BO32_0];
bof.f_1234[1] = (*pf)[BO32_1];
bof.f_1234[2] = (*pf)[BO32_2];
bof.f_1234[3] = (*pf)[BO32_3];
return(bof.f);
}
/*
 * Store a float into a float_1234 byte array.
 *
 * pf points at the 4-byte destination, f is the value to store. The value is
 * placed in a union, and union byte n is written to destination index BO32_n
 * (constants defined outside this excerpt).
 * Written as a pre-ANSI (K&R) definition.
 */
void sw_set_float_1234(pf, f)
float_1234 *pf;
float f;
{
/* Byte-array view and float view of the same four bytes. */
union {
float_1234 f_1234;
float f;
} bof;
bof.f = (float)f;
(*pf)[BO32_0] = bof.f_1234[0];
(*pf)[BO32_1] = bof.f_1234[1];
(*pf)[BO32_2] = bof.f_1234[2];
(*pf)[BO32_3] = bof.f_1234[3];
}
/*
 * Read a double out of a double_12345678 byte array.
 *
 * pd points at the 8-byte array to read. Union byte n is filled from source
 * index BO64_n (constants defined outside this excerpt), and the union is then
 * read back through its double member, so the bytes are reinterpreted rather
 * than numerically converted.
 * Written as a pre-ANSI (K&R) definition.
 */
double sw_get_double_12345678(pd)
double_12345678 *pd;
{
/* Byte-array view and double view of the same eight bytes. */
union {
double_12345678 d_12345678;
double d;
} bod;
bod.d_12345678[0] = (*pd)[BO64_0];
bod.d_12345678[1] = (*pd)[BO64_1];
bod.d_12345678[2] = (*pd)[BO64_2];
bod.d_12345678[3] = (*pd)[BO64_3];
bod.d_12345678[4] = (*pd)[BO64_4];
bod.d_12345678[5] = (*pd)[BO64_5];
bod.d_12345678[6] = (*pd)[BO64_6];
bod.d_12345678[7] = (*pd)[BO64_7];
return(bod.d);
}
/*
 * Store a double into a double_12345678 byte array.
 *
 * pd points at the 8-byte destination, d is the value to store. The value is
 * placed in a union, and union byte n is written to destination index BO64_n
 * (constants defined outside this excerpt).
 * Written as a pre-ANSI (K&R) definition.
 */
void sw_set_double_12345678(pd, d)
double_12345678 *pd;
double d;
{
/* Byte-array view and double view of the same eight bytes. */
union {
double_12345678 d_12345678;
double d;
} bod;
bod.d = d;
(*pd)[BO64_0] = bod.d_12345678[0];
(*pd)[BO64_1] = bod.d_12345678[1];
(*pd)[BO64_2] = bod.d_12345678[2];
(*pd)[BO64_3] = bod.d_12345678[3];
(*pd)[BO64_4] = bod.d_12345678[4];
(*pd)[BO64_5] = bod.d_12345678[5];
(*pd)[BO64_6] = bod.d_12345678[6];
(*pd)[BO64_7] = bod.d_12345678[7];
}
这些typedef的好处是:如果不通过访问器使用,就会引发编译器错误,从而减少因忘记使用访问器而导致的错误。
/*
 * Byte-order-tagged storage types.
 *
 * Each name is a char array of the value's size; the digit suffix spells out
 * the byte layout the array is meant to hold (see the per-line notes).
 * Because they are array types, they cannot be used as plain integers or
 * floats, which forces access through the sw_get_ and sw_set_ accessors.
 */
typedef char int8_1[1], uint8_1[1];
typedef char int16_12[2], uint16_12[2]; /* little endian */
typedef char int16_21[2], uint16_21[2]; /* big endian */
typedef char int24_321[3], uint24_321[3]; /* Alpha Micro, PDP-11 */
typedef char int32_1234[4], uint32_1234[4]; /* little endian */
typedef char int32_3412[4], uint32_3412[4]; /* Alpha Micro, PDP-11 */
typedef char int32_4321[4], uint32_4321[4]; /* big endian */
typedef char int64_12345678[8], uint64_12345678[8]; /* little endian */
typedef char int64_34128756[8], uint64_34128756[8]; /* Alpha Micro, PDP-11 */
typedef char int64_87654321[8], uint64_87654321[8]; /* big endian */
typedef char float_1234[4]; /* little endian */
typedef char float_3412[4]; /* Alpha Micro, PDP-11 */
typedef char float_4321[4]; /* big endian */
typedef char double_12345678[8]; /* little endian */
typedef char double_78563412[8]; /* Alpha Micro? */
typedef char double_87654321[8]; /* big endian */
看起来安全的方法是对每个字(word)使用htons。所以,如果你有……
// The vector is named word_storage so that the declaration matches the name
// used by the transform call below.
std::vector<uint16_t> word_storage(n); // where n is the number to be converted
// the following would do the trick:
// rewrite every element in place with its htons() result
std::transform(word_storage.cbegin(), word_storage.cend()
, word_storage.begin(), [](const uint16_t input)->uint16_t {
return htons(input); });
如果您是在一个大端系统上,那么上面的代码将是一个无操作,因此我会去查找您的平台用来判断htons是否为无操作的编译时条件。毕竟它是O(n)的。在Mac上,大概会是这样的……
// Compile the in-place htons() pass only when the Darwin byte-order macros say
// the host is not big-endian; otherwise the whole pass is left out of the build.
#if (__DARWIN_BYTE_ORDER != __DARWIN_BIG_ENDIAN)
std::transform(word_storage.cbegin(), word_storage.cend()
, word_storage.begin(), [](const uint16_t input)->uint16_t {
return htons(input); });
#endif
在模板函数中围绕枢轴使用老式的3-step-xor技巧进行字节交换,提供了一个灵活、快速的O(ln2)解决方案,不需要库,这里的风格也拒绝1字节类型:
// Reverses the bytes of `t` in place.
//
// Two pointers start at the first and last byte of the object and walk toward
// each other, exchanging the bytes they point at. A 1-byte object is left
// untouched because the pointers start out equal.
//
// The walk uses pointers rather than a uint8_t index: an 8-bit counter can
// never reach sizeof(t)/2 once that exceeds 255, which would make the loop
// run forever on large objects. Access goes through unsigned char, which is
// allowed to alias any object type.
template<typename T>void swap(T &t){
    unsigned char *lo = reinterpret_cast<unsigned char *>(&t);
    unsigned char *hi = lo + sizeof(t) - 1;
    for(; lo < hi; ++lo, --hi){
        const unsigned char tmp = *lo;
        *lo = *hi;
        *hi = tmp;
    }
}
虽然不如使用内在函数高效,但肯定是可移植的。我的回答:
#include <cstdint>
#include <type_traits>
/**
 * Perform an endian swap of bytes against a templatized unsigned word.
 *
 * The word is split into an upper and a lower half; each half is swapped
 * recursively and the two halves are then exchanged. Only 1-, 2-, 4- and
 * 8-byte words are supported (checked at compile time); a 1-byte word is
 * returned unchanged.
 *
 * @tparam value_type The data type to perform the endian swap against.
 * @param value The data value to swap.
 *
 * @return value_type The resulting swapped word.
 */
template <typename value_type>
constexpr inline auto endian_swap(value_type value) -> value_type
{
    static_assert(sizeof(value_type) == 1u || sizeof(value_type) == 2u ||
                      sizeof(value_type) == 4u || sizeof(value_type) == 8u,
                  "endian_swap supports only 1-, 2-, 4- and 8-byte words");

    // A single byte has nothing to reorder. Returning here also keeps a
    // 1-byte call from recursing into itself forever, because half_type is
    // again a 1-byte type in that case.
    if (sizeof(value_type) == 1u)
    {
        return value;
    }

    using half_type = typename std::conditional<
        sizeof(value_type) == 8u,
        uint32_t,
        typename std::conditional<sizeof(value_type) == 4u, uint16_t, uint8_t>::
            type>::type;

    auto const half_bits = sizeof(value_type) * 8u / 2u;
    half_type const upper_half = static_cast<half_type>(value >> half_bits);
    half_type const lower_half = static_cast<half_type>(value);

    // Base case: the halves of a 2-byte word are single bytes, so exchanging
    // them completes the swap.
    if (sizeof(value_type) == 2u)
    {
        return static_cast<value_type>(
            (static_cast<value_type>(lower_half) << half_bits) | upper_half);
    }

    // Swap inside each half, then exchange the halves.
    return static_cast<value_type>(
        (static_cast<value_type>(endian_swap(lower_half)) << half_bits) |
        endian_swap(upper_half));
}