Skip to content

Commit

Permalink
Use compiler intrinsics for byte swapping
Browse files Browse the repository at this point in the history
There is really no advantage to messing with inline assembly here,
but plenty of disadvantages. The compiler can't optimize this for
whatever domain the data happens to be in (integer vs SSE+ registers),
some of the code violates the C standard, and it generates warnings
on newer gcc platforms (on i686 at least). Just get rid of it, use
the compiler intrinsics, and fall back to generic C instructions for
unknown compilers.
  • Loading branch information
Keno committed Jan 20, 2017
1 parent db9c22b commit 0e6672d
Showing 1 changed file with 16 additions and 60 deletions.
76 changes: 16 additions & 60 deletions src/support/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,75 +29,31 @@ int cmp_lt(void *a, numerictype_t atag, void *b, numerictype_t btag);
int cmp_eq(void *a, numerictype_t atag, void *b, numerictype_t btag,
int equalnans);

/*
 * LEGACY_REGS: output-operand constraint string for GCC-style inline
 * assembly. It expands to "=Q" when compiling for x86-64 and to "=q" on
 * every other target.
 *
 * NOTE(review): per the GCC x86 machine constraints, "Q" limits the operand
 * to a/b/c/d (registers that have an addressable high byte), while "q" means
 * "any register with an addressable low byte", which is also a/b/c/d in
 * 32-bit mode but any integer register in 64-bit mode. That would explain
 * the split; confirm against the GCC manual before relying on it.
 */
#ifdef __x86_64__
# define LEGACY_REGS "=Q"
#else
# define LEGACY_REGS "=q"
#endif

#if (!defined(__INTEL_COMPILER) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
/*
 * Reverse the byte order of a 16-bit value.
 *
 * x        value to swap
 * returns  x with its low and high bytes exchanged (0x1234 -> 0x3412)
 *
 * Written in plain C rather than inline assembly so the compiler can
 * constant-fold it and pick the best instruction and register for the
 * surrounding code.
 */
STATIC_INLINE uint16_t ByteSwap16(uint16_t x)
{
    /* Widen to unsigned first so the left shift is well defined; the final
     * cast drops the bits that were shifted above bit 15. */
    unsigned v = x;
    return (uint16_t)((v << 8) | (v >> 8));
}
#define bswap_16(x) ByteSwap16(x)

/*
 * Reverse the byte order of a 32-bit value.
 *
 * x        value to swap
 * returns  x with its four bytes in reverse order
 *          (0x12345678 -> 0x78563412)
 *
 * Written in plain C rather than inline assembly so the compiler can
 * constant-fold it and choose the instruction itself.
 */
STATIC_INLINE uint32_t ByteSwap32(uint32_t x)
{
    return (x >> 24) |
           ((x >> 8) & 0x0000ff00u) |
           ((x << 8) & 0x00ff0000u) |
           (x << 24);
}

#define bswap_32(x) ByteSwap32(x)

/*
 * Reverse the byte order of a 64-bit value.
 *
 * x        value to swap
 * returns  x with its eight bytes in reverse order
 *          (0x0102030405060708 -> 0x0807060504030201)
 *
 * One implementation serves both 32-bit and 64-bit targets. It uses only
 * shifts and masks on the value itself, so it does not depend on inline
 * assembly, union type punning or the host byte order.
 */
STATIC_INLINE uint64_t ByteSwap64(uint64_t x)
{
    /* Exchange neighbouring bytes within each 16-bit group. */
    x = ((x & 0x00ff00ff00ff00ffULL) << 8) |
        ((x >> 8) & 0x00ff00ff00ff00ffULL);
    /* Exchange neighbouring 16-bit groups within each 32-bit half. */
    x = ((x & 0x0000ffff0000ffffULL) << 16) |
        ((x >> 16) & 0x0000ffff0000ffffULL);
    /* Exchange the two 32-bit halves. */
    return (x << 32) | (x >> 32);
}
#define bswap_64(x) ByteSwap64(x)

#else

/* Exchange the two low-order bytes of x using masks and shifts. This is a
 * function-like macro that expands x twice, so do not pass an argument with
 * side effects. */
#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)

#if defined(__INTEL_COMPILER) && !defined(__clang__)
/*
 * Pick the byte-swap primitives for the current compiler:
 *   - clang, or GCC 4.8 and newer: the __builtin_bswap* intrinsics
 *   - MSVC:                        the _byteswap_* intrinsics
 *   - Intel compiler:              the _bswap* intrinsics
 *   - anything else:               generic mask-and-shift macros
 *
 * The GCC test compares major and minor together; a bare
 * "__GNUC_MINOR__ >= 8" would also accept old releases such as 2.95.
 *
 * NOTE(review): the MSVC and Intel intrinsics are assumed to be declared by
 * headers included elsewhere (<stdlib.h> / <immintrin.h>); confirm.
 */
#if defined(__clang__) || \
    (defined(__GNUC__) && \
     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
#define bswap_16(x) __builtin_bswap16(x)
#define bswap_32(x) __builtin_bswap32(x)
#define bswap_64(x) __builtin_bswap64(x)
#elif defined(_MSC_VER)
#define bswap_16(x) _byteswap_ushort(x)
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)
#elif defined(__INTEL_COMPILER)
#define bswap_16(x) _bswap16(x)
#define bswap_32(x) _bswap(x)
#define bswap_64(x) _bswap64(x)
#else
/* Generic fallbacks. Both macros expand x more than once, so do not pass
 * arguments with side effects. */
#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
/* The argument is converted to uint32_t first so that a signed or narrower
 * operand can never be left-shifted into the sign bit of int. */
#define bswap_32(x) \
    ((((uint32_t)(x) & 0xff000000u) >> 24) | \
     (((uint32_t)(x) & 0x00ff0000u) >> 8) | \
     (((uint32_t)(x) & 0x0000ff00u) << 8) | \
     (((uint32_t)(x) & 0x000000ffu) << 24))
#endif

/*
 * Generic 64-bit byte swap built from two 32-bit swaps.
 *
 * x        value to swap
 * returns  x with its eight bytes in reverse order
 *
 * The halves are separated with shifts rather than through a union, so the
 * result does not depend on the host byte order. Each half is held in a
 * local before being handed to bswap_32, because bswap_32 may be a macro
 * that expands its argument several times.
 */
STATIC_INLINE uint64_t ByteSwap64(uint64_t x)
{
    uint32_t high = (uint32_t)(x >> 32);
    uint32_t low = (uint32_t)x;
    /* The swapped high half becomes the new low half, and vice versa. */
    return ((uint64_t)bswap_32(high)) |
           (((uint64_t)bswap_32(low)) << 32);
}
#define bswap_64(x) ByteSwap64(x)

#endif

#ifdef __cplusplus
Expand Down

0 comments on commit 0e6672d

Please sign in to comment.