Skip to content

Commit

Permalink
Avoid UB on clang
Browse files Browse the repository at this point in the history
  • Loading branch information
dr-m committed Apr 29, 2024
1 parent 9eec158 commit 51200ac
Showing 1 changed file with 20 additions and 7 deletions.
27 changes: 20 additions & 7 deletions mysys/crc32/crc32c_x86.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,21 @@ static inline __m128i load128(const char *buf)
#define xor128(a, b) _mm_xor_epi64(a, b)
#define and128(a, b) _mm_and_si128(a, b)

/**
  Shift a 512-bit vector right by 384 bits.
  @param a  512-bit input
  @return the most significant 128 bits of a, moved to the least
  significant 128 bits; the remaining 384 bits are intended to be zero
*/
USE_VPCLMULQDQ
static inline __m512i shrl512_384(__m512i a)
{
#if defined __GNUC__ && __GNUC__ >= 11 && !defined __clang__
  /* While technically incorrect (the cast does not formally define the
  upper 384 bits of its result), this would seem to translate into a
  vextracti32x4 instruction, which actually outputs a ZMM register
  (anything above the XMM range is cleared). */
  return _mm512_castsi128_si512(_mm512_extracti64x2_epi64(a, 3));
#else
  /* On clang, this is needed in order to get a correct result:
  the shuffle moves 128-bit lane 3 of a into lane 0, and the zeroing
  mask 3 keeps only the two lowest 64-bit elements.
  clang is excluded by name in the condition above, because the
  __GNUC__ value that it reports is configurable (-fgnuc-version=)
  and must not be allowed to select the cast-based variant. */
  return _mm512_maskz_shuffle_i64x2(3, a, a, 3);
#endif
}

USE_VPCLMULQDQ ATTRIBUTE_NOINLINE
static unsigned crc32_avx512(unsigned crc, const char *buf, size_t size,
const crc32_tab &tab)
Expand Down Expand Up @@ -224,13 +239,11 @@ static unsigned crc32_avx512(unsigned crc, const char *buf, size_t size,
b896= _mm512_load_epi32(&tab.b896),
b384= _mm512_load_epi32(&tab.b384);

crc_out= _mm512_extracti64x2_epi64(m4, 3);
__m512i m1;
m1= xor3_512(_mm512_clmulepi64_epi128(m0, b896, 1),
_mm512_clmulepi64_epi128(m0, b896, 0x10),
_mm512_clmulepi64_epi128(m4, b384, 1));
__m512i m1= xor3_512(_mm512_clmulepi64_epi128(m0, b896, 1),
_mm512_clmulepi64_epi128(m0, b896, 0x10),
_mm512_clmulepi64_epi128(m4, b384, 1));
m1= xor3_512(m1, _mm512_clmulepi64_epi128(m4, b384, 0x10),
_mm512_castsi128_si512(crc_out));
shrl512_384(m4));

__m256i m8=
_mm512_castsi512_si256(_mm512_shuffle_i64x2(m1, m1, 0b01001110));
Expand Down Expand Up @@ -273,7 +286,7 @@ static unsigned crc32_avx512(unsigned crc, const char *buf, size_t size,
__m512i crc512=
xor3_512(_mm512_clmulepi64_epi128(m0, b384, 0x1),
_mm512_clmulepi64_epi128(m0, b384, 0x10),
_mm512_castsi128_si512(_mm512_extracti64x2_epi64(m0, 3)));
shrl512_384(m0));
crc512= xor512(crc512, _mm512_shuffle_i64x2(crc512, crc512, 0b01001110));
const __m256i crc256= _mm512_castsi512_si256(crc512);
crc_out= xor128(_mm256_extracti64x2_epi64(crc256, 1),
Expand Down

0 comments on commit 51200ac

Please sign in to comment.