Skip to content

Commit

Permalink
avx512f: implement simde_mm512_cmp{,lt,eq}_ps_mask
Browse files Browse the repository at this point in the history
  • Loading branch information
himanshi18037 authored and nemequ committed Jul 12, 2020
1 parent 6dd36a8 commit a8bf991
Show file tree
Hide file tree
Showing 3 changed files with 866 additions and 1 deletion.
292 changes: 292 additions & 0 deletions simde/x86/avx512f.h
Original file line number Diff line number Diff line change
Expand Up @@ -4500,6 +4500,298 @@ simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512
#define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b)
#endif

/* Compare the 32-bit float lanes of `a` and `b` with the predicate selected
 * by `imm8` (one of the SIMDE_CMP_* values, range [0, 31]) and return the
 * per-lane outcome as a bit mask.
 *
 * With SIMDE_X86_AVX512F_NATIVE the call is forwarded to _mm512_cmp_ps_mask.
 * Otherwise every lane of an intermediate vector is set to all-ones (true)
 * or all-zeros (false) and simde_mm512_movepi32_mask() reduces it to the
 * returned mask.
 *
 * NaN handling in the fallback:
 *   - "ordered" predicates (EQ_O*, LT_O*, LE_O*, GE_O*, GT_O*, NEQ_O*, ORD_*)
 *     are false when either operand is NaN;
 *   - "unordered" predicates (EQ_U*, NEQ_U*, NLT_U*, NLE_U*, NGE_U*, NGT_U*,
 *     UNORD_*) are true when either operand is NaN.
 * The N** predicates are therefore written as the negation of the ordered
 * comparison (e.g. NLT == !(a < b)) rather than as the swapped operator
 * (a >= b), because the latter is false for NaN operands.
 *
 * The quiet (Q) and signalling (S) variants of each predicate share one
 * implementation in the fallback; it only computes the comparison result.
 *
 * NaN detection uses self-comparison (x != x), so the fallback does not
 * depend on simde_math_isnanf being available.
 */
SIMDE_FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8)
    SIMDE_REQUIRE_CONSTANT(imm8)
    HEDLEY_REQUIRE_MSG(((imm8 >= 0) && (imm8 <= 31)), "imm8 must be one of the SIMDE_CMP_* macros (values: [0, 31])") {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    simde__mmask16 r;
    SIMDE_CONSTIFY_32_(_mm512_cmp_ps_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b);
    return r;
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Whole-vector comparisons: each comparison yields an integer vector
       * whose lanes are all-ones or all-zeros, so `~`, `|` and `&` combine
       * them lane-wise. */
      switch (imm8) {
        case SIMDE_CMP_EQ_OQ:
        case SIMDE_CMP_EQ_OS:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32));
          break;
        case SIMDE_CMP_LT_OS:
        case SIMDE_CMP_LT_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32));
          break;
        case SIMDE_CMP_LE_OS:
        case SIMDE_CMP_LE_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32));
          break;
        case SIMDE_CMP_UNORD_Q:
        case SIMDE_CMP_UNORD_S:
          /* A lane is unordered when either operand is NaN (x != x). */
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ((a_.f32 != a_.f32) | (b_.f32 != b_.f32)));
          break;
        case SIMDE_CMP_NEQ_UQ:
        case SIMDE_CMP_NEQ_US:
          /* `!=` is already true for NaN operands. */
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32));
          break;
        case SIMDE_CMP_NLT_US:
        case SIMDE_CMP_NLT_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32));
          break;
        case SIMDE_CMP_NLE_US:
        case SIMDE_CMP_NLE_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32));
          break;
        case SIMDE_CMP_ORD_Q:
        case SIMDE_CMP_ORD_S:
          /* A lane is ordered when neither operand is NaN (x == x). */
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32)));
          break;
        case SIMDE_CMP_EQ_UQ:
        case SIMDE_CMP_EQ_US:
          /* Equal or unordered == neither less-than nor greater-than. */
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ~((a_.f32 < b_.f32) | (a_.f32 > b_.f32)));
          break;
        case SIMDE_CMP_NGE_US:
        case SIMDE_CMP_NGE_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32));
          break;
        case SIMDE_CMP_NGT_US:
        case SIMDE_CMP_NGT_UQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32));
          break;
        case SIMDE_CMP_FALSE_OQ:
        case SIMDE_CMP_FALSE_OS:
          r_ = simde__m512_to_private(simde_mm512_setzero_ps());
          break;
        case SIMDE_CMP_NEQ_OQ:
        case SIMDE_CMP_NEQ_OS:
          /* Not-equal and ordered == strictly less-than or greater-than. */
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), ((a_.f32 < b_.f32) | (a_.f32 > b_.f32)));
          break;
        case SIMDE_CMP_GE_OS:
        case SIMDE_CMP_GE_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32));
          break;
        case SIMDE_CMP_GT_OS:
        case SIMDE_CMP_GT_OQ:
          r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32));
          break;
        case SIMDE_CMP_TRUE_UQ:
        case SIMDE_CMP_TRUE_US:
          r_ = simde__m512_to_private(simde_x_mm512_setone_ps());
          break;
        default:
          HEDLEY_UNREACHABLE();
          break;
      }
    #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */
      /* Lane-by-lane fallback; same predicate table as the vector path. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        switch (imm8) {
          case SIMDE_CMP_EQ_OQ:
          case SIMDE_CMP_EQ_OS:
            r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_LT_OS:
          case SIMDE_CMP_LT_OQ:
            r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_LE_OS:
          case SIMDE_CMP_LE_OQ:
            r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_UNORD_Q:
          case SIMDE_CMP_UNORD_S:
            r_.u32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NEQ_UQ:
          case SIMDE_CMP_NEQ_US:
            r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NLT_US:
          case SIMDE_CMP_NLT_UQ:
            r_.u32[i] = !(a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NLE_US:
          case SIMDE_CMP_NLE_UQ:
            r_.u32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_ORD_Q:
          case SIMDE_CMP_ORD_S:
            r_.u32[i] = ((a_.f32[i] == a_.f32[i]) && (b_.f32[i] == b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_EQ_UQ:
          case SIMDE_CMP_EQ_US:
            r_.u32[i] = !((a_.f32[i] < b_.f32[i]) || (a_.f32[i] > b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NGE_US:
          case SIMDE_CMP_NGE_UQ:
            r_.u32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_NGT_US:
          case SIMDE_CMP_NGT_UQ:
            r_.u32[i] = !(a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_FALSE_OQ:
          case SIMDE_CMP_FALSE_OS:
            r_.u32[i] = UINT32_C(0);
            break;
          case SIMDE_CMP_NEQ_OQ:
          case SIMDE_CMP_NEQ_OS:
            r_.u32[i] = ((a_.f32[i] < b_.f32[i]) || (a_.f32[i] > b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_GE_OS:
          case SIMDE_CMP_GE_OQ:
            r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_GT_OS:
          case SIMDE_CMP_GT_OQ:
            r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0);
            break;
          case SIMDE_CMP_TRUE_UQ:
          case SIMDE_CMP_TRUE_US:
            r_.u32[i] = ~UINT32_C(0);
            break;
          default:
            HEDLEY_UNREACHABLE();
            break;
        }
      }
    #endif

    return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cmp_ps_mask
  #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8))
#endif

/* Less-than comparison of the float lanes of `a` and `b`, returned as a
 * mask. Thin wrapper: delegates to simde_mm512_cmp_ps_mask() with the
 * SIMDE_CMP_LT_OQ predicate. */
SIMDE_FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) {
  const simde__mmask16 lt_mask = simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ);

  return lt_mask;
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cmplt_ps_mask
  #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ)
#endif

/* Equality comparison of the float lanes of `a` and `b`, returned as a
 * mask. Thin wrapper: delegates to simde_mm512_cmp_ps_mask() with the
 * SIMDE_CMP_EQ_OQ predicate. */
SIMDE_FUNCTION_ATTRIBUTES
simde__mmask16
simde_mm512_cmpeq_ps_mask (simde__m512 a, simde__m512 b) {
  const simde__mmask16 eq_mask = simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ);

  return eq_mask;
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_cmpeq_ps_mask
  #define _mm512_cmpeq_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_cvtepi8_epi32 (simde__m128i a) {
Expand Down
Loading

0 comments on commit a8bf991

Please sign in to comment.