Add sqrt_llvm_fast intrinsic (see JuliaLang#33220)

Note: requires LLVM 7+ to generate rsqrt intrinsics
halleysfifthinc · Sep 12, 2019 · 640bb57 · 640bb57
1 parent f54cdf4
commit 640bb57
Show file tree

Hide file tree

Showing 7 changed files with 12 additions and 2 deletions.
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
@@ -262,6 +262,7 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction)
  f === Intrinsics.llvmcall || # this one is never effect-free
  f === Intrinsics.arraylen || # this one is volatile
  f === Intrinsics.sqrt_llvm || # this one may differ at runtime (by a few ulps)
+ f === Intrinsics.sqrt_llvm_fast || # this one may differ at runtime (by a few ulps)
  f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime
 end
 

diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
@@ -158,6 +158,7 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc, 10)
 add_tfunc(trunc_llvm, 1, 1, math_tfunc, 10)
 add_tfunc(rint_llvm, 1, 1, math_tfunc, 10)
 add_tfunc(sqrt_llvm, 1, 1, math_tfunc, 20)
+add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc, 20)
  ## same-type comparisons ##
 cmp_tfunc(@nospecialize(x), @nospecialize(y)) = Bool
 add_tfunc(eq_int, 2, 2, cmp_tfunc, 1)

diff --git a/base/fastmath.jl b/base/fastmath.jl
@@ -24,7 +24,7 @@ module FastMath
 
 export @fastmath
 
-import Core.Intrinsics: sqrt_llvm, neg_float_fast,
+import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
  add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
  eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
 
@@ -277,7 +277,7 @@ pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Fl
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
-sqrt_fast(x::FloatTypes) = sqrt_llvm(x)
+sqrt_fast(x::FloatTypes) = sqrt_llvm_fast(x)
 
 # libm
 

diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
@@ -70,6 +70,7 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
  float_func[trunc_llvm] = true;
  float_func[rint_llvm] = true;
  float_func[sqrt_llvm] = true;
+ float_func[sqrt_llvm_fast] = true;
 }
 
 extern "C"
@@ -1264,6 +1265,10 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
  Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
  return ctx.builder.CreateCall(sqrtintr, x);
  }
+ case sqrt_llvm_fast: {
+ Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
+ return math_builder(ctx, true)().CreateCall(sqrtintr, x);
+ }
 
  default:
  assert(0 && "invalid intrinsic");

diff --git a/src/intrinsics.h b/src/intrinsics.h
@@ -88,6 +88,7 @@
  ADD_I(trunc_llvm, 1) \
  ADD_I(rint_llvm, 1) \
  ADD_I(sqrt_llvm, 1) \
+ ADD_I(sqrt_llvm_fast, 1) \
  /* pointer access */ \
  ADD_I(pointerref, 3) \
  ADD_I(pointerset, 4) \

diff --git a/src/julia_internal.h b/src/julia_internal.h
@@ -833,6 +833,7 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
+JL_DLLEXPORT jl_value_t *jl_sqrt_llvm_fast(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);

diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
@@ -914,6 +914,7 @@ un_fintrinsic(floor_float,floor_llvm)
 un_fintrinsic(trunc_float,trunc_llvm)
 un_fintrinsic(rint_float,rint_llvm)
 un_fintrinsic(sqrt_float,sqrt_llvm)
+un_fintrinsic(sqrt_float,sqrt_llvm_fast)
 
 JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
 {