Skip to content

Commit

Permalink
Add sqrt_llvm_fast intrinsic (see JuliaLang#33220)
Browse files Browse the repository at this point in the history
Note: requires LLVM 7+ to generatic rsqrt intrinsics
  • Loading branch information
smallnamespace committed Sep 12, 2019
1 parent f54cdf4 commit 640bb57
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 2 deletions.
1 change: 1 addition & 0 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction)
f === Intrinsics.llvmcall || # this one is never effect-free
f === Intrinsics.arraylen || # this one is volatile
f === Intrinsics.sqrt_llvm || # this one may differ at runtime (by a few ulps)
f === Intrinsics.sqrt_llvm_fast || # this one may differ at runtime (by a few ulps)
f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime
end

Expand Down
1 change: 1 addition & 0 deletions base/compiler/tfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ add_tfunc(floor_llvm, 1, 1, math_tfunc, 10)
add_tfunc(trunc_llvm, 1, 1, math_tfunc, 10)
add_tfunc(rint_llvm, 1, 1, math_tfunc, 10)
add_tfunc(sqrt_llvm, 1, 1, math_tfunc, 20)
add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc, 20)
## same-type comparisons ##
cmp_tfunc(@nospecialize(x), @nospecialize(y)) = Bool
add_tfunc(eq_int, 2, 2, cmp_tfunc, 1)
Expand Down
4 changes: 2 additions & 2 deletions base/fastmath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module FastMath

export @fastmath

import Core.Intrinsics: sqrt_llvm, neg_float_fast,
import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast,
eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast

Expand Down Expand Up @@ -277,7 +277,7 @@ pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Fl
pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
@inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)

sqrt_fast(x::FloatTypes) = sqrt_llvm(x)
sqrt_fast(x::FloatTypes) = sqrt_llvm_fast(x)

# libm

Expand Down
5 changes: 5 additions & 0 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ static void jl_init_intrinsic_functions_codegen(Module *m)
float_func[trunc_llvm] = true;
float_func[rint_llvm] = true;
float_func[sqrt_llvm] = true;
float_func[sqrt_llvm_fast] = true;
}

extern "C"
Expand Down Expand Up @@ -1264,6 +1265,10 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
return ctx.builder.CreateCall(sqrtintr, x);
}
case sqrt_llvm_fast: {
Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t));
return math_builder(ctx, true)().CreateCall(sqrtintr, x);
}

default:
assert(0 && "invalid intrinsic");
Expand Down
1 change: 1 addition & 0 deletions src/intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
ADD_I(trunc_llvm, 1) \
ADD_I(rint_llvm, 1) \
ADD_I(sqrt_llvm, 1) \
ADD_I(sqrt_llvm_fast, 1) \
/* pointer access */ \
ADD_I(pointerref, 3) \
ADD_I(pointerset, 4) \
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,7 @@ JL_DLLEXPORT jl_value_t *jl_floor_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_trunc_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_rint_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_sqrt_llvm(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_sqrt_llvm_fast(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_abs_float(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
Expand Down
1 change: 1 addition & 0 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,7 @@ un_fintrinsic(floor_float,floor_llvm)
un_fintrinsic(trunc_float,trunc_llvm)
un_fintrinsic(rint_float,rint_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm)
un_fintrinsic(sqrt_float,sqrt_llvm_fast)

JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
{
Expand Down

0 comments on commit 640bb57

Please sign in to comment.