Skip to content

Commit

Permalink
codegen: pass the pgcstack as an argument to specsig calls (JuliaLang…
Browse files Browse the repository at this point in the history
…#50093)

The safepoint at function entry meant that every function call did
a relatively expensive load from the PTLS. We can instead pass the PTLS
as an argument to functions, making it significantly cheaper. Also use
the Swift calling convention, which, together with the `swiftself`
attribute, makes it very likely that the argument is kept in a
register between calls.

Fixes: JuliaLang#50068
  • Loading branch information
gbaraldi committed Jun 16, 2023
1 parent 71c5115 commit 631d187
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 35 deletions.
3 changes: 3 additions & 0 deletions base/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,7 @@ struct CodegenParams
gnu_pubnames::Cint
debug_info_kind::Cint
safepoint_on_entry::Cint
gcstack_arg::Cint

lookup::Ptr{Cvoid}

Expand All @@ -1203,13 +1204,15 @@ struct CodegenParams
prefer_specsig::Bool=false,
gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(),
safepoint_on_entry::Bool=true,
gcstack_arg::Bool=true,
lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})),
generic_context = nothing)
return new(
Cint(track_allocations), Cint(code_coverage),
Cint(prefer_specsig),
Cint(gnu_pubnames), debug_info_kind,
Cint(safepoint_on_entry),
Cint(gcstack_arg),
lookup, generic_context)
end
end
Expand Down
62 changes: 49 additions & 13 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1296,6 +1296,7 @@ extern "C" {
#endif
(int) DICompileUnit::DebugEmissionKind::FullDebug,
1,
1,
jl_rettype_inferred_addr, NULL };
}

Expand Down Expand Up @@ -1719,7 +1720,7 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) {
}

static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure);
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg);
static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
jl_binding_t **pbnd, bool assign);
Expand Down Expand Up @@ -4107,7 +4108,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
{
++EmittedSpecfunCalls;
// emit specialized call site
jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure);
bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
FunctionType *cft = returninfo.decl.getFunctionType();
*cc = returninfo.cc;
*return_roots = returninfo.return_roots;
Expand Down Expand Up @@ -4141,7 +4143,10 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
argvals[idx] = return_roots;
idx++;
}

if (gcstack_arg) {
argvals[idx] = ctx.pgcstack;
idx++;
}
for (size_t i = 0; i < nargs; i++) {
jl_value_t *jt = jl_nth_slot_type(specTypes, i);
// n.b.: specTypes is required to be a datatype by construction for specsig
Expand Down Expand Up @@ -4205,6 +4210,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
}
CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals);
call->setAttributes(returninfo.attrs);
if (gcstack_arg)
call->setCallingConv(CallingConv::Swift);

jl_cgval_t retval;
switch (returninfo.cc) {
Expand Down Expand Up @@ -5273,7 +5280,7 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
if (specF) {
jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL,
closure_decls.specFunctionObject, sigtype, rettype, true);
closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
specF = cast<Function>(returninfo.decl.getCallee());
}
}
Expand Down Expand Up @@ -5786,13 +5793,15 @@ static void emit_cfunc_invalidate(
DebugLoc noDbg;
ctx.builder.SetCurrentDebugLocation(noDbg);
allocate_gc_frame(ctx, b0);

Function::arg_iterator AI = gf_thunk->arg_begin();
SmallVector<jl_cgval_t> myargs(nargs);
if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
++AI;
if (return_roots)
++AI;
if (JL_FEAT_TEST(ctx,gcstack_arg)){
++AI; // gcstack_arg
}
for (size_t i = 0; i < nargs; i++) {
jl_value_t *jt = jl_nth_slot_type(calltype, i);
// n.b. specTypes is required to be a datatype by construction for specsig
Expand Down Expand Up @@ -6258,8 +6267,9 @@ static Function* gen_cfun_wrapper(
bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
assert(calltype == 3);
// emit a specsig call
bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure);
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg);
FunctionType *cft = returninfo.decl.getFunctionType();
jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);

Expand All @@ -6286,6 +6296,8 @@ static Function* gen_cfun_wrapper(
AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots));
args.push_back(return_roots);
}
if (gcstack_arg)
args.push_back(ctx.pgcstack);
for (size_t i = 0; i < nargs + 1; i++) {
// figure out how to repack the arguments
jl_cgval_t &inputarg = inputargs[i];
Expand Down Expand Up @@ -6332,11 +6344,15 @@ static Function* gen_cfun_wrapper(
emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context);
theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
}

assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType()));
CallInst *call = ctx.builder.CreateCall(
returninfo.decl.getFunctionType(),
theFptr, ArrayRef<Value*>(args));
call->setAttributes(returninfo.attrs);
if (gcstack_arg)
call->setCallingConv(CallingConv::Swift);

switch (returninfo.cc) {
case jl_returninfo_t::Boxed:
retval = mark_julia_type(ctx, call, true, astrt);
Expand Down Expand Up @@ -6710,7 +6726,11 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
args[idx] = return_roots;
idx++;
}

bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
if (gcstack_arg) {
args[idx] = ctx.pgcstack;
idx++;
}
bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
Expand Down Expand Up @@ -6748,7 +6768,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
}
CallInst *call = ctx.builder.CreateCall(f.decl, args);
call->setAttributes(f.attrs);

if (gcstack_arg)
call->setCallingConv(CallingConv::Swift);
jl_cgval_t retval;
if (retarg != -1) {
Value *theArg;
Expand Down Expand Up @@ -6790,7 +6811,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
return w;
}

static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure)
static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg)
{
jl_returninfo_t props = {};
SmallVector<Type*, 8> fsig;
Expand Down Expand Up @@ -6875,6 +6896,14 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0));
}

if (gcstack_arg){
AttrBuilder param(ctx.builder.getContext());
param.addAttribute(Attribute::SwiftSelf);
param.addAttribute(Attribute::NonNull);
attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0));
}

for (size_t i = 0; i < jl_nparams(sig); i++) {
jl_value_t *jt = jl_tparam(sig, i);
bool isboxed = false;
Expand Down Expand Up @@ -6936,7 +6965,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
else
fval = emit_bitcast(ctx, fval, ftype->getPointerTo());
}

if (gcstack_arg && isa<Function>(fval))
cast<Function>(fval)->setCallingConv(CallingConv::Swift);
props.decl = FunctionCallee(ftype, fval);
props.attrs = attributes;
return props;
Expand Down Expand Up @@ -7163,7 +7193,8 @@ static jl_llvm_functions_t
Function *f = NULL;
bool has_sret = false;
if (specsig) { // assumes !va and !needsparams
returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure);
returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes,
jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg));
f = cast<Function>(returninfo.decl.getCallee());
has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
jl_init_function(f, ctx.emission_context.TargetTriple);
Expand Down Expand Up @@ -7348,7 +7379,6 @@ static jl_llvm_functions_t
ctx.spvals_ptr = &*AI++;
}
}

// step 6. set up GC frame
allocate_gc_frame(ctx, b0);
Value *last_age = NULL;
Expand Down Expand Up @@ -7554,6 +7584,12 @@ static jl_llvm_functions_t
param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
}
if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){
Argument *Arg = &*AI;
++AI;
AttrBuilder param(ctx.builder.getContext());
attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param);
}
for (i = 0; i < nreq; i++) {
jl_sym_t *s = slot_symbol(ctx, i);
jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
Expand Down Expand Up @@ -8564,7 +8600,7 @@ static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codeg
jl_llvm_functions_t declarations;
declarations.functionObject = "jl_f_opaque_closure_call";
if (uses_specsig(mi->specTypes, false, true, rettype, true)) {
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, 1);
jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg));
Function *gf_thunk = cast<Function>(returninfo.decl.getCallee());
jl_init_function(gf_thunk, ctx.emission_context.TargetTriple);
size_t nrealargs = jl_nparams(mi->specTypes);
Expand Down
1 change: 1 addition & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2344,6 +2344,7 @@ typedef struct {
// limited, standalone

int safepoint_on_entry; // Emit a safepoint on entry to each function
int gcstack_arg; // Pass the pgcstack as an argument with swiftself

// Cache access. Default: jl_rettype_inferred.
jl_codeinstance_lookup_t lookup;
Expand Down
13 changes: 13 additions & 0 deletions src/llvm-ptls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,19 @@ bool LowerPTLS::run(bool *CFGModified)
for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) {
auto call = cast<CallInst>(*it);
++it;
auto f = call->getCaller();
Value *pgcstack = NULL;
for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) {
if (arg->hasSwiftSelfAttr()){
pgcstack = &*arg;
break;
}
}
if (pgcstack) {
call->replaceAllUsesWith(pgcstack);
call->eraseFromParent();
continue;
}
assert(call->getCalledOperand() == pgcstack_getter);
fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified);
}
Expand Down
2 changes: 1 addition & 1 deletion stdlib/InteractiveUtils/src/codeview.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
raw::Bool, dump_module::Bool, syntax::Symbol,
optimize::Bool, debuginfo::Symbol, binary::Bool)
params = CodegenParams(debug_info_kind=Cint(0),
safepoint_on_entry=raw)
safepoint_on_entry=raw, gcstack_arg=raw)
_dump_function(f, t, native, wrapper, raw, dump_module, syntax,
optimize, debuginfo, binary, params)
end
Expand Down
2 changes: 1 addition & 1 deletion test/compiler/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ end

# The tests below assume a certain format; safepoint_on_entry=true and gcstack_arg=true both break that.
function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true)
params = Base.CodegenParams(safepoint_on_entry=false)
params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false)
d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params)
sprint(print, d)
end
Expand Down
2 changes: 1 addition & 1 deletion test/llvmpasses/fastmath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))

import Base.FastMath

# CHECK: call fast float @llvm.sqrt.f32(float %0)
# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}})
emit(FastMath.sqrt_fast, Float32)


Expand Down
2 changes: 1 addition & 1 deletion test/llvmpasses/llvmcall.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ emit(foo, Core.LLVMPtr{Float32, 3})
# CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}})
emit(foo, Foo)

# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half]
# CHECK: define {{(swiftcc )?}}<2 x half> @julia_bar_{{[0-9]+}}(
emit(bar, NTuple{2, Float16})
34 changes: 17 additions & 17 deletions test/llvmpasses/loopinfo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ end
# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]]
# LOWER-NOT: call void @julia.loopinfo_marker()
# LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]]
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL-NOT: call void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
# FINAL: br
end
end
Expand All @@ -90,17 +90,17 @@ end
# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]]
# LOWER-NOT: call void @julia.loopinfo_marker()
# LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]]
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL: call void @j_iteration
# FINAL-NOT: call void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
end
end

Expand All @@ -111,8 +111,8 @@ end
1 <= j <= I && continue
@show (i,j)
iteration(i)
# FINAL: call void @j_iteration
# FINAL-NOT: call void @j_iteration
# FINAL: call {{(swiftcc )?}}void @j_iteration
# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration
end
$(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),)))
end
Expand Down
1 change: 0 additions & 1 deletion test/llvmpasses/pipeline-o0.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))

# CHECK-LABEL: @julia_simple
# CHECK-NOT: julia.get_pgcstack
# CHECK: asm
# CHECK-NOT: julia.gc_alloc_obj
# CHECK: ijl_gc_pool_alloc
# COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes
Expand Down

0 comments on commit 631d187

Please sign in to comment.