From 631d187b741ae0f391ba1bd067c32382f3122473 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 16 Jun 2023 10:33:32 -0300 Subject: [PATCH] codegen: pass the pgcstack as an argument to specsig calls (#50093) The safepoint at function entry made it so that every function call did a relatively expensive load from the PTLS, we can instead pass the PTLS as an argument to functions making it significantly cheaper. Also use the swift calling conventions, that together with the `swiftself` attribute makes it so it's very likely the argument is kept in a register between calls. Fixes: https://github.com/JuliaLang/julia/issues/50068 --- base/reflection.jl | 3 ++ src/codegen.cpp | 62 +++++++++++++++++++------ src/julia.h | 1 + src/llvm-ptls.cpp | 13 ++++++ stdlib/InteractiveUtils/src/codeview.jl | 2 +- test/compiler/codegen.jl | 2 +- test/llvmpasses/fastmath.jl | 2 +- test/llvmpasses/llvmcall.jl | 2 +- test/llvmpasses/loopinfo.jl | 34 +++++++------- test/llvmpasses/pipeline-o0.jl | 1 - 10 files changed, 87 insertions(+), 35 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index bcfc39d2bd3a8..96b7a832cc575 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1194,6 +1194,7 @@ struct CodegenParams gnu_pubnames::Cint debug_info_kind::Cint safepoint_on_entry::Cint + gcstack_arg::Cint lookup::Ptr{Cvoid} @@ -1203,6 +1204,7 @@ struct CodegenParams prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), safepoint_on_entry::Bool=true, + gcstack_arg::Bool=true, lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})), generic_context = nothing) return new( @@ -1210,6 +1212,7 @@ struct CodegenParams Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, Cint(safepoint_on_entry), + Cint(gcstack_arg), lookup, generic_context) end end diff --git a/src/codegen.cpp b/src/codegen.cpp index 26304c7350c5c..37281ed3038ec 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1296,6 +1296,7 @@ extern "C" { #endif (int) DICompileUnit::DebugEmissionKind::FullDebug, 1, + 1, jl_rettype_inferred_addr, NULL }; } @@ -1719,7 +1720,7 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { } static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure); +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg); static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign); @@ -4107,7 +4108,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos { ++EmittedSpecfunCalls; // emit specialized call site - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure); + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); *cc = returninfo.cc; *return_roots = returninfo.return_roots; @@ -4141,7 +4143,10 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos argvals[idx] = return_roots; idx++; } - + if (gcstack_arg) { + argvals[idx] = ctx.pgcstack; + idx++; + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig @@ -4205,6 +4210,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos } CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals); call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; switch (returninfo.cc) { @@ -5273,7 +5280,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); if (specF) { jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true); + closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); specF = cast(returninfo.decl.getCallee()); } } @@ -5786,13 +5793,15 @@ static void emit_cfunc_invalidate( DebugLoc noDbg; ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0); - Function::arg_iterator AI = gf_thunk->arg_begin(); SmallVector myargs(nargs); if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union) ++AI; if (return_roots) ++AI; + if (JL_FEAT_TEST(ctx,gcstack_arg)){ + ++AI; // gcstack_arg + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(calltype, i); // n.b. specTypes is required to be a datatype by construction for specsig @@ -6258,8 +6267,9 @@ static Function* gen_cfun_wrapper( bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; assert(calltype == 3); // emit a specsig call + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst); - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); @@ -6286,6 +6296,8 @@ static Function* gen_cfun_wrapper( AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); args.push_back(return_roots); } + if (gcstack_arg) + args.push_back(ctx.pgcstack); for (size_t i = 0; i < nargs + 1; i++) { // figure out how to repack the arguments jl_cgval_t &inputarg = inputargs[i]; @@ -6332,11 +6344,15 @@ static Function* gen_cfun_wrapper( emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context); theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); } + assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType())); CallInst *call = ctx.builder.CreateCall( returninfo.decl.getFunctionType(), theFptr, ArrayRef(args)); call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); + switch (returninfo.cc) { case jl_returninfo_t::Boxed: retval = mark_julia_type(ctx, call, true, astrt); @@ -6710,7 +6726,11 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret args[idx] = return_roots; idx++; } - + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + if (gcstack_arg) { + args[idx] = ctx.pgcstack; + idx++; + } bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : @@ -6748,7 +6768,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } CallInst *call = ctx.builder.CreateCall(f.decl, args); call->setAttributes(f.attrs); - + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; if (retarg != -1) { Value *theArg; @@ -6790,7 +6811,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret return w; } -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure) +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg) { jl_returninfo_t props = {}; SmallVector fsig; @@ -6875,6 +6896,14 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0)); } + if (gcstack_arg){ + AttrBuilder param(ctx.builder.getContext()); + param.addAttribute(Attribute::SwiftSelf); + param.addAttribute(Attribute::NonNull); + attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); + fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0)); + } + for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *jt = jl_tparam(sig, i); bool isboxed = false; @@ -6936,7 +6965,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value else fval = emit_bitcast(ctx, fval, ftype->getPointerTo()); } - + if (gcstack_arg && isa(fval)) + cast(fval)->setCallingConv(CallingConv::Swift); props.decl = FunctionCallee(ftype, fval); props.attrs = attributes; return props; @@ -7163,7 +7193,8 @@ static jl_llvm_functions_t Function *f = NULL; bool has_sret = false; if (specsig) { // assumes !va and !needsparams - returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure); + returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, + jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg)); f = cast(returninfo.decl.getCallee()); has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); jl_init_function(f, ctx.emission_context.TargetTriple); @@ -7348,7 +7379,6 @@ static jl_llvm_functions_t ctx.spvals_ptr = &*AI++; } } - // step 6. set up GC frame allocate_gc_frame(ctx, b0); Value *last_age = NULL; @@ -7554,6 +7584,12 @@ static jl_llvm_functions_t param.addAlignmentAttr(Align(sizeof(jl_value_t*))); attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes } + if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){ + Argument *Arg = &*AI; + ++AI; + AttrBuilder param(ctx.builder.getContext()); + attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); + } for (i = 0; i < nreq; i++) { jl_sym_t *s = slot_symbol(ctx, i); jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); @@ -8564,7 +8600,7 @@ static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codeg jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; if (uses_specsig(mi->specTypes, false, true, rettype, true)) { - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, 1); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function *gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); diff --git a/src/julia.h b/src/julia.h index 2140b0ad0ab90..694a8d81b06e9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2344,6 +2344,7 @@ typedef struct { // limited, standalone int safepoint_on_entry; // Emit a safepoint on entry to each function + int gcstack_arg; // Pass the ptls value as an argument with swiftself // Cache access. Default: jl_rettype_inferred. jl_codeinstance_lookup_t lookup; diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 84f8d7121ff03..f69078433941f 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -314,6 +314,19 @@ bool LowerPTLS::run(bool *CFGModified) for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { auto call = cast(*it); ++it; + auto f = call->getCaller(); + Value *pgcstack = NULL; + for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) { + if (arg->hasSwiftSelfAttr()){ + pgcstack = &*arg; + break; + } + } + if (pgcstack) { + call->replaceAllUsesWith(pgcstack); + call->eraseFromParent(); + continue; + } assert(call->getCalledOperand() == pgcstack_getter); fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified); } diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 9ce5be9706bac..646028575d052 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ b/stdlib/InteractiveUtils/src/codeview.jl @@ -172,7 +172,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe raw::Bool, dump_module::Bool, syntax::Symbol, optimize::Bool, debuginfo::Symbol, binary::Bool) params = CodegenParams(debug_info_kind=Cint(0), - safepoint_on_entry=raw) + safepoint_on_entry=raw, gcstack_arg=raw) _dump_function(f, t, native, wrapper, raw, dump_module, syntax, optimize, debuginfo, binary, params) end diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index c29f82bfd6008..e93ecd232498f 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -17,7 +17,7 @@ end # The tests below assume a certain format and safepoint_on_entry=true breaks that. function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) - params = Base.CodegenParams(safepoint_on_entry=false) + params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false) d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params) sprint(print, d) end diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl index 76b048c19a2a0..7338d1c3ccc5a 100644 --- a/test/llvmpasses/fastmath.jl +++ b/test/llvmpasses/fastmath.jl @@ -14,7 +14,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) import Base.FastMath -# CHECK: call fast float @llvm.sqrt.f32(float %0) +# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}}) emit(FastMath.sqrt_fast, Float32) diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index 687abe0a8cd46..a55201c3e3bc3 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -28,5 +28,5 @@ emit(foo, Core.LLVMPtr{Float32, 3}) # CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}}) emit(foo, Foo) -# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half] +# CHECK: define {{(swiftcc )?}}<2 x half> @julia_bar_{{[0-9]+}}( emit(bar, NTuple{2, Float16}) diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index c970e07f8a125..18661ea6fde67 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -64,10 +64,10 @@ end # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration # FINAL: br end end @@ -90,17 +90,17 @@ end # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end end @@ -111,8 +111,8 @@ end 1 <= j <= I && continue @show (i,j) iteration(i) -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),))) end diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl index 1b5d1df3c9f36..1075d126c59ca 100644 --- a/test/llvmpasses/pipeline-o0.jl +++ b/test/llvmpasses/pipeline-o0.jl @@ -9,7 +9,6 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) # CHECK-LABEL: @julia_simple # CHECK-NOT: julia.get_pgcstack -# CHECK: asm # CHECK-NOT: julia.gc_alloc_obj # CHECK: ijl_gc_pool_alloc # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes