Skip to content

Commit

Permalink
Add LLVM level allocation optimization pass
Browse files Browse the repository at this point in the history
This can obtain escape information with much higher precision than what we can currently do
in typeinf. However, it does not replace the alloc_elim_pass! in type inference, since
this new pass cannot handle objects with reference fields.

Fix JuliaLang#20452
  • Loading branch information
yuyichao committed Jul 27, 2017
1 parent 85a2555 commit 09dc2d5
Show file tree
Hide file tree
Showing 12 changed files with 1,071 additions and 64 deletions.
4 changes: 3 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ endif
LLVMLINK :=

ifeq ($(JULIACODEGEN),LLVM)
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
LLVM_LIBS := all
ifeq ($(USE_POLLY),1)
Expand Down
2 changes: 1 addition & 1 deletion src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2106,7 +2106,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
size_t rtsz = jl_datatype_size(rt);
assert(rtsz > 0);
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
int boxalign = jl_gc_alignment(rtsz);
int boxalign = jl_datatype_align(rt);
#ifndef JL_NDEBUG
#if JL_LLVM_VERSION >= 40000
const DataLayout &DL = jl_data_layout;
Expand Down
23 changes: 5 additions & 18 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2097,25 +2097,12 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
{
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
JL_FEAT_REQUIRE(ctx, runtime);

int osize;
int offset = jl_gc_classify_pools(static_size, &osize);
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
Value *v;
if (offset < 0) {
Value *args[] = {ptls_ptr,
ConstantInt::get(T_size, static_size + sizeof(void*))};
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
ArrayRef<Value*>(args, 2));
}
else {
Value *pool_offs = ConstantInt::get(T_int32, offset);
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
ArrayRef<Value*>(args, 3));
}
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
return v;
auto call = ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
{ptls_ptr, ConstantInt::get(T_size, static_size),
maybe_decay_untracked(jt)});
call->setAttributes(jl_alloc_obj_func->getAttributes());
return call;
}

// if ptr is NULL this emits a write barrier _back_
Expand Down
34 changes: 14 additions & 20 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@ static Function *jlgenericfunction_func;
static Function *jlenter_func;
static Function *jlleave_func;
static Function *jlegal_func;
static Function *jlalloc_pool_func;
static Function *jlalloc_big_func;
static Function *jl_alloc_obj_func;
static Function *jlisa_func;
static Function *jlsubtype_func;
static Function *jlapplytype_func;
Expand Down Expand Up @@ -6372,24 +6371,19 @@ static void init_julia_llvm_env(Module *m)
"jl_instantiate_type_in_env", m);
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);

std::vector<Type*> alloc_pool_args(0);
alloc_pool_args.push_back(T_pint8);
alloc_pool_args.push_back(T_int32);
alloc_pool_args.push_back(T_int32);
jlalloc_pool_func =
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
Function::ExternalLinkage,
"jl_gc_pool_alloc", m);
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);

std::vector<Type*> alloc_big_args(0);
alloc_big_args.push_back(T_pint8);
alloc_big_args.push_back(T_size);
jlalloc_big_func =
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
Function::ExternalLinkage,
"jl_gc_big_alloc", m);
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
std::vector<Type*> gc_alloc_args(0);
gc_alloc_args.push_back(T_pint8);
gc_alloc_args.push_back(T_size);
gc_alloc_args.push_back(T_prjlvalue);
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
Function::ExternalLinkage,
"julia.gc_alloc_obj");
#if JL_LLVM_VERSION >= 50000
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
#else
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
#endif
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);

std::vector<Type *> dlsym_args(0);
dlsym_args.push_back(T_pint8);
Expand Down
6 changes: 1 addition & 5 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
}

int alignment;
if (x.isboxed) {
// julia's gc gives 16-byte aligned addresses
alignment = 16;
}
else if (jt) {
if (jt) {
alignment = julia_alignment(p, jt, 0);
}
else {
Expand Down
7 changes: 7 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
// effectiveness of the optimization, but should retain correctness.
#if JL_LLVM_VERSION < 50000
PM->add(createLowerExcHandlersPass());
PM->add(createAllocOptPass());
PM->add(createLateLowerGCFramePass());
// Remove dead use of ptls
PM->add(createDeadCodeEliminationPass());
Expand All @@ -161,6 +162,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
PM->add(createAlwaysInlinerPass()); // Respect always_inline
#endif

#if JL_LLVM_VERSION >= 50000
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
// pass.
PM->add(createAllocOptPass());
#endif
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
PM->add(createSROAPass()); // Break up aggregate allocas
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
Expand Down
1 change: 1 addition & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ Pass *createLateLowerGCFramePass();
Pass *createLowerExcHandlersPass();
Pass *createGCInvariantVerifierPass(bool Strong);
Pass *createPropagateJuliaAddrspaces();
Pass *createAllocOptPass();
// Whether the Function is an llvm or julia intrinsic.
static inline bool isIntrinsicFunction(Function *F)
{
Expand Down
Loading

0 comments on commit 09dc2d5

Please sign in to comment.