Skip to content

Commit

Permalink
Delay GC frame lowering into a separate pass
Browse files Browse the repository at this point in the history
Fix a typo in the final GC lowering pass

Split up 'julia.push_new_gc_frame' into two intrinsics

Steal name from existing GC frame when lowering 'julia.new_gc_frame'

Move GC frame pop lowering to final GC lowering pass

Change how GC helper intrinsics are accessed

Remove fields from GCLoweringRefs

Capture module pointer in GCLoweringRefs

Make the 'julia.new_gc_frame' intrinsic noalias

Move GC frame access lowering to final GC lowering pass

Update some outdated comments

Add some sanity checks to final GC lowering pass

Include 'llvm-version.h' in final GC lowering pass

Define tests that test GC frame lowering passes in isolation

Use 'i32' instead of 'size_t' for 'julia.{new,push}_gc_frame' size args

Try to fix GC lowering tests

These tests work fine on most machines, but they break on x86 Travis CI builds.
The breakage is almost certainly due to a mismatch between integer types in
the tests and 'size_t', exposed by recent changes to the GC lowering pass.

Define 'LLVMExtraAddFinalLowerGCPass'
  • Loading branch information
jonathanvdc committed Apr 24, 2019
1 parent 0bd5602 commit 1352b97
Show file tree
Hide file tree
Showing 13 changed files with 400 additions and 98 deletions.
5 changes: 3 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ LLVMLINK :=

ifeq ($(JULIACODEGEN),LLVM)
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
llvm-pass-helpers llvm-late-gc-lowering \
llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
Expand Down Expand Up @@ -211,8 +211,9 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h
$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/table.c
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h
$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
Expand Down
2 changes: 2 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
PM->add(createLowerExcHandlersPass());
PM->add(createGCInvariantVerifierPass(false));
PM->add(createLateLowerGCFramePass());
PM->add(createFinalLowerGCPass());
PM->add(createLowerPTLSPass(dump_native));
}
PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
Expand Down Expand Up @@ -241,6 +242,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
PM->add(createLowerExcHandlersPass());
PM->add(createGCInvariantVerifierPass(false));
PM->add(createLateLowerGCFramePass());
PM->add(createFinalLowerGCPass());
// Remove dead use of ptls
PM->add(createDeadCodeEliminationPass());
PM->add(createLowerPTLSPass(dump_native));
Expand Down
1 change: 1 addition & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ JL_DLLEXPORT extern LLVMContext jl_LLVMContext;

Pass *createLowerPTLSPass(bool imaging_mode);
Pass *createCombineMulAddPass();
Pass *createFinalLowerGCPass();
Pass *createLateLowerGCFramePass();
Pass *createLowerExcHandlersPass();
Pass *createGCInvariantVerifierPass(bool Strong);
Expand Down
240 changes: 240 additions & 0 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/Pass.h>
#include <llvm/Support/Debug.h>

#include "llvm-version.h"
#include "codegen_shared.h"
#include "julia.h"
#include "julia_internal.h"
#include "llvm-pass-helpers.h"

#define DEBUG_TYPE "final_gc_lowering"

using namespace llvm;

// The final GC lowering pass. This pass lowers platform-agnostic GC
// intrinsics to platform-dependent instruction sequences. The
// intrinsics it targets are those produced by the late GC frame
// lowering pass.
//
// This pass targets typical back-ends for which the standard Julia
// runtime library is available. Atypical back-ends should supply
// their own lowering pass.
struct FinalLowerGC: public FunctionPass, private JuliaPassContext {
    // Pass identifier; its address is what the pass infrastructure keys on.
    static char ID;
    FinalLowerGC() : FunctionPass(ID)
    { }

private:
    // The `julia.ptls_states` call found in the function currently being
    // processed. `runOnFunction` assigns it before any lowering routine
    // reads it; it starts out null so that it never holds an indeterminate
    // value between construction and the first run.
    CallInst *ptlsStates = nullptr;

    // Per-module setup of the shared pass context.
    bool doInitialization(Module &M) override;
    // Lowers every supported GC intrinsic call in `F`.
    bool runOnFunction(Function &F) override;

    // Lowers a `julia.new_gc_frame` intrinsic. Returns the value that
    // replaces the intrinsic call.
    Value *lowerNewGCFrame(CallInst *target, Function &F);

    // Lowers a `julia.push_gc_frame` intrinsic.
    void lowerPushGCFrame(CallInst *target, Function &F);

    // Lowers a `julia.pop_gc_frame` intrinsic.
    void lowerPopGCFrame(CallInst *target, Function &F);

    // Lowers a `julia.get_gc_frame_slot` intrinsic. Returns the value that
    // replaces the intrinsic call.
    Value *lowerGetGCFrameSlot(CallInst *target, Function &F);

    // Builds (but does not insert) a GEP that addresses the `pgcstack`
    // field relative to the given thread-local state pointer.
    Instruction *getPgcstack(Instruction *ptlsStates);
};

// Lowers a `julia.new_gc_frame` call into a stack allocation followed by a
// memset that clears it. The allocation holds the requested number of roots
// plus two additional slots, inherits the intrinsic call's name, and is
// returned so the caller can substitute it for the call.
Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
{
    assert(target->getNumArgOperands() == 1);
    auto *rootCount = cast<ConstantInt>(target->getArgOperand(0));
    unsigned nRoots = rootCount->getLimitedValue(INT_MAX);
    // Total frame length: the roots themselves plus two extra slots.
    unsigned nSlots = nRoots + 2;
    LLVMContext &context = F.getContext();

    // Allocate the frame right behind the intrinsic call and steal its name.
    AllocaInst *frame = new AllocaInst(
        T_prjlvalue,
        0,
        ConstantInt::get(T_int32, nSlots));
    frame->insertAfter(target);
    frame->takeName(target);

    // memset operates on an i8 pointer, so view the frame as raw bytes.
    BitCastInst *frameBytes = new BitCastInst(frame, Type::getInt8PtrTy(context), "");
    frameBytes->insertAfter(frame);

    Type *overloadTypes[2] = {frameBytes->getType(), T_int32};
    Function *memsetFunc = Intrinsic::getDeclaration(
        F.getParent(), Intrinsic::memset, makeArrayRef(overloadTypes));

    Value *fillByte = ConstantInt::get(Type::getInt8Ty(context), 0);
    Value *byteCount = ConstantInt::get(T_int32, sizeof(jl_value_t*) * nSlots);
    Value *isVolatile = ConstantInt::get(Type::getInt1Ty(context), 0);
#if JL_LLVM_VERSION >= 70000
    // From LLVM 7 on, the memset intrinsic has no explicit alignment operand.
    Value *memsetArgs[4] = {frameBytes, fillByte, byteCount, isVolatile};
#else
    Value *alignment = ConstantInt::get(T_int32, 0);
    Value *memsetArgs[5] = {frameBytes, fillByte, byteCount, alignment, isVolatile};
#endif

    // Emit the zero-fill immediately after the bitcast.
    CallInst *zeroFill = CallInst::Create(memsetFunc, makeArrayRef(memsetArgs));
    zeroFill->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
    zeroFill->insertAfter(frameBytes);

    return frame;
}

// Lowers a `julia.push_gc_frame` call. Operand 0 is the GC frame and
// operand 1 is the constant number of roots. The generated code fills in
// the first two frame slots and then stores the frame pointer at the
// `pgcstack` location of the thread-local state. All new instructions are
// placed directly after the intrinsic call; erasing the call itself is left
// to the caller.
void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
{
assert(target->getNumArgOperands() == 2);
auto gcframe = target->getArgOperand(0);
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(1))->getLimitedValue(INT_MAX);

IRBuilder<> builder(target->getContext());
// Insert in front of whatever instruction follows the intrinsic call.
builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
// Slot 0 receives `nRoots << 1`, written as a size-typed integer.
// NOTE(review): the shift is presumably the root-count encoding the
// runtime expects in the frame header -- confirm against the runtime.
Instruction *inst =
builder.CreateStore(
ConstantInt::get(T_size, nRoots << 1),
builder.CreateBitCast(
builder.CreateConstGEP1_32(gcframe, 0),
T_size->getPointerTo()));
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
// Address of the `pgcstack` field inside the thread-local state.
Value *pgcstack = builder.Insert(getPgcstack(ptlsStates));
// Slot 1 receives the value currently stored at `pgcstack`.
// The load and the GEP/cast are sibling call arguments, so the order in
// which they are emitted is decided by the compiler's argument evaluation
// order; the two have no data dependency on each other.
inst = builder.CreateStore(
builder.CreateLoad(pgcstack),
builder.CreatePointerCast(
builder.CreateConstGEP1_32(gcframe, 1),
PointerType::get(T_ppjlvalue, 0)));
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
// Finally, store the frame pointer itself at the `pgcstack` location.
builder.CreateStore(gcframe, builder.CreateBitCast(pgcstack,
PointerType::get(PointerType::get(T_prjlvalue, 0), 0)));
}

// Lowers a `julia.pop_gc_frame` call. The value held in slot 1 of the frame
// is loaded and written back to the `pgcstack` location of the thread-local
// state. The new instructions are emitted in front of the intrinsic call;
// erasing the call itself is left to the caller.
void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
{
    assert(target->getNumArgOperands() == 1);
    Value *frame = target->getArgOperand(0);

    IRBuilder<> irb(target->getContext());
    irb.SetInsertPoint(target);

    // Fetch the contents of frame slot 1.
    Instruction *savedSlot = cast<Instruction>(irb.CreateConstGEP1_32(frame, 1));
    Instruction *savedValue = irb.CreateLoad(savedSlot);
    savedValue->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);

    // Write that value to the `pgcstack` field of the thread-local state.
    Instruction *pgcstack = irb.Insert(getPgcstack(ptlsStates));
    Value *pgcstackSlot = irb.CreateBitCast(pgcstack, PointerType::get(T_prjlvalue, 0));
    Instruction *restore = irb.CreateStore(savedValue, pgcstackSlot);
    restore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
}

// Lowers a `julia.get_gc_frame_slot` call into a GEP on the GC frame.
//
// Operand 0 of the call is the GC frame and operand 1 is the slot index.
// The returned GEP takes over the intrinsic call's name; the caller is
// expected to replace the call with it and erase the call.
Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
{
    // Exactly two operands are consumed below (frame and index), so that is
    // the arity to check for.
    assert(target->getNumArgOperands() == 2);
    auto gcframe = target->getArgOperand(0);
    auto index = target->getArgOperand(1);

    // Initialize an IR builder that emits in front of the intrinsic call.
    IRBuilder<> builder(target->getContext());
    builder.SetInsertPoint(target);

    // The first two slots are reserved, so we'll add two to the index.
    index = builder.CreateAdd(index, ConstantInt::get(T_int32, 2));

    // Lower the intrinsic as a GEP.
    auto gep = builder.CreateGEP(gcframe, index);
    gep->takeName(target);
    return gep;
}

// Creates a GEP named "jl_pgcstack" that addresses the `pgcstack` field
// relative to `ptlsStates`. The field's byte offset is converted into an
// index counted in pointer-sized elements. The instruction is returned
// without being inserted into any basic block; that is up to the caller.
Instruction *FinalLowerGC::getPgcstack(Instruction *ptlsStates)
{
    const auto pgcstackIndex = offsetof(jl_tls_states_t, pgcstack) / sizeof(void*);
    Value *gepIndices[] = {ConstantInt::getSigned(T_int32, pgcstackIndex)};
    return GetElementPtrInst::Create(nullptr, ptlsStates, gepIndices, "jl_pgcstack");
}

// Module-level initialization hook of the pass. It hands the module to
// `initAll` and unconditionally returns true.
// NOTE(review): `initAll` is not defined in this file; presumably it is
// provided by JuliaPassContext and sets up the types and functions the
// lowering routines rely on -- confirm in llvm-pass-helpers.h.
bool FinalLowerGC::doInitialization(Module &M)
{
    initAll(M);

    return true;
}

// Walks every instruction of `F` and lowers each call to one of the four
// supported GC intrinsics, erasing the intrinsic call afterwards. Always
// returns true, including when the function is left untouched because the
// PTLS getter or the `julia.ptls_states` call is missing.
bool FinalLowerGC::runOnFunction(Function &F)
{
    DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");

    // Function declarations may have been deleted since initialization, so
    // look them up again before relying on them.
    initFunctions(*F.getParent());
    if (!ptls_getter)
        return true;

    // Without a 'julia.ptls_states' call in this function, nothing is lowered.
    ptlsStates = getPtls(F);
    if (!ptlsStates)
        return true;

    // Intrinsic declarations; any of these may be null.
    auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
    auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame);
    auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame);
    auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot);

    for (BasicBlock &BB : F) {
        auto it = BB.begin();
        while (it != BB.end()) {
            auto *call = dyn_cast<CallInst>(&*it);
            if (call == nullptr) {
                // Only call instructions can be intrinsic calls.
                ++it;
                continue;
            }

            auto callee = call->getCalledValue();

            // `replacement` stays null for intrinsics that produce no value.
            Value *replacement = nullptr;
            bool lowered = true;
            if (callee == newGCFrameFunc)
                replacement = lowerNewGCFrame(call, F);
            else if (callee == pushGCFrameFunc)
                lowerPushGCFrame(call, F);
            else if (callee == popGCFrameFunc)
                lowerPopGCFrame(call, F);
            else if (callee == getGCFrameSlotFunc)
                replacement = lowerGetGCFrameSlot(call, F);
            else
                lowered = false;

            if (!lowered) {
                ++it;
                continue;
            }

            if (replacement)
                call->replaceAllUsesWith(replacement);
            // Erasing yields the iterator to the instruction that follows.
            it = call->eraseFromParent();
        }
    }

    return true;
}

// Definition of the pass identifier declared in FinalLowerGC.
char FinalLowerGC::ID = 0;

// Registers the pass under the command-line name "FinalLowerGC".
static RegisterPass<FinalLowerGC> X(
    "FinalLowerGC",
    "Final GC intrinsic lowering pass",
    false,
    false);

// Factory for the final GC lowering pass. Returns a freshly allocated pass
// object as a raw pointer.
Pass *createFinalLowerGCPass()
{
    return new FinalLowerGC;
}

// C entry point: creates a final GC lowering pass and adds it to the pass
// manager behind the given C API handle.
extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass(LLVMPassManagerRef PM)
{
    auto *manager = unwrap(PM);
    manager->add(createFinalLowerGCPass());
}
Loading

0 comments on commit 1352b97

Please sign in to comment.