From 1352b973007a9487ac6390f9cc03785ce7bb7d6f Mon Sep 17 00:00:00 2001 From: jonathanvdc Date: Tue, 23 Apr 2019 19:02:55 +0200 Subject: [PATCH] Delay GC frame lowering into a separate pass Fix a typo in the final GC lowering pass Split up 'julia.push_new_gc_frame' into two intrinsics Steal name from existing GC frame when lowering 'julia.new_gc_frame' Move GC frame pop lowering to final GC lowering pass Change how GC helper intrinsics are accessed Remove fields from GCLoweringRefs Capture module pointer in GCLoweringRefs Make the 'julia.new_gc_frame' intrinsic noalias Move GC frame access lowering to final GC lowering pass Update some outdated comments Add some sanity checks to final GC lowering pass Include 'llvm-version.h' in final GC lowering pass Define tests that test GC frame lowering passes in isolation Use 'i32' instead of 'size_t' for 'julia.{new,push}_gc_frame' size args Try to fix GC lowering tests These tests work fine on most machines, but they break on x86 Travis CI builds. The breakage is almost certainly due to a mismatch between integer types in the tests and 'size_t', exposed by recent changes to the GC lowering pass. 
Define 'LLVMExtraAddFinalLowerGCPass' --- src/Makefile | 5 +- src/jitlayers.cpp | 2 + src/jitlayers.h | 1 + src/llvm-final-gc-lowering.cpp | 240 ++++++++++++++++++++++++++++ src/llvm-late-gc-lowering.cpp | 116 ++++---------- src/llvm-pass-helpers.cpp | 26 ++- src/llvm-pass-helpers.h | 11 +- test/llvmpasses/final-lower-gc.ll | 56 +++++++ test/llvmpasses/gcroots.ll | 2 +- test/llvmpasses/late-lower-gc.ll | 33 ++++ test/llvmpasses/refinements.ll | 2 +- test/llvmpasses/returnstwicegc.ll | 2 +- test/llvmpasses/safepoint_stress.jl | 2 +- 13 files changed, 400 insertions(+), 98 deletions(-) create mode 100644 src/llvm-final-gc-lowering.cpp create mode 100644 test/llvmpasses/final-lower-gc.ll create mode 100644 test/llvmpasses/late-lower-gc.ll diff --git a/src/Makefile b/src/Makefile index 7768ec5e3c83c..df22c626bf251 100644 --- a/src/Makefile +++ b/src/Makefile @@ -55,7 +55,7 @@ LLVMLINK := ifeq ($(JULIACODEGEN),LLVM) SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \ - llvm-pass-helpers llvm-late-gc-lowering \ + llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) @@ -211,8 +211,9 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR) $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/table.c $(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h -$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h +$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h +$(BUILDDIR)/llvm-pass-helpers.o 
$(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index e0aaa9b53bf29..155192ed047f5 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -126,6 +126,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, PM->add(createLowerExcHandlersPass()); PM->add(createGCInvariantVerifierPass(false)); PM->add(createLateLowerGCFramePass()); + PM->add(createFinalLowerGCPass()); PM->add(createLowerPTLSPass(dump_native)); } PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop @@ -241,6 +242,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, PM->add(createLowerExcHandlersPass()); PM->add(createGCInvariantVerifierPass(false)); PM->add(createLateLowerGCFramePass()); + PM->add(createFinalLowerGCPass()); // Remove dead use of ptls PM->add(createDeadCodeEliminationPass()); PM->add(createLowerPTLSPass(dump_native)); diff --git a/src/jitlayers.h b/src/jitlayers.h index b58ce874c1ab0..56a37fd69fcab 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -187,6 +187,7 @@ JL_DLLEXPORT extern LLVMContext jl_LLVMContext; Pass *createLowerPTLSPass(bool imaging_mode); Pass *createCombineMulAddPass(); +Pass *createFinalLowerGCPass(); Pass *createLateLowerGCFramePass(); Pass *createLowerExcHandlersPass(); Pass *createGCInvariantVerifierPass(bool Strong); diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp new file mode 100644 index 0000000000000..f9a072983c9c5 --- /dev/null +++ b/src/llvm-final-gc-lowering.cpp @@ -0,0 +1,240 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include +#include +#include +#include +#include +#include +#include + +#include "llvm-version.h" +#include "codegen_shared.h" +#include "julia.h" +#include "julia_internal.h" +#include "llvm-pass-helpers.h" + +#define DEBUG_TYPE "final_gc_lowering" + +using namespace llvm; + +// The final GC lowering pass. This pass lowers platform-agnostic GC +// intrinsics to platform-dependent instruction sequences. The +// intrinsics it targets are those produced by the late GC frame +// lowering pass. +// +// This pass targets typical back-ends for which the standard Julia +// runtime library is available. Atypical back-ends should supply +// their own lowering pass. +struct FinalLowerGC: public FunctionPass, private JuliaPassContext { + static char ID; + FinalLowerGC() : FunctionPass(ID) + { } + +private: + CallInst *ptlsStates; + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + + // Lowers a `julia.new_gc_frame` intrinsic. + Value *lowerNewGCFrame(CallInst *target, Function &F); + + // Lowers a `julia.push_gc_frame` intrinsic. + void lowerPushGCFrame(CallInst *target, Function &F); + + // Lowers a `julia.pop_gc_frame` intrinsic. + void lowerPopGCFrame(CallInst *target, Function &F); + + // Lowers a `julia.get_gc_frame_slot` intrinsic. + Value *lowerGetGCFrameSlot(CallInst *target, Function &F); + + Instruction *getPgcstack(Instruction *ptlsStates); +}; + +Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) +{ + assert(target->getNumArgOperands() == 1); + unsigned nRoots = cast(target->getArgOperand(0))->getLimitedValue(INT_MAX); + + // Create the GC frame. + AllocaInst *gcframe = new AllocaInst( + T_prjlvalue, + 0, + ConstantInt::get(T_int32, nRoots + 2)); + gcframe->insertAfter(target); + gcframe->takeName(target); + + // Zero out the GC frame. 
+ BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), ""); + tempSlot_i8->insertAfter(gcframe); + Type *argsT[2] = {tempSlot_i8->getType(), T_int32}; + Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT)); +#if JL_LLVM_VERSION >= 70000 + Value *args[4] = { + tempSlot_i8, // dest + ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val + ConstantInt::get(T_int32, sizeof(jl_value_t*)*(nRoots+2)), // len + ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile +#else + Value *args[5] = { + tempSlot_i8, // dest + ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val + ConstantInt::get(T_int32, sizeof(jl_value_t*)*(nRoots+2)), // len + ConstantInt::get(T_int32, 0), // align + ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile +#endif + CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); + zeroing->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + zeroing->insertAfter(tempSlot_i8); + + return gcframe; +} + +void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) +{ + assert(target->getNumArgOperands() == 2); + auto gcframe = target->getArgOperand(0); + unsigned nRoots = cast(target->getArgOperand(1))->getLimitedValue(INT_MAX); + + IRBuilder<> builder(target->getContext()); + builder.SetInsertPoint(&*(++BasicBlock::iterator(target))); + Instruction *inst = + builder.CreateStore( + ConstantInt::get(T_size, nRoots << 1), + builder.CreateBitCast( + builder.CreateConstGEP1_32(gcframe, 0), + T_size->getPointerTo())); + inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + Value *pgcstack = builder.Insert(getPgcstack(ptlsStates)); + inst = builder.CreateStore( + builder.CreateLoad(pgcstack), + builder.CreatePointerCast( + builder.CreateConstGEP1_32(gcframe, 1), + PointerType::get(T_ppjlvalue, 0))); + inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + builder.CreateStore(gcframe, builder.CreateBitCast(pgcstack, + 
PointerType::get(PointerType::get(T_prjlvalue, 0), 0)));
+}
+
+void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
+{
+    assert(target->getNumArgOperands() == 1); // operand 0: the GC frame being popped
+    auto gcframe = target->getArgOperand(0);
+
+    IRBuilder<> builder(target->getContext());
+    builder.SetInsertPoint(target); // emit the pop sequence immediately before the intrinsic call
+    Instruction *gcpop = // address of frame slot 1, where lowerPushGCFrame saved the previous pgcstack value
+        cast<Instruction>(builder.CreateConstGEP1_32(gcframe, 1));
+    Instruction *inst = builder.CreateLoad(gcpop);
+    inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+    inst = builder.CreateStore(
+        inst, // the saved value loaded from slot 1 ...
+        builder.CreateBitCast(
+            builder.Insert(getPgcstack(ptlsStates)), // ... is written back to the pgcstack field
+            PointerType::get(T_prjlvalue, 0)));
+    inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+}
+
+Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
+{
+    assert(target->getNumArgOperands() == 2); // operands are (gcframe, index); the intrinsic is declared with exactly two parameters
+    auto gcframe = target->getArgOperand(0);
+    auto index = target->getArgOperand(1);
+
+    // Initialize an IR builder that inserts right before the intrinsic call.
+    IRBuilder<> builder(target->getContext());
+    builder.SetInsertPoint(target);
+
+    // The first two slots are reserved, so we'll add two to the index.
+    index = builder.CreateAdd(index, ConstantInt::get(T_int32, 2));
+
+    // Lower the intrinsic as a GEP; the caller replaces all uses of `target` with it.
+    auto gep = builder.CreateGEP(gcframe, index);
+    gep->takeName(target);
+    return gep;
+}
+
+Instruction *FinalLowerGC::getPgcstack(Instruction *ptlsStates)
+{
+    Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*)); // field offset in pointer-sized units
+    return GetElementPtrInst::Create( // created detached: callers place it with builder.Insert(...)
+        nullptr,
+        ptlsStates,
+        ArrayRef<Value*>(offset),
+        "jl_pgcstack");
+}
+
+bool FinalLowerGC::doInitialization(Module &M)
+{
+    initAll(M);
+    return true;
+}
+
+bool FinalLowerGC::runOnFunction(Function &F)
+{
+    DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
+    // Check availability of functions again since they might have been deleted.
+    initFunctions(*F.getParent());
+    if (!ptls_getter)
+        return true;
+
+    // Look for a call to 'julia.ptls_states'.
+ ptlsStates = getPtls(F); + if (!ptlsStates) + return true; + + // Acquire intrinsic functions. + auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame); + auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame); + auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame); + auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot); + + // Lower all calls to supported intrinsics. + for (BasicBlock &BB : F) { + for (auto it = BB.begin(); it != BB.end();) { + auto *CI = dyn_cast(&*it); + if (!CI) { + ++it; + continue; + } + + auto callee = CI->getCalledValue(); + + if (callee == newGCFrameFunc) { + CI->replaceAllUsesWith(lowerNewGCFrame(CI, F)); + it = CI->eraseFromParent(); + } + else if (callee == pushGCFrameFunc) { + lowerPushGCFrame(CI, F); + it = CI->eraseFromParent(); + } + else if (callee == popGCFrameFunc) { + lowerPopGCFrame(CI, F); + it = CI->eraseFromParent(); + } + else if (callee == getGCFrameSlotFunc) { + CI->replaceAllUsesWith(lowerGetGCFrameSlot(CI, F)); + it = CI->eraseFromParent(); + } + else { + ++it; + } + } + } + + return true; +} + +char FinalLowerGC::ID = 0; +static RegisterPass X("FinalLowerGC", "Final GC intrinsic lowering pass", false, false); + +Pass *createFinalLowerGCPass() +{ + return new FinalLowerGC(); +} + +extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass(LLVMPassManagerRef PM) +{ + unwrap(PM)->add(createFinalLowerGCPass()); +} diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 2f10f182389c4..9caac99e597d8 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -345,8 +345,6 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext { State LocalScan(Function &F); void ComputeLiveness(State &S); void ComputeLiveSets(State &S); - void PushGCFrame(AllocaInst *gcframe, unsigned NRoots, Instruction *InsertAfter); - void PopGCFrame(AllocaInst *gcframe, Instruction *InsertBefore); std::vector ColorRoots(const State &S); void 
PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector &Colors, Value *GCFrame, Instruction *InsertionPoint); void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector &Colors, Value *GCFrame); @@ -354,7 +352,6 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext { bool doInitialization(Module &M) override; bool doFinalization(Module &) override; bool runOnFunction(Function &F) override; - Instruction *get_pgcstack(Instruction *ptlsStates); bool CleanupIR(Function &F, State *S=nullptr); void NoteUseChain(State &S, BBState &BBS, User *TheUser); SmallVector GetPHIRefinements(PHINode *phi, State &S); @@ -1594,44 +1591,6 @@ std::vector LateLowerGCFrame::ColorRoots(const State &S) { return Colors; } -Instruction *LateLowerGCFrame::get_pgcstack(Instruction *ptlsStates) -{ - Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*)); - return GetElementPtrInst::Create(nullptr, - ptlsStates, - ArrayRef(offset), - "jl_pgcstack"); -} - -void LateLowerGCFrame::PushGCFrame(AllocaInst *gcframe, unsigned NRoots, Instruction *InsertAfter) { - IRBuilder<> builder(gcframe->getContext()); - builder.SetInsertPoint(&*(++BasicBlock::iterator(InsertAfter))); - Instruction *inst = - builder.CreateStore(ConstantInt::get(T_size, NRoots << 1), - builder.CreateBitCast(builder.CreateConstGEP1_32(gcframe, 0), T_size->getPointerTo())); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - Value *pgcstack = builder.Insert(get_pgcstack(ptlsStates)); - inst = builder.CreateStore(builder.CreateLoad(pgcstack), - builder.CreatePointerCast(builder.CreateConstGEP1_32(gcframe, 1), PointerType::get(T_ppjlvalue,0))); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - builder.CreateStore(gcframe, builder.CreateBitCast(pgcstack, - PointerType::get(PointerType::get(T_prjlvalue, 0), 0))); -} - -void LateLowerGCFrame::PopGCFrame(AllocaInst *gcframe, Instruction *InsertBefore) { 
- IRBuilder<> builder(InsertBefore->getContext()); - builder.SetInsertPoint(InsertBefore); // set insert *before* Ret - Instruction *gcpop = - (Instruction*)builder.CreateConstGEP1_32(gcframe, 1); - Instruction *inst = builder.CreateLoad(gcpop); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - inst = builder.CreateStore(inst, - builder.CreateBitCast( - builder.Insert(get_pgcstack(ptlsStates)), - PointerType::get(T_prjlvalue, 0))); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); -} - // Size of T is assumed to be `sizeof(void*)` Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V) { @@ -1901,18 +1860,21 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor const std::vector &Colors, Value *GCFrame, Instruction *InsertionPoint) { Value *Val = GetPtrForNumber(S, R, InsertionPoint); - Value *args[1] = { - ConstantInt::get(T_int32, Colors[R]+MinColorRoot) - }; - GetElementPtrInst *gep = GetElementPtrInst::Create(T_prjlvalue, GCFrame, makeArrayRef(args)); - gep->insertBefore(InsertionPoint); + + // Get the slot address. + auto slotAddress = CallInst::Create( + getOrDefine(jl_intrinsics::getGCFrameSlot), + {GCFrame, ConstantInt::get(T_int32, Colors[R] + MinColorRoot)}); + + slotAddress->insertBefore(InsertionPoint); + Val = MaybeExtractUnion(std::make_pair(Val, -1), InsertionPoint); // Pointee types don't have semantics, so the optimizer is // free to rewrite them if convenient. We need to change // it back here for the store. 
if (Val->getType() != T_prjlvalue) Val = new BitCastInst(Val, T_prjlvalue, "", InsertionPoint); - new StoreInst(Val, gep, InsertionPoint); + new StoreInst(Val, slotAddress, InsertionPoint); } void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot, @@ -1949,43 +1911,28 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State // Insert instructions for the actual gc frame if (MaxColor != -1 || S.Allocas.size() != 0) { unsigned NRoots = MaxColor + 1 + S.Allocas.size(); - // Create GC Frame - AllocaInst *gcframe = new AllocaInst(T_prjlvalue, 0, - ConstantInt::get(T_int32, NRoots + 2), "gcframe"); + // Create and push a GC frame. + auto gcframe = CallInst::Create( + getOrDefine(jl_intrinsics::newGCFrame), + {ConstantInt::get(T_int32, NRoots)}, + "gcframe"); gcframe->insertBefore(&*F->getEntryBlock().begin()); - // Zero out gcframe - BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F->getContext()), ""); - tempSlot_i8->insertAfter(gcframe); - Type *argsT[2] = {tempSlot_i8->getType(), T_int32}; - Function *memset = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, makeArrayRef(argsT)); -#if JL_LLVM_VERSION >= 70000 - Value *args[4] = { - tempSlot_i8, // dest - ConstantInt::get(Type::getInt8Ty(F->getContext()), 0), // val - ConstantInt::get(T_int32, sizeof(jl_value_t*)*(NRoots+2)), // len - ConstantInt::get(Type::getInt1Ty(F->getContext()), 0)}; // volatile -#else - Value *args[5] = { - tempSlot_i8, // dest - ConstantInt::get(Type::getInt8Ty(F->getContext()), 0), // val - ConstantInt::get(T_int32, sizeof(jl_value_t*)*(NRoots+2)), // len - ConstantInt::get(T_int32, 0), // align - ConstantInt::get(Type::getInt1Ty(F->getContext()), 0)}; // volatile -#endif - CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); - zeroing->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - zeroing->insertAfter(tempSlot_i8); - // Push GC Frame - PushGCFrame(gcframe, NRoots, ptlsStates); + + auto 
pushGcframe = CallInst::Create( + getOrDefine(jl_intrinsics::pushGCFrame), + {gcframe, ConstantInt::get(T_int32, NRoots)}); + pushGcframe->insertAfter(ptlsStates); + // Replace Allocas - unsigned AllocaSlot = 2; + unsigned AllocaSlot = 0; for (AllocaInst *AI : S.Allocas) { - Value *args[1] = { - ConstantInt::get(T_int32, AllocaSlot++) - }; - GetElementPtrInst *gep = GetElementPtrInst::Create(T_prjlvalue, gcframe, makeArrayRef(args)); - gep->insertAfter(gcframe); - gep->takeName(AI); + // Pick a slot for the alloca. + auto slotAddress = CallInst::Create( + getOrDefine(jl_intrinsics::getGCFrameSlot), + {gcframe, ConstantInt::get(T_int32, AllocaSlot++)}); + slotAddress->insertAfter(gcframe); + slotAddress->takeName(AI); + // Check for lifetime intrinsics on this alloca, we can't keep them // because we're changing the semantics std::vector ToDelete; @@ -1998,7 +1945,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State }, AI); for (CallInst *II : ToDelete) II->eraseFromParent(); - AI->replaceAllUsesWith(gep); + AI->replaceAllUsesWith(slotAddress); AI->eraseFromParent(); } unsigned MinColorRoot = AllocaSlot; @@ -2007,7 +1954,10 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State // Insert GCFrame pops for(Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { if (isa(I->getTerminator())) { - PopGCFrame(gcframe, I->getTerminator()); + auto popGcframe = CallInst::Create( + getOrDefine(jl_intrinsics::popGCFrame), + {gcframe}); + popGcframe->insertBefore(I->getTerminator()); } } } diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 3cf1c1104a6e2..bc3a089ca9096 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -121,15 +121,16 @@ llvm::Function *JuliaPassContext::getOrDefine( } namespace jl_intrinsics { - static const char* NEW_GC_FRAME_NAME = "julia.new_gc_frame"; - static const char* PUSH_GC_FRAME_NAME = "julia.push_gc_frame"; - static const char* POP_GC_FRAME_NAME = 
"julia.pop_gc_frame"; + static const char *NEW_GC_FRAME_NAME = "julia.new_gc_frame"; + static const char *PUSH_GC_FRAME_NAME = "julia.push_gc_frame"; + static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame"; + static const char *GET_GC_FRAME_SLOT_NAME = "julia.get_gc_frame_slot"; const IntrinsicDescription newGCFrame( NEW_GC_FRAME_NAME, [](llvm::Module &M, const JuliaPassContext &context) { auto intrinsic = Function::Create( - FunctionType::get(PointerType::get(context.T_prjlvalue, 0), {context.T_size}, false), + FunctionType::get(PointerType::get(context.T_prjlvalue, 0), {context.T_int32}, false), Function::ExternalLinkage, NEW_GC_FRAME_NAME, &M); @@ -145,7 +146,7 @@ namespace jl_intrinsics { auto intrinsic = Function::Create( FunctionType::get( Type::getVoidTy(M.getContext()), - {PointerType::get(context.T_prjlvalue, 0), context.T_size}, + {PointerType::get(context.T_prjlvalue, 0), context.T_int32}, false), Function::ExternalLinkage, PUSH_GC_FRAME_NAME, @@ -168,4 +169,19 @@ namespace jl_intrinsics { return intrinsic; }); + + const IntrinsicDescription getGCFrameSlot( + GET_GC_FRAME_SLOT_NAME, + [](llvm::Module &M, const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + PointerType::get(context.T_prjlvalue, 0), + {PointerType::get(context.T_prjlvalue, 0), context.T_int32}, + false), + Function::ExternalLinkage, + GET_GC_FRAME_SLOT_NAME, + &M); + + return intrinsic; + }); } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index cffae20800b24..7567832ff198b 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -18,7 +18,7 @@ namespace jl_intrinsics { // intrinsics and materialize new intrinsics if necessary. struct IntrinsicDescription final { // The type of function that defines a new intrinsic. 
- typedef llvm::Function *(*DefinitionFunction)(llvm::Module &M, const JuliaPassContext&); + typedef llvm::Function *(*DefinitionFunction)(llvm::Module&, const JuliaPassContext&); // Creates an intrinsic description with a particular // name and definition function. @@ -61,15 +61,15 @@ struct JuliaPassContext { llvm::Function *typeof_func; llvm::Function *write_barrier_func; - // Creates a GC lowering refs structure. Type and function pointers + // Creates a pass context. Type and function pointers // are set to `nullptr`. Metadata nodes are initialized. JuliaPassContext(); - // Populates a GC lowering refs structure by inspecting a module. + // Populates a pass context by inspecting a module. // Also sets the current module to the given module. void initAll(llvm::Module &M); - // Initializes a GC lowering refs structure's functions only. + // Initializes a pass context's functions only. // Also sets the current module to the given module. void initFunctions(llvm::Module &M); @@ -99,6 +99,9 @@ namespace jl_intrinsics { // An intrinsic that pops a GC frame. extern const IntrinsicDescription popGCFrame; + + // An intrinsic that creates a pointer to a GC frame slot. 
+ extern const IntrinsicDescription getGCFrameSlot; } #endif diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll new file mode 100644 index 0000000000000..24dd270c1bc12 --- /dev/null +++ b/test/llvmpasses/final-lower-gc.ll @@ -0,0 +1,56 @@ +; RUN: opt -load libjulia%shlibext -FinalLowerGC -S %s | FileCheck %s + +%jl_value_t = type opaque + +declare void @boxed_simple(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*) +declare %jl_value_t addrspace(10)* @jl_box_int64(i64) +declare %jl_value_t*** @julia.ptls_states() +declare void @jl_safepoint() +declare %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) + +declare noalias nonnull %jl_value_t addrspace(10)** @julia.new_gc_frame(i32) +declare void @julia.push_gc_frame(%jl_value_t addrspace(10)**, i32) +declare %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)**, i32) +declare void @julia.pop_gc_frame(%jl_value_t addrspace(10)**) + +define void @gc_frame_lowering(i64 %a, i64 %b) { +top: +; CHECK-LABEL: @gc_frame_lowering +; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 4 + %gcframe = call %jl_value_t addrspace(10)** @julia.new_gc_frame(i32 2) +; CHECK: %ptls = call %jl_value_t*** @julia.ptls_states() + %ptls = call %jl_value_t*** @julia.ptls_states() +; CHECK-NEXT: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 0 +; CHECK-NEXT: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast %jl_value_t addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* +; CHECK-NEXT: store i64 4, i64* [[GCFRAME_SIZE_PTR2]], !tbaa !0 +; CHECK-NEXT: [[GCFRAME_SLOT:%.*]] = getelementptr %jl_value_t**, %jl_value_t*** %ptls, i32 0 +; CHECK-NEXT: [[PREV_GCFRAME_PTR:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 1 +; CHECK-NEXT: [[PREV_GCFRAME_PTR2:%.*]] = bitcast %jl_value_t addrspace(10)** [[PREV_GCFRAME_PTR]] to %jl_value_t*** +; 
CHECK-NEXT: [[PREV_GCFRAME:%.*]] = load %jl_value_t**, %jl_value_t*** [[GCFRAME_SLOT]] +; CHECK-NEXT: store %jl_value_t** [[PREV_GCFRAME]], %jl_value_t*** [[PREV_GCFRAME_PTR2]], !tbaa !0 +; CHECK-NEXT: [[GCFRAME_SLOT2:%.*]] = bitcast %jl_value_t*** [[GCFRAME_SLOT]] to %jl_value_t addrspace(10)*** +; CHECK-NEXT: store %jl_value_t addrspace(10)** %gcframe, %jl_value_t addrspace(10)*** [[GCFRAME_SLOT2]] + call void @julia.push_gc_frame(%jl_value_t addrspace(10)** %gcframe, i32 2) + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) +; CHECK: %frame_slot_1 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 3 + %frame_slot_1 = call %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)** %gcframe, i32 1) + store %jl_value_t addrspace(10)* %aboxed, %jl_value_t addrspace(10)** %frame_slot_1 + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) +; CHECK: %frame_slot_2 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 2 + %frame_slot_2 = call %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)** %gcframe, i32 0) + store %jl_value_t addrspace(10)* %bboxed, %jl_value_t addrspace(10)** %frame_slot_2 +; CHECK: call void @boxed_simple(%jl_value_t addrspace(10)* %aboxed, %jl_value_t addrspace(10)* %bboxed) + call void @boxed_simple(%jl_value_t addrspace(10)* %aboxed, %jl_value_t addrspace(10)* %bboxed) +; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 1 +; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** [[PREV_GCFRAME_PTR3]], !tbaa !0 +; CHECK-NEXT: [[GCFRAME_SLOT3:%.*]] = getelementptr %jl_value_t**, %jl_value_t*** %ptls, i32 0 +; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast %jl_value_t*** [[GCFRAME_SLOT3]] to %jl_value_t addrspace(10)** +; CHECK-NEXT: store %jl_value_t addrspace(10)* 
[[PREV_GCFRAME_PTR4]], %jl_value_t addrspace(10)** [[GCFRAME_SLOT4]], !tbaa !0 + call void @julia.pop_gc_frame(%jl_value_t addrspace(10)** %gcframe) +; CHECK-NEXT: ret void + ret void +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"jtbaa_gcframe", !2, i64 0} +!2 = !{!"jtbaa"} diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll index eb72e2e580d91..9b048d51433d6 100644 --- a/test/llvmpasses/gcroots.ll +++ b/test/llvmpasses/gcroots.ll @@ -1,4 +1,4 @@ -; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -S %s | FileCheck %s +; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s %jl_value_t = type opaque diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll new file mode 100644 index 0000000000000..6bdc62dd29d15 --- /dev/null +++ b/test/llvmpasses/late-lower-gc.ll @@ -0,0 +1,33 @@ +; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -S %s | FileCheck %s + +%jl_value_t = type opaque + +declare void @boxed_simple(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*) +declare %jl_value_t addrspace(10)* @jl_box_int64(i64) +declare %jl_value_t*** @julia.ptls_states() +declare void @jl_safepoint() +declare %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) + +define void @gc_frame_lowering(i64 %a, i64 %b) { +top: +; CHECK-LABEL: @gc_frame_lowering +; CHECK: %gcframe = call %jl_value_t addrspace(10)** @julia.new_gc_frame(i32 2) + %ptls = call %jl_value_t*** @julia.ptls_states() +; CHECK: %ptls = call %jl_value_t*** @julia.ptls_states() +; CHECK-NEXT: call void @julia.push_gc_frame(%jl_value_t addrspace(10)** %gcframe, i32 2) +; CHECK-NEXT: call %jl_value_t addrspace(10)* @jl_box_int64 + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) +; CHECK: [[GEP0:%.*]] = call %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; CHECK-NEXT: store %jl_value_t addrspace(10)* 
%aboxed, %jl_value_t addrspace(10)** [[GEP0]] + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) +; CHECK-NEXT: %bboxed = +; Make sure the same gc slot isn't re-used +; CHECK-NOT: call %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; CHECK: [[GEP1:%.*]] = call %jl_value_t addrspace(10)** @julia.get_gc_frame_slot(%jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; CHECK-NEXT: store %jl_value_t addrspace(10)* %bboxed, %jl_value_t addrspace(10)** [[GEP1]] +; CHECK-NEXT: call void @boxed_simple + call void @boxed_simple(%jl_value_t addrspace(10)* %aboxed, + %jl_value_t addrspace(10)* %bboxed) +; CHECK-NEXT: call void @julia.pop_gc_frame(%jl_value_t addrspace(10)** %gcframe) + ret void +} diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll index f7b9040dd45cb..cc9110b4f1af5 100644 --- a/test/llvmpasses/refinements.ll +++ b/test/llvmpasses/refinements.ll @@ -1,4 +1,4 @@ -; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -S %s | FileCheck %s +; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s %jl_value_t = type opaque diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll index 0bc649dac0308..20c22b3516060 100644 --- a/test/llvmpasses/returnstwicegc.ll +++ b/test/llvmpasses/returnstwicegc.ll @@ -1,4 +1,4 @@ -; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -S %s | FileCheck %s +; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s %jl_value_t = type opaque diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl index baa26952884a5..68736333f23f2 100644 --- a/test/llvmpasses/safepoint_stress.jl +++ b/test/llvmpasses/safepoint_stress.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s | opt -load libjulia%shlibext -LateLowerGCFrame -S - | FileCheck %s +# RUN: julia --startup-file=no %s | opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s println(""" %jl_value_t = type opaque