Skip to content

Commit

Permalink
Split the JIT compiler into an optimizer and concurrent compiler layer (#44364)
Browse files (browse the repository at this point in the history)

* Move optimization to IRTransformLayer
* Move to ConcurrentIRCompiler
* Create an optimization selection layer
  • Loading branch information
pchintalapudi committed Mar 2, 2022
1 parent b10aa56 commit 15b5df4
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 109 deletions.
189 changes: 97 additions & 92 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,108 +456,91 @@ CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
#endif
}

// Fills PM with the pass pipeline for one optimization level by calling, in
// order, addTargetPasses, addOptimizationPasses and addMachinePasses.
//
// NOTE(review): two signatures appear back to back below, and the
// addPassesToEmitMC call uses Ctx and ObjStream, which only the first (longer)
// signature declares. This looks like the before/after lines of a diff shown
// together -- confirm which lines are live before building.
static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, raw_svector_ostream &ObjStream, MCContext *Ctx, int optlevel)
static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, int optlevel)
{
addTargetPasses(&PM, &TM);
addOptimizationPasses(&PM, optlevel);
addMachinePasses(&PM, &TM, optlevel);
// A true result from addPassesToEmitMC is treated as unreachable.
if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
llvm_unreachable("Target does not support MC emission.");
}

// Returns how many basic blocks F contains, measured as the iterator
// distance from F.begin() to F.end().
static auto countBasicBlocks(const Function &F)
{
    const auto firstBlock = F.begin();
    const auto pastLastBlock = F.end();
    return std::distance(firstBlock, pastLastBlock);
}

CompilerResultT JuliaOJIT::CompilerT::operator()(Module &M)
{
uint64_t start_time = 0;
if (dump_llvm_opt_stream != NULL) {
// Print LLVM function statistics _before_ optimization
// Print all the information about this invocation as a YAML object
jl_printf(dump_llvm_opt_stream, "- \n");
// We print the name and some statistics for each function in the module, both
// before optimization and again afterwards.
jl_printf(dump_llvm_opt_stream, " before: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
// Runs this optimizer's pass manager (PM) over the module wrapped by TSM and
// returns the same TSM wrapped in an Expected.
//
// When dump_llvm_opt_stream is non-NULL, the name, instruction count and
// basic-block count of every function that is not a declaration and whose name
// does not start with "jfptr_" are written to that stream before and after the
// run, together with the elapsed time (time_ns) and the optlevel.
//
// R is not referenced anywhere in the lines shown here.
//
// NOTE(review): this span appears to mix lines from two versions of the
// routine: several statements occur twice (start_time assignment, JL_TIMING,
// the end_time declaration), a local `int optlevel` is declared after
// `optlevel` has already been printed, and the `jit.PM0`..`jit.PM3` dispatch
// and ObjBuffer code use a `jit` object this signature does not provide.
// Presumably these are pre-change diff lines rendered inline -- confirm which
// lines are live before building.
OptimizerResultT JuliaOJIT::OptimizerT::operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
TSM.withModuleDo([&](Module &M){
uint64_t start_time = 0;
if (dump_llvm_opt_stream != NULL) {
// Print LLVM function statistics _before_ optimization
// Print all the information about this invocation as a YAML object
jl_printf(dump_llvm_opt_stream, "- \n");
// We print the name and some statistics for each function in the module, both
// before optimization and again afterwards.
jl_printf(dump_llvm_opt_stream, " before: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
}
// Each function is printed as a YAML object with several attributes
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
}
// Each function is printed as a YAML object with several attributes
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));

start_time = jl_hrtime();
}

start_time = jl_hrtime();
}
JL_TIMING(LLVM_OPT);

// Run the passes held by this optimizer's pass manager on M.
PM.run(M);

JL_TIMING(LLVM_OPT);
uint64_t end_time = 0;
if (dump_llvm_opt_stream != NULL) {
end_time = jl_hrtime();
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);

int optlevel;
int optlevel_min;
if (jl_generating_output()) {
optlevel = 0;
}
else {
optlevel = jl_options.opt_level;
optlevel_min = jl_options.opt_level_min;
for (auto &F : M.functions()) {
if (!F.getBasicBlockList().empty()) {
Attribute attr = F.getFnAttribute("julia-optimization-level");
StringRef val = attr.getValueAsString();
if (val != "") {
int ol = (int)val[0] - '0';
if (ol >= 0 && ol < optlevel)
optlevel = ol;
// Print LLVM function statistics _after_ optimization
jl_printf(dump_llvm_opt_stream, " after: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
}
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
}
}
optlevel = std::max(optlevel, optlevel_min);
}
if (optlevel == 0)
jit.PM0.run(M);
else if (optlevel == 1)
jit.PM1.run(M);
else if (optlevel == 2)
jit.PM2.run(M);
else if (optlevel >= 3)
jit.PM3.run(M);

std::unique_ptr<MemoryBuffer> ObjBuffer(
new SmallVectorMemoryBuffer(std::move(jit.ObjBufferSV)));
auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());

if (!Obj) {
llvm_dump(&M);
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(Obj.takeError(), OS, "");
OS.flush();
llvm::report_fatal_error(llvm::Twine("FATAL: Unable to compile LLVM Module: '") + Buf + "'\n"
"The module's content was printed above. Please file a bug report");
}

uint64_t end_time = 0;
if (dump_llvm_opt_stream != NULL) {
end_time = jl_hrtime();
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);
});
// TSM is moved into the returned Expected; the passes above ran on the
// module it wraps through the reference M.
return Expected<orc::ThreadSafeModule>{std::move(TSM)};
}

// Print LLVM function statistics _after_ optimization
jl_printf(dump_llvm_opt_stream, " after: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
// Chooses which entry of `optimizers` should process TSM and forwards the
// module, together with its materialization responsibility R, to that entry.
//
// Level selection:
//   * if jl_generating_output() is true, level 0 is used;
//   * otherwise the level starts at jl_options.opt_level (negative values are
//     treated as 0), is lowered to the smallest value encoded by the first
//     character of a "julia-optimization-level" attribute on any function
//     that has a body, is raised to at least jl_options.opt_level_min, and is
//     finally clamped to the last valid index of `optimizers`.
//
// Parameters:
//   R   - responsibility object handed on unchanged to the selected optimizer.
//   TSM - module to inspect and then hand on; ownership is moved along.
void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
    // Sentinel for "no level chosen yet". It is expressed in size_t so the
    // assert below compares like with like whatever the width of size_t is.
    constexpr size_t unselected = ~static_cast<size_t>(0);
    size_t optlevel = unselected;
    TSM.withModuleDo([&](Module &M) {
        if (jl_generating_output()) {
            optlevel = 0;
            return;
        }
        optlevel = std::max(static_cast<int>(jl_options.opt_level), 0);
        const size_t optlevel_min = std::max(static_cast<int>(jl_options.opt_level_min), 0);
        for (auto &F : M.functions()) {
            if (F.getBasicBlockList().empty())
                continue;
            Attribute attr = F.getFnAttribute("julia-optimization-level");
            StringRef val = attr.getValueAsString();
            if (val.empty())
                continue;
            // ol is unsigned: a character below '0' wraps to a huge value and
            // fails the comparison, so no separate lower-bound check is needed.
            const size_t ol = static_cast<size_t>(val[0]) - '0';
            if (ol < optlevel)
                optlevel = ol;
        }
        // `count` is the number of entries in `optimizers`, so the largest
        // index that may be used below is count - 1.
        optlevel = std::min(std::max(optlevel, optlevel_min), this->count - 1);
    });
    assert(optlevel != unselected && "Failed to select a valid optimization level!");
    this->optimizers[optlevel].emit(std::move(R), std::move(TSM));
}

void jl_register_jit_object(const object::ObjectFile &debugObj,
Expand Down Expand Up @@ -807,10 +790,27 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
}
#endif

namespace {
    // Builds a JITTargetMachineBuilder that copies the triple, CPU, feature
    // string, options and code model from TM, uses Reloc::Static as the
    // relocation model, and sets the codegen optimization level to
    // CodeGenOptLevelFor(optlevel).
    orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) {
        orc::JITTargetMachineBuilder builder(TM.getTargetTriple());
        builder.setCPU(TM.getTargetCPU().str());
        builder.setFeatures(TM.getTargetFeatureString());
        builder.setOptions(TM.Options);
        builder.setRelocationModel(Reloc::Static);
        builder.setCodeModel(TM.getCodeModel());
        builder.setCodeGenOptLevel(CodeGenOptLevelFor(optlevel));
        return builder;
    }
}

JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
: TM(TM),
DL(TM.createDataLayout()),
ObjStream(ObjBufferSV),
TMs{
cantFail(createJTMBFromTM(TM, 0).createTargetMachine()),
cantFail(createJTMBFromTM(TM, 1).createTargetMachine()),
cantFail(createJTMBFromTM(TM, 2).createTargetMachine()),
cantFail(createJTMBFromTM(TM, 3).createTargetMachine())
},
TSCtx(std::unique_ptr<LLVMContext>(LLVMCtx)),
#if JL_LLVM_VERSION >= 130000
ES(cantFail(orc::SelfExecutorProcessControl::Create())),
Expand All @@ -837,7 +837,17 @@ JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
}
),
#endif
CompileLayer(ES, ObjectLayer, std::make_unique<CompilerT>(this))
CompileLayer0(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 0))),
CompileLayer1(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 1))),
CompileLayer2(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 2))),
CompileLayer3(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(TM, 3))),
OptimizeLayers{
{ES, CompileLayer0, OptimizerT(PM0, 0)},
{ES, CompileLayer1, OptimizerT(PM1, 1)},
{ES, CompileLayer2, OptimizerT(PM2, 2)},
{ES, CompileLayer3, OptimizerT(PM3, 3)},
},
OptSelLayer(OptimizeLayers)
{
#ifdef JL_USE_JITLINK
# if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
Expand All @@ -859,15 +869,10 @@ JuliaOJIT::JuliaOJIT(TargetMachine &TM, LLVMContext *LLVMCtx)
registerRTDyldJITObject(Object, LO, MemMgr);
});
#endif
for (int i = 0; i < 4; i++) {
TMs[i] = TM.getTarget().createTargetMachine(TM.getTargetTriple().getTriple(), TM.getTargetCPU(),
TM.getTargetFeatureString(), TM.Options, Reloc::Static, TM.getCodeModel(),
CodeGenOptLevelFor(i), true);
}
addPassesForOptLevel(PM0, *TMs[0], ObjStream, Ctx, 0);
addPassesForOptLevel(PM1, *TMs[1], ObjStream, Ctx, 1);
addPassesForOptLevel(PM2, *TMs[2], ObjStream, Ctx, 2);
addPassesForOptLevel(PM3, *TMs[3], ObjStream, Ctx, 3);
addPassesForOptLevel(PM0, *TMs[0], 0);
addPassesForOptLevel(PM1, *TMs[1], 1);
addPassesForOptLevel(PM2, *TMs[2], 2);
addPassesForOptLevel(PM3, *TMs[3], 3);

// Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve
// symbols in the program as well. The nullptr argument to the function
Expand Down Expand Up @@ -943,7 +948,7 @@ void JuliaOJIT::addModule(std::unique_ptr<Module> M)
}
#endif
// TODO: what is the performance characteristics of this?
cantFail(CompileLayer.add(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
cantFail(OptSelLayer.add(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
// force eager compilation (for now), due to memory management specifics
// (can't handle compilation recursion)
for (auto Name : NewExports)
Expand Down
55 changes: 38 additions & 17 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "llvm/IR/LegacyPassManager.h"

#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/IRTransformLayer.h>
#include <llvm/ExecutionEngine/JITEventListener.h>

#include <llvm/Target/TargetMachine.h>
Expand Down Expand Up @@ -176,28 +177,46 @@ typedef JITSymbol JL_JITSymbol;
typedef JITSymbol JL_SymbolInfo;

using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
using OptimizerResultT = Expected<orc::ThreadSafeModule>;

class JuliaOJIT {
// IR compiler that keeps a reference back to the JuliaOJIT it was created
// for. Only the call operator's declaration is given here.
struct CompilerT : public orc::IRCompileLayer::IRCompiler {
    // pjit is dereferenced immediately, so it must not be null.
    CompilerT(JuliaOJIT *pjit)
      : IRCompiler(orc::IRSymbolMapper::ManglingOptions{}),
        jit(*pjit)
    {
    }

    // Overrides the base-class call operator for a single Module.
    CompilerResultT operator()(Module &M) override;

private:
    JuliaOJIT &jit;
};
// Custom object emission notification handler for the JuliaOJIT
template <typename ObjT, typename LoadResult>
void registerObject(const ObjT &Obj, const LoadResult &LO);

public:
#ifdef JL_USE_JITLINK
typedef orc::ObjectLinkingLayer ObjLayerT;
#else
typedef orc::RTDyldObjectLinkingLayer ObjLayerT;
#endif
typedef orc::IRCompileLayer CompileLayerT;
typedef orc::IRTransformLayer OptimizeLayerT;
typedef object::OwningBinary<object::ObjectFile> OwningObj;
private:
// Callable that pairs a legacy pass manager with an optimization level.
// The pass manager is stored by reference, so the referenced object has to
// stay alive for as long as this OptimizerT is used.
struct OptimizerT {
    OptimizerT(legacy::PassManager &PM, int optlevel)
      : optlevel(optlevel),
        PM(PM)
    {
    }

    // Takes a thread-safe module plus its materialization responsibility and
    // yields an OptimizerResultT (Expected<orc::ThreadSafeModule>).
    OptimizerResultT operator()(orc::ThreadSafeModule M, orc::MaterializationResponsibility &R);

private:
    int optlevel;
    legacy::PassManager &PM;
};
// Custom object emission notification handler for the JuliaOJIT
template <typename ObjT, typename LoadResult>
void registerObject(const ObjT &Obj, const LoadResult &LO);

// IR layer that sits in front of a fixed array of optimizer layers; emit()
// is declared here and defined out of line.
struct OptSelLayerT : orc::IRLayer {

    // Binds to an array of N optimizer layers. The execution session and
    // mangling options for the IRLayer base are taken from the first element.
    // Only a pointer to the array and its length are stored, so the array has
    // to outlive this object.
    //
    // Marked explicit so that an OptimizeLayerT array is never converted to an
    // OptSelLayerT implicitly.
    template<size_t N>
    explicit OptSelLayerT(OptimizeLayerT (&optimizers)[N])
      : orc::IRLayer(optimizers[0].getExecutionSession(), optimizers[0].getManglingOptions()),
        optimizers(optimizers),
        count(N)
    {
        static_assert(N > 0, "Expected array with at least one optimizer!");
    }

    void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override;

private:
    OptimizeLayerT *optimizers; // first element of the bound array (not owned)
    size_t count;               // number of elements in that array (N)
};

public:

JuliaOJIT(TargetMachine &TM, LLVMContext *Ctx);

Expand Down Expand Up @@ -227,14 +246,11 @@ class JuliaOJIT {
const DataLayout DL;
// Should be big enough that in the common case, The
// object fits in its entirety
SmallVector<char, 4096> ObjBufferSV;
raw_svector_ostream ObjStream;
legacy::PassManager PM0; // per-optlevel pass managers
legacy::PassManager PM1;
legacy::PassManager PM2;
legacy::PassManager PM3;
TargetMachine *TMs[4];
MCContext *Ctx;
std::unique_ptr<TargetMachine> TMs[4];

orc::ThreadSafeContext TSCtx;
orc::ExecutionSession ES;
Expand All @@ -245,7 +261,12 @@ class JuliaOJIT {
std::shared_ptr<RTDyldMemoryManager> MemMgr;
#endif
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
CompileLayerT CompileLayer0;
CompileLayerT CompileLayer1;
CompileLayerT CompileLayer2;
CompileLayerT CompileLayer3;
OptimizeLayerT OptimizeLayers[4];
OptSelLayerT OptSelLayer;

DenseMap<void*, std::string> ReverseLocalSymbolTable;
};
Expand Down

0 comments on commit 15b5df4

Please sign in to comment.