Skip to content

Commit

Permalink
Simplify compiler and add cpu option
Browse files Browse the repository at this point in the history
  • Loading branch information
fsaintjacques committed Dec 31, 2019
1 parent 077a9b7 commit 8a61d7e
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 56 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ a & b
The *jitmap-ir* tool takes an expression as its first input argument and dumps the
generated LLVM IR to stdout. By default, this will not use vectorized instructions.

```
```llvm
# tools/jitmap-ir '(a & b & c & d | e ^ f)'
; ModuleID = 'jitmap-ir-module'
source_filename = "jitmap-ir-module"
Expand Down Expand Up @@ -82,7 +82,7 @@ attributes #0 = { argmemonly }

We can then use LLVM's `llc` to transform the IR into native assembly.

```
```objdump
# tools/jitmap-ir '(a & b & c & d | e ^ f)' | llc-8 -O3 -mcpu=core-avx2
ninja: no work to do.
.text
Expand Down Expand Up @@ -127,7 +127,7 @@ query: # @query

This code is still not fully optimized; `opt` is used to optimize it further.

```
```assembly
# tools/jitmap-ir '(a & b & c & d | e ^ f)' | opt-8 -S -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu | llc-8 -O3 -mcpu=core-avx2
.text
.file "jitmap-ir-module"
Expand Down
23 changes: 8 additions & 15 deletions include/jitmap/query/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,15 @@ struct CompilerOptions {
// will be clamped to 3.
uint8_t optimization_level = 3;

// Number of background compiler threads.
uint8_t compiler_threads = 0;

// Controls the number of scalar, i.e. width of the vector aggregate value in
// the loop. A value of 1 will emit a scalar value instead of a vector value.
//
// Usually a small power of 2, e.g. 1, 2, 4, 8 or 16. See the documentation
// of your hardware platform for the optimal value.
// CPU architecture to optimize for. This will dictate the "best" vector
// instruction set to compile with. If unspecified or empty, llvm will
// auto-detect the host cpu architecture.
//
// LLVM's optimizer is able to perform the auto-vectorization of the loop. In
// cases where it can't, change the vector width here.
uint8_t vector_width = 1;

// Controls the width of each scalar (in bits). See the documentation
// of your hardware platform for the optimal value.
uint8_t scalar_width = kBitsPerBitsetWord;
// Invoke clang with the `-mcpu=?` option to get a list of supported strings, e.g.
// - core-avx-i
// - core-avx2
// - skylake-avx512
std::string cpu = "";
};

// The QueryIRCodeGen class generates LLVM IR for query expression on dense
Expand Down
2 changes: 1 addition & 1 deletion include/jitmap/query/expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ class ExprBuilder {
return &full;
}

Expr* Var(std::string name) { return Build<VariableExpr>(name); }
Expr* Var(const std::string& name) { return Build<VariableExpr>(name); }
Expr* Var(std::string_view name) { return Build<VariableExpr>(std::string(name)); }

Expr* Not(Expr* expr) { return Build<NotOpExpr>(expr); }
Expand Down
7 changes: 0 additions & 7 deletions include/jitmap/query/jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@ class JitEngine {
// https://llvm.org/doxygen/Host_8h_source.html for more information.
std::string GetTargetCPU() const;

// Return the LLVM features string for the host CPU.
//
// An array delimited by comma of symbols referencing a specific cpu feature.
// The feature supported are prefixed by `+`, and unsupported by `-`. See
// https://llvm.org/doxygen/Host_8h_source.html for more information.
std::string GetTargetFeatureString() const;

// Return the LLVM target triple for the host.
//
// The format is ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT. See
Expand Down
19 changes: 5 additions & 14 deletions src/jitmap/query/compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class QueryIRCodeGen::Impl {
// Constants
auto induction_type = llvm::Type::getInt64Ty(*ctx_);
auto zero = llvm::ConstantInt::get(induction_type, 0);
auto step = llvm::ConstantInt::get(induction_type, vector_width());
auto step = llvm::ConstantInt::get(induction_type, 1);
auto n_words = llvm::ConstantInt::get(induction_type, word_size());

auto loop_block = llvm::BasicBlock::Create(*ctx_, "loop", fn);
Expand Down Expand Up @@ -153,10 +153,8 @@ class QueryIRCodeGen::Impl {
auto namify = [&i](std::string key) { return key + "_" + std::to_string(i); };
// Compute the address to load
auto gep = builder_.CreateInBoundsGEP(bitmap_addr, {loop_idx}, namify("gep"));
// Cast previous address as a vector-type
auto bitcast = builder_.CreateBitCast(gep, VectorPtrType(), namify("bitcast"));
// Load in a register
return builder_.CreateLoad(bitcast, namify("load"));
return builder_.CreateLoad(gep, namify("load"));
};

// Bind the variable bitmaps by name to inputs of the function
Expand All @@ -167,12 +165,12 @@ class QueryIRCodeGen::Impl {
}

// Execute the expression tree on the input
ExprCodeGenVisitor visitor{keyed_bitmaps, builder_, VectorType()};
ExprCodeGenVisitor visitor{keyed_bitmaps, builder_, ElementType()};
auto result = query.expr().Visit(visitor);

// Store the result in the output bitmap.
auto gep = builder_.CreateInBoundsGEP(output, {loop_idx}, "gep_output");
auto bitcast = builder_.CreateBitCast(gep, VectorPtrType(), "bitcast_output");
auto bitcast = builder_.CreateBitCast(gep, ElementPtrType(), "bitcast_output");
builder_.CreateStore(result, bitcast);
}

Expand Down Expand Up @@ -224,15 +222,8 @@ class QueryIRCodeGen::Impl {

llvm::Type* ElementType() { return llvm::Type::getIntNTy(*ctx_, scalar_width()); }
llvm::Type* ElementPtrType() { return ElementType()->getPointerTo(); }
llvm::Type* VectorType() {
// Reverts to scalar for debugging purposes.
if (vector_width() == 1) return ElementType();
return llvm::VectorType::get(ElementType(), vector_width());
}
llvm::Type* VectorPtrType() { return VectorType()->getPointerTo(); }

uint8_t vector_width() const { return options_.vector_width; }
uint8_t scalar_width() const { return options_.scalar_width; }
uint8_t scalar_width() const { return kBitsPerBitsetWord; }
uint32_t word_size() const { return kBitsPerContainer / scalar_width(); }

std::unique_ptr<llvm::LLVMContext> ctx_;
Expand Down
21 changes: 6 additions & 15 deletions src/jitmap/query/jit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,12 @@ llvm::CodeGenOpt::Level CodeGetOptFromNumber(uint8_t level) {
}
}

llvm::SubtargetFeatures DetectHostCPUFeatures() {
llvm::SubtargetFeatures features;
llvm::StringMap<bool> map;
llvm::sys::getHostCPUFeatures(map);
for (auto& feature : map) {
features.AddFeature(feature.first(), feature.second);
std::string DetectCPU(const CompilerOptions& opts) {
if (opts.cpu.empty()) {
return llvm::sys::getHostCPUName();
}
return features;

return opts.cpu;
}

auto InitHostTargetMachineBuilder(const CompilerOptions& opts) {
Expand All @@ -79,9 +77,7 @@ auto InitHostTargetMachineBuilder(const CompilerOptions& opts) {

auto machine_builder = ExpectOrRaise(orc::JITTargetMachineBuilder::detectHost());
machine_builder.setCodeGenOptLevel(CodeGetOptFromNumber(opts.optimization_level));
// In newer LLVM version, cpu and features are detected by detectHost();
machine_builder.setCPU(llvm::sys::getHostCPUName());
machine_builder.addFeatures(DetectHostCPUFeatures().getFeatures());
machine_builder.setCPU(DetectCPU(opts));

return machine_builder;
}
Expand Down Expand Up @@ -121,7 +117,6 @@ std::unique_ptr<orc::LLJIT> InitLLJIT(orc::JITTargetMachineBuilder machine_build
const CompilerOptions& options) {
return ExpectOrRaise(orc::LLJITBuilder()
.setJITTargetMachineBuilder(machine_builder)
.setNumCompileThreads(options.compiler_threads)
.setObjectLinkingLayerCreator(ObjectLinkingLayerFactory)
.create());
}
Expand Down Expand Up @@ -150,7 +145,6 @@ class JitEngine::Impl {

// Introspection
std::string GetTargetCPU() const { return host_->getTargetCPU(); }
std::string GetTargetFeatureString() const { return host_->getTargetFeatureString(); }
std::string GetTargetTriple() const { return host_->getTargetTriple().normalize(); }
const llvm::DataLayout layout() const { return host_->createDataLayout(); }

Expand Down Expand Up @@ -190,9 +184,6 @@ JitEngine::~JitEngine() {}
JitEngine::JitEngine(JitEngine&& other) { std::swap(impl_, other.impl_); }

std::string JitEngine::GetTargetCPU() const { return impl_->GetTargetCPU(); }
std::string JitEngine::GetTargetFeatureString() const {
return impl_->GetTargetFeatureString();
}
std::string JitEngine::GetTargetTriple() const { return impl_->GetTargetTriple(); }

void JitEngine::Compile(QueryIRCodeGen query) { impl_->AddModule(std::move(query)); }
Expand Down
1 change: 0 additions & 1 deletion tests/query/jit_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ class JitTest : public QueryTest {

TEST_F(JitTest, CpuDetection) {
EXPECT_NE(engine_.GetTargetCPU(), "");
EXPECT_NE(engine_.GetTargetFeatureString(), "");
EXPECT_NE(engine_.GetTargetTriple(), "");
}

Expand Down

0 comments on commit 8a61d7e

Please sign in to comment.