Skip to content

Commit

Permalink
LibJS/Bytecode: Add peephole optimization pass and fuse compare+jump
Browse files Browse the repository at this point in the history
This patch adds a new "Peephole" pass for performing small, local
optimizations to bytecode.

We also introduce the first such optimization: fusing a comparison
instruction (e.g. LessThan) that is immediately followed by a JumpIf
into a single instruction from a new set of fused compare-and-jump
instructions (e.g. JumpLessThan).

This gives a ~50% speed-up on the following microbenchmark:

    for (let i = 0; i < 10_000_000; ++i) {
    }

But more traditional benchmarks see a pretty sizable speed-up as well,
for example 15% on Kraken/ai-astar.js and 16% on Kraken/audio-dft.js :^)
  • Loading branch information
awesomekling committed Mar 4, 2024
1 parent acd29e0 commit 4438ec4
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 24 deletions.
6 changes: 3 additions & 3 deletions Userland/Libraries/LibJS/Bytecode/BasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ class BasicBlock {
op->set_source_record({ start_offset, end_offset });
}

private:
explicit BasicBlock(String name);

// Flags this block as terminated and remembers the slot offset of the
// terminating instruction.
void terminate(size_t slot_offset)
{
    m_terminator_offset = slot_offset;
    m_terminated = true;
}

private:
explicit BasicBlock(String name);

Vector<u8> m_buffer;
BasicBlock const* m_handler { nullptr };
BasicBlock const* m_finalizer { nullptr };
Expand Down
8 changes: 8 additions & 0 deletions Userland/Libraries/LibJS/Bytecode/Instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@
O(IteratorToArray) \
O(Jump) \
O(JumpIf) \
O(JumpGreaterThan) \
O(JumpGreaterThanEquals) \
O(JumpLessThan) \
O(JumpLessThanEquals) \
O(JumpLooselyEquals) \
O(JumpLooselyInequals) \
O(JumpStrictlyEquals) \
O(JumpStrictlyInequals) \
O(JumpNullish) \
O(JumpUndefined) \
O(LeaveLexicalEnvironment) \
Expand Down
83 changes: 62 additions & 21 deletions Userland/Libraries/LibJS/Bytecode/Interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,26 @@ NonnullOwnPtr<CallFrame> CallFrame::create(size_t register_count)
return call_frame;
}

// Computes loose inequality of the two operands and wraps the boolean in a Value.
// Any error raised by is_loosely_equal() is propagated to the caller via TRY.
static ThrowCompletionOr<Value> loosely_inequals(VM& vm, Value src1, Value src2)
{
    bool const operands_are_equal = TRY(is_loosely_equal(vm, src1, src2));
    return Value(!operands_are_equal);
}

// Computes loose equality of the two operands and wraps the boolean in a Value.
// Any error raised by is_loosely_equal() is propagated to the caller via TRY.
static ThrowCompletionOr<Value> loosely_equals(VM& vm, Value src1, Value src2)
{
    bool const operands_are_equal = TRY(is_loosely_equal(vm, src1, src2));
    return Value(operands_are_equal);
}

// Computes strict inequality of the two operands and wraps the boolean in a Value.
// The VM parameter is unused; it is there so the signature matches the other comparison helpers.
static ThrowCompletionOr<Value> strict_inequals(VM&, Value src1, Value src2)
{
    bool const operands_are_equal = is_strictly_equal(src1, src2);
    return Value(!operands_are_equal);
}

// Computes strict equality of the two operands and wraps the boolean in a Value.
// The VM parameter is unused; it is there so the signature matches the other comparison helpers.
static ThrowCompletionOr<Value> strict_equals(VM&, Value src1, Value src2)
{
    bool const operands_are_equal = is_strictly_equal(src1, src2);
    return Value(operands_are_equal);
}

Interpreter::Interpreter(VM& vm)
: m_vm(vm)
{
Expand Down Expand Up @@ -324,6 +344,33 @@ void Interpreter::run_bytecode()
else
m_current_block = &static_cast<Op::JumpIf const&>(instruction).false_target()->block();
goto start;

// Generates the interpreter-loop case for one fused compare-and-jump instruction (Jump##PreOp).
//
// Both operands are loaded first. If both are Int32, the raw `int32_operator` decides the branch.
// Otherwise `slow_case(vm, lhs, rhs)` is called and its result is converted with to_boolean().
// If the slow case fails, the error is stored in `result` and we `break` out of the enclosing
// switch without changing the current block. On success, the current block is set to the true
// or false target and execution continues at `start`.
//
// The macro is only needed for the expansion directly below, so it is undefined right afterwards.
#define JS_HANDLE_FUSABLE_BINARY_JUMP(PreOp, int32_operator, slow_case) \
    case Instruction::Type::Jump##PreOp: { \
        auto& jump = static_cast<Op::Jump##PreOp const&>(instruction); \
        auto lhs = get(jump.lhs()); \
        auto rhs = get(jump.rhs()); \
        bool condition = false; \
        if (lhs.is_int32() && rhs.is_int32()) { \
            condition = lhs.as_i32() int32_operator rhs.as_i32(); \
        } else { \
            auto condition_or_error = slow_case(vm(), lhs, rhs); \
            if (condition_or_error.is_error()) { \
                result = condition_or_error.release_error(); \
                break; \
            } \
            condition = condition_or_error.value().to_boolean(); \
        } \
        \
        if (condition) \
            m_current_block = &jump.true_target()->block(); \
        else \
            m_current_block = &jump.false_target()->block(); \
        goto start; \
    }

    JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_HANDLE_FUSABLE_BINARY_JUMP)
#undef JS_HANDLE_FUSABLE_BINARY_JUMP

case Instruction::Type::JumpNullish:
if (get(static_cast<Op::JumpNullish const&>(instruction).condition()).is_nullish())
m_current_block = &static_cast<Op::Jump const&>(instruction).true_target()->block();
Expand Down Expand Up @@ -545,6 +592,7 @@ static PassManager& optimization_pipeline()
pm->add<Passes::UnifySameBlocks>();
pm->add<Passes::GenerateCFG>();
pm->add<Passes::MergeBlocks>();
pm->add<Passes::Peephole>();
pm->add<Passes::GenerateCFG>();
pm->add<Passes::PlaceBlocks>();
return pm;
Expand Down Expand Up @@ -587,7 +635,6 @@ Variant<NonnullOwnPtr<CallFrame>, CallFrame*> Interpreter::pop_call_frame()
m_current_call_frame = m_call_frames.is_empty() ? Span<Value> {} : this->call_frame().registers();
return frame;
}

}

namespace JS::Bytecode {
Expand Down Expand Up @@ -642,26 +689,6 @@ ThrowCompletionOr<void> End::execute_impl(Bytecode::Interpreter&) const
__builtin_unreachable();
}

// Computes loose inequality of the two operands and wraps the boolean in a Value.
// Any error raised by is_loosely_equal() is propagated to the caller via TRY.
static ThrowCompletionOr<Value> loosely_inequals(VM& vm, Value src1, Value src2)
{
    bool const operands_are_equal = TRY(is_loosely_equal(vm, src1, src2));
    return Value(!operands_are_equal);
}

// Computes loose equality of the two operands and wraps the boolean in a Value.
// Any error raised by is_loosely_equal() is propagated to the caller via TRY.
static ThrowCompletionOr<Value> loosely_equals(VM& vm, Value src1, Value src2)
{
    bool const operands_are_equal = TRY(is_loosely_equal(vm, src1, src2));
    return Value(operands_are_equal);
}

// Computes strict inequality of the two operands and wraps the boolean in a Value.
// The VM parameter is unused; it is there so the signature matches the other comparison helpers.
static ThrowCompletionOr<Value> strict_inequals(VM&, Value src1, Value src2)
{
    bool const operands_are_equal = is_strictly_equal(src1, src2);
    return Value(!operands_are_equal);
}

// Computes strict equality of the two operands and wraps the boolean in a Value.
// The VM parameter is unused; it is there so the signature matches the other comparison helpers.
static ThrowCompletionOr<Value> strict_equals(VM&, Value src1, Value src2)
{
    bool const operands_are_equal = is_strictly_equal(src1, src2);
    return Value(operands_are_equal);
}

#define JS_DEFINE_EXECUTE_FOR_COMMON_BINARY_OP(OpTitleCase, op_snake_case) \
ThrowCompletionOr<void> OpTitleCase::execute_impl(Bytecode::Interpreter& interpreter) const \
{ \
Expand Down Expand Up @@ -1225,6 +1252,20 @@ ThrowCompletionOr<void> JumpIf::execute_impl(Bytecode::Interpreter&) const
__builtin_unreachable();
}

// Defines the out-of-line members of every fused compare-and-jump op (Jump##PreOp):
// - execute_impl() is marked unreachable; it is not expected to be called for these ops.
// - to_byte_string_impl() renders the instruction as
//   "Jump<PreOp> <lhs>, <rhs>, true:<label> false:<label>", with the words "true" and
//   "false" wrapped in ANSI green escape codes.
//
// The macro is only needed for the expansion directly below, so it is undefined right afterwards.
#define JS_DEFINE_FUSABLE_BINARY_OP(PreOp, ...) \
    ThrowCompletionOr<void> Jump##PreOp::execute_impl(Bytecode::Interpreter&) const { __builtin_unreachable(); } \
    \
    ByteString Jump##PreOp::to_byte_string_impl(Bytecode::Executable const& executable) const \
    { \
        return ByteString::formatted("Jump" #PreOp " {}, {}, \033[32mtrue\033[0m:{} \033[32mfalse\033[0m:{}", \
            format_operand("lhs"sv, m_lhs, executable), \
            format_operand("rhs"sv, m_rhs, executable), \
            *m_true_target, \
            *m_false_target); \
    }

JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_DEFINE_FUSABLE_BINARY_OP)
#undef JS_DEFINE_FUSABLE_BINARY_OP

ThrowCompletionOr<void> JumpUndefined::execute_impl(Bytecode::Interpreter&) const
{
// Handled in the interpreter loop.
Expand Down
34 changes: 34 additions & 0 deletions Userland/Libraries/LibJS/Bytecode/Op.h
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,40 @@ class JumpIf final : public Jump {
Operand m_condition;
};

// Table of the comparison ops that can be fused with a following JumpIf.
// Each entry is X(PreOp, int32_operator, slow_case):
// - PreOp: name of the comparison op; the fused instruction is named Jump##PreOp.
// - int32_operator: NOTE: The raw operator is used for comparing two Int32 values.
// - slow_case: function called to compare the operands when they are not both Int32.
#define JS_ENUMERATE_FUSABLE_BINARY_OPS(X) \
X(GreaterThan, >, greater_than) \
X(GreaterThanEquals, >=, greater_than_equals) \
X(LessThan, <, less_than) \
X(LessThanEquals, <=, less_than_equals) \
X(LooselyEquals, ==, loosely_equals) \
X(LooselyInequals, !=, loosely_inequals) \
X(StrictlyEquals, ==, strict_equals) \
X(StrictlyInequals, !=, strict_inequals)

// Declares one fused compare-and-jump instruction class (Jump<OpName>) per entry of
// JS_ENUMERATE_FUSABLE_BINARY_OPS. Besides the true/false targets handed to the Jump base,
// each class stores the two operands of the comparison and exposes them via lhs()/rhs().
#define JS_DECLARE_FUSED_JUMP(OpName, ...) \
    class Jump##OpName final : public Jump { \
    public: \
        explicit Jump##OpName(Operand lhs, Operand rhs, Label true_target, Label false_target) \
            : Jump(Type::Jump##OpName, move(true_target), move(false_target), sizeof(*this)) \
            , m_lhs(lhs) \
            , m_rhs(rhs) \
        { \
        } \
        \
        Operand lhs() const { return m_lhs; } \
        Operand rhs() const { return m_rhs; } \
        \
        ThrowCompletionOr<void> execute_impl(Bytecode::Interpreter&) const; \
        ByteString to_byte_string_impl(Bytecode::Executable const&) const; \
        \
    private: \
        Operand m_lhs; \
        Operand m_rhs; \
    };

JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_DECLARE_FUSED_JUMP)
#undef JS_DECLARE_FUSED_JUMP

class JumpNullish final : public Jump {
public:
explicit JumpNullish(Operand condition, Label true_target, Label false_target)
Expand Down
7 changes: 7 additions & 0 deletions Userland/Libraries/LibJS/Bytecode/Pass/GenerateCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ static void generate_cfg_for_block(BasicBlock const& current_block, PassPipeline
enter_label(true_target, current_block);
return;
}

// Emit a `case Jump<PreOp>:` label for every fused compare-and-jump op, so that they all
// fall through into the case body that follows.
#define JS_FUSED_JUMP_CASE_LABEL(PreOp, ...) \
    case Jump##PreOp:

    JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_FUSED_JUMP_CASE_LABEL)
#undef JS_FUSED_JUMP_CASE_LABEL

case JumpIf:
case JumpNullish:
case JumpUndefined: {
Expand Down
90 changes: 90 additions & 0 deletions Userland/Libraries/LibJS/Bytecode/Pass/Peephole.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2024, Andreas Kling <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <LibJS/Bytecode/PassManager.h>

namespace JS::Bytecode::Passes {

// Rebuilds every basic block of the executable, applying small local rewrites on the way.
//
// Currently the only rewrite is compare+jump fusion: a fusable comparison instruction that is
// immediately followed by a JumpIf testing the comparison's destination operand is replaced by
// a single compare-and-jump instruction (for example, LessThan + JumpIf => JumpLessThan).
// Every other instruction is copied into the new block byte-for-byte.
//
// Once all blocks have been rebuilt, every reference to an old block (handlers, finalizers and
// references held by instructions) is redirected to the corresponding new block.
void Peephole::perform(PassPipelineExecutable& executable)
{
    started();

    // Maps each original block to the block that replaces it.
    HashMap<BasicBlock const*, BasicBlock const*> replacement_blocks;
    // Owns the original blocks until this function returns, so the pointers used as keys in
    // replacement_blocks stay valid while references are being rewritten below.
    Vector<NonnullOwnPtr<BasicBlock>> replaced_blocks;

    for (size_t i = 0; i < executable.executable.basic_blocks.size(); ++i) {
        auto& block = executable.executable.basic_blocks[i];
        auto new_block = BasicBlock::create(block->name());
        if (block->handler())
            new_block->set_handler(*block->handler());
        if (block->finalizer())
            new_block->set_finalizer(*block->finalizer());
        replacement_blocks.set(block.ptr(), new_block.ptr());

        InstructionStreamIterator it { block->instruction_stream() };
        while (!it.at_end()) {
            auto const& instruction = *it;
            ++it;

            if (!it.at_end()) {
                auto const& next_instruction = *it;
                if (next_instruction.type() == Instruction::Type::JumpIf) {
                    auto const& jump = static_cast<Op::JumpIf const&>(next_instruction);

// Only fuse when the JumpIf actually tests the result of the comparison. A comparison followed
// by a JumpIf on some unrelated operand is left alone and copied like any other instruction
// (this used to be a VERIFY, which would take down the process instead).
// NOTE(review): fusing drops the write to compare.dst(). This looks unsafe if that operand can
// be read again after the jump (e.g. if it is a local rather than a temporary) - confirm.
#define DO_FUSE_JUMP(PreOp, ...) \
    if (instruction.type() == Instruction::Type::PreOp) { \
        auto const& compare = static_cast<Op::PreOp const&>(instruction); \
        if (jump.condition() == compare.dst()) { \
            new_block->append<Op::Jump##PreOp>( \
                compare.source_record().source_start_offset, \
                compare.source_record().source_end_offset, \
                compare.lhs(), \
                compare.rhs(), \
                *jump.true_target(), \
                *jump.false_target()); \
            ++it; \
            VERIFY(it.at_end()); \
            continue; \
        } \
    }
                    JS_ENUMERATE_FUSABLE_BINARY_OPS(DO_FUSE_JUMP)
#undef DO_FUSE_JUMP
                }
            }

            // Not fused: copy the raw instruction bytes to the end of the new block.
            auto slot_offset = new_block->size();
            new_block->grow(instruction.length());
            memcpy(new_block->data() + slot_offset, &instruction, instruction.length());
            if (instruction.is_terminator())
                new_block->terminate(slot_offset);
        }
        replaced_blocks.append(move(executable.executable.basic_blocks[i]));
        executable.executable.basic_blocks[i] = move(new_block);
    }

    // Redirects every use of `original` (as handler, as finalizer, or inside an instruction)
    // in the rebuilt blocks to `replacement`.
    auto update_block_references = [&](BasicBlock const& original, BasicBlock const& replacement) {
        for (auto& block : executable.executable.basic_blocks) {
            InstructionStreamIterator it { block->instruction_stream() };
            if (block->handler() == &original)
                block->set_handler(replacement);
            if (block->finalizer() == &original)
                block->set_finalizer(replacement);
            while (!it.at_end()) {
                auto const& instruction = *it;
                ++it;
                // The iterator only hands out const references; the instruction lives in the
                // block's own buffer, which we are rewriting in place.
                const_cast<Instruction&>(instruction).replace_references(original, replacement);
            }
        }
    };
    for (auto& entry : replacement_blocks)
        update_block_references(*entry.key, *entry.value);

    finished();
}

}
9 changes: 9 additions & 0 deletions Userland/Libraries/LibJS/Bytecode/PassManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ class UnifySameBlocks : public Pass {
virtual void perform(PassPipelineExecutable&) override;
};

class Peephole : public Pass {
public:
Peephole() = default;
~Peephole() override = default;

private:
virtual void perform(PassPipelineExecutable&) override;
};

class DumpCFG : public Pass {
public:
DumpCFG(FILE* file)
Expand Down
1 change: 1 addition & 0 deletions Userland/Libraries/LibJS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set(SOURCES
Bytecode/Pass/DumpCFG.cpp
Bytecode/Pass/GenerateCFG.cpp
Bytecode/Pass/MergeBlocks.cpp
Bytecode/Pass/Peephole.cpp
Bytecode/Pass/PlaceBlocks.cpp
Bytecode/Pass/UnifySameBlocks.cpp
Bytecode/RegexTable.cpp
Expand Down

0 comments on commit 4438ec4

Please sign in to comment.