Skip to content

Commit

Permalink
Show raw machine code along with disassembled code (JuliaLang#40220)
Browse files Browse the repository at this point in the history
For code_native with binary=true
  • Loading branch information
bicycle1885 authored and Amit Shirodkar committed Jun 9, 2021
1 parent ab9be2a commit e232df0
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 24 deletions.
50 changes: 46 additions & 4 deletions src/disasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ static void jl_dump_asm_internal(
DIContext *di_ctx,
raw_ostream &rstream,
const char* asm_variant,
const char* debuginfo);
const char* debuginfo,
bool binary);

// This isn't particularly fast, but neither is printing assembly, and they're only used for interactive mode
static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
Expand Down Expand Up @@ -506,7 +507,7 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)

// print a native disassembly for the function starting at fptr
extern "C" JL_DLLEXPORT
jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo)
jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo, char binary)
{
assert(fptr != 0);
jl_ptls_t ptls = jl_get_ptls_states();
Expand Down Expand Up @@ -543,7 +544,8 @@ jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant,
Section, context,
stream,
asm_variant,
debuginfo);
debuginfo,
binary);
jl_gc_safe_leave(ptls, gc_state);

return jl_pchar_to_string(stream.str().data(), stream.str().size());
Expand Down Expand Up @@ -739,14 +741,42 @@ static int OpInfoLookup(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t Si
}
} // namespace

// Render the raw bytes of one instruction as an assembly comment line of the
// form "; <addr>:<bytes>", where <addr> is the low 16 bits of the address of
// the first byte, printed as four lowercase hex digits.
//
// Byte layout depends on the target architecture:
//  - x86 / x86_64 (variable-length encodings): every byte is printed in memory
//    order as two hex digits, each preceded by a single space.
//  - all other targets (treated as fixed-length encodings): a single space is
//    followed by the bytes glued together into one hex word; on little-endian
//    targets the bytes are emitted in reverse memory order.
std::string rawCodeComment(const llvm::ArrayRef<uint8_t>& Memory, const llvm::Triple& Triple)
{
    std::string Text{"; "};
    llvm::raw_string_ostream OS{Text};

    // Abbreviated address: keep only the low 16 bits.
    const auto Addr = reinterpret_cast<uintptr_t>(Memory.data());
    llvm::write_hex(OS, Addr & 0xffff, HexPrintStyle::Lower, 4);
    OS << ":";

    const auto Arch = Triple.getArch();
    const bool VariableLength = (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64);
    const size_t NumBytes = Memory.size();

    if (VariableLength) {
        // One space-separated hex pair per byte, in memory order.
        for (size_t Idx = 0; Idx < NumBytes; ++Idx) {
            OS << " ";
            llvm::write_hex(OS, Memory[Idx], HexPrintStyle::Lower, 2);
        }
    }
    else {
        // Fixed-length encoding: a single contiguous hex word after one space.
        OS << " ";
        if (Triple.isLittleEndian()) {
            // Walk the bytes backwards (last byte in memory is printed first).
            for (size_t Idx = NumBytes; Idx > 0; --Idx)
                llvm::write_hex(OS, Memory[Idx - 1], HexPrintStyle::Lower, 2);
        }
        else {
            for (size_t Idx = 0; Idx < NumBytes; ++Idx)
                llvm::write_hex(OS, Memory[Idx], HexPrintStyle::Lower, 2);
        }
    }
    return OS.str();
}

static void jl_dump_asm_internal(
uintptr_t Fptr, size_t Fsize, int64_t slide,
object::SectionRef Section,
DIContext *di_ctx,
raw_ostream &rstream,
const char* asm_variant,
const char* debuginfo)
const char* debuginfo,
bool binary)
{
// GC safe
// Get the host information
Expand Down Expand Up @@ -842,6 +872,16 @@ static void jl_dump_asm_internal(
}
}

if (binary) {
// Print the complete address and the size at the top (instruction addresses are abbreviated)
std::string Buffer{"; code origin: "};
llvm::raw_string_ostream Stream{Buffer};
auto Address = reinterpret_cast<uintptr_t>(memoryObject.data());
llvm::write_hex(Stream, Address, HexPrintStyle::Lower, 16);
Stream << ", code size: " << memoryObject.size();
Streamer->emitRawText(Stream.str());
}

// Take two passes: In the first pass we record all branch labels,
// in the second we actually perform the output
for (int pass = 0; pass < 2; ++ pass) {
Expand Down Expand Up @@ -984,6 +1024,8 @@ static void jl_dump_asm_internal(
}
}
}
if (binary)
Streamer->emitRawText(rawCodeComment(memoryObject.slice(Index, insSize), TheTriple));
Streamer->emitInstruction(Inst, *STI);
}
break;
Expand Down
6 changes: 3 additions & 3 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,14 +409,14 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
// get a native disassembly for a compiled method
extern "C" JL_DLLEXPORT
jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo)
int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
{
// printing via disassembly
jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
if (codeinst) {
uintptr_t fptr = (uintptr_t)codeinst->invoke;
if (getwrapper)
return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo);
return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
uintptr_t specfptr = (uintptr_t)codeinst->specptr.fptr;
if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) {
// normally we prevent native code from being generated for these functions,
Expand Down Expand Up @@ -455,7 +455,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo);
return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary);
}

// whatever, that didn't work - use the assembler output instead
Expand Down
4 changes: 2 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -743,9 +743,9 @@ static inline void jl_set_gc_and_wait(void)
void jl_gc_set_permalloc_region(void *start, void *end);

JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo);
int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
JL_DLLEXPORT void *jl_get_llvmf_defn(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo);
JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo, char binary);
JL_DLLEXPORT jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo);
JL_DLLEXPORT jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo);

Expand Down
23 changes: 12 additions & 11 deletions stdlib/InteractiveUtils/src/codeview.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ import Base.CodegenParams
# Printing code representations in IR and assembly
function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool,
strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol,
optimize::Bool, debuginfo::Symbol,
optimize::Bool, debuginfo::Symbol, binary::Bool,
params::CodegenParams=CodegenParams())
ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
if isa(f, Core.Builtin)
Expand All @@ -153,7 +153,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
linfo = Core.Compiler.specialize_method(match)
# get the code for it
if native
str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo)
str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary)
else
str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params)
end
Expand All @@ -162,7 +162,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
return str
end

function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol)
function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool)
if syntax !== :att && syntax !== :intel
throw(ArgumentError("'syntax' must be either :intel or :att"))
end
Expand All @@ -172,8 +172,8 @@ function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wr
throw(ArgumentError("'debuginfo' must be either :source or :none"))
end
str = ccall(:jl_dump_method_asm, Ref{String},
(Any, UInt, Cint, Bool, Ptr{UInt8}, Ptr{UInt8}),
linfo, world, 0, wrapper, syntax, debuginfo)
(Any, UInt, Cint, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
linfo, world, 0, wrapper, syntax, debuginfo, binary)
return str
end

Expand Down Expand Up @@ -208,7 +208,7 @@ Keyword argument `debuginfo` may be one of source (default) or none, to specify
"""
function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool,
dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default)
d = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo)
d = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo, false)
if highlighting[:llvm] && get(io, :color, false)
print_llvm(io, d)
else
Expand All @@ -222,24 +222,25 @@ code_llvm(@nospecialize(f), @nospecialize(types=Tuple); raw=false, dump_module=f


"""
code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default)
code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default, binary=false)
Prints the native assembly instructions generated for running the method matching the given
generic function and type signature to `io`.
Switch assembly syntax using `syntax` symbol parameter set to `:att` for AT&T syntax or `:intel` for Intel syntax.
Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments.
If `binary` is `true`, it also prints the binary machine code for each instruction, preceded by an abbreviated address.
"""
function code_native(io::IO, @nospecialize(f), @nospecialize(types=Tuple);
syntax::Symbol=:att, debuginfo::Symbol=:default)
d = _dump_function(f, types, true, false, false, false, syntax, true, debuginfo)
syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false)
d = _dump_function(f, types, true, false, false, false, syntax, true, debuginfo, binary)
if highlighting[:native] && get(io, :color, false)
print_native(io, d)
else
print(io, d)
end
end
code_native(@nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default) =
code_native(stdout, f, types; syntax=syntax, debuginfo=debuginfo)
code_native(@nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) =
code_native(stdout, f, types; syntax=syntax, debuginfo=debuginfo, binary=binary)
code_native(::IO, ::Any, ::Symbol) = error("illegal code_native call") # resolve ambiguous call

## colorized IR and assembly printing
Expand Down
21 changes: 17 additions & 4 deletions stdlib/InteractiveUtils/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -442,23 +442,36 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))

rgx = r"%"
buf = IOBuffer()
output = ""
#test that the string output is at&t syntax by checking for occurrences of '%'s
code_native(buf, linear_foo, (), syntax = :att, debuginfo = :none)
output = String(take!(buf))

@test occursin(rgx, output)

#test that the code output is intel syntax by checking it has no occurrences of '%'
code_native(buf, linear_foo, (), syntax = :intel, debuginfo = :none)
output = String(take!(buf))

@test !occursin(rgx, output)

code_native(buf, linear_foo, ())
output = String(take!(buf))

@test occursin(rgx, output)

@testset "binary" begin
# check the RET instruction (opcode: C3)
ret = r"^; [0-9a-f]{4}: c3$"m

# without binary flag (default)
code_native(buf, linear_foo, ())
output = String(take!(buf))
@test !occursin(ret, output)

# with binary flag
for binary in false:true
code_native(buf, linear_foo, (), binary = binary)
output = String(take!(buf))
@test occursin(ret, output) == binary
end
end
end

@testset "error message" begin
Expand Down
1 change: 1 addition & 0 deletions test/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,7 @@ _test_at_locals2(1,1,0.5f0)
_dump_function(f31687_parent, Tuple{},
#=native=#false, #=wrapper=#false, #=strip=#false,
#=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none,
#=binary=#false,
params)
end

Expand Down

0 comments on commit e232df0

Please sign in to comment.