Skip to content

Commit

Permalink
Report per-MethodInstance inference timings during a single inference…
Browse files Browse the repository at this point in the history
… call, via SnoopCompile (#37749)

This allows us to measure the _nested_ per-method-instance inference
timings, which allows us to produce both a profile view over the
inference data, as well as compute _exclusive_ timings
per-method-instance.
  • Loading branch information
NHDaly committed Oct 12, 2020
1 parent 705e8cb commit ebc63b9
Show file tree
Hide file tree
Showing 2 changed files with 252 additions and 0 deletions.
193 changes: 193 additions & 0 deletions base/compiler/typeinfer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,200 @@ function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::B
return typeinf(interp, frame)
end

"""
The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
can be used to measure the exclusive time spent inferring each method instance that is
recursively inferred during type inference.
This is meant to be internal to the compiler, and makes some specific assumptions about
being used for this purpose alone.
"""
module Timings

using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
@inbounds, copy

# What we record for any given frame we infer during type inference.
struct InferenceFrameInfo
mi::Core.MethodInstance
world::UInt64
sptypes::Vector{Any}
slottypes::Vector{Any}
end

function _typeinf_identifier(frame::Core.Compiler.InferenceState)
mi_info = InferenceFrameInfo(
frame.linfo,
frame.world,
copy(frame.sptypes),
frame.slottypes[2:end], # Skip repeating the method instance
)
return mi_info
end

"""
Core.Compiler.Timing(mi_info, start_time, ...)
Internal type containing the timing result for running type inference on a single
MethodInstance.
"""
struct Timing
mi_info::InferenceFrameInfo
start_time::UInt64
cur_start_time::UInt64
time::UInt64
children::Core.Array{Timing,1}
Timing(mi_info, start_time, cur_start_time, time, children) = new(mi_info, start_time, cur_start_time, time, children)
Timing(mi_info, start_time) = new(mi_info, start_time, start_time, UInt64(0), Timing[])
end

_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base not yet available.

# We keep a stack of the Timings for each of the MethodInstances currently being timed.
# Since type inference currently operates via a depth-first search (during abstract
# evaluation), this vector operates like a call stack. The last node in _timings is the
# node currently being inferred, and its parent is directly before it, etc.
# Each Timing also contains its own vector for all of its children, so that the tree
# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
# because we create a new node for duplicates.)
const _timings = Timing[]
# ROOT() is an empty function used as the top-level Timing node to measure all time spent
# *not* in type inference during a given recording trace. It is used as a "dummy" node.
function ROOT() end
const ROOTmi = Core.Compiler.specialize_method(
first(Core.Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec())
"""
Core.Compiler.reset_timings()
Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
"""
function reset_timings()
empty!(_timings)
push!(_timings, Timing(
# The MethodInstance for ROOT(), and default empty values for other fields.
InferenceFrameInfo(ROOTmi, 0x0, Any[], Any[]),
_time_ns()))
return nothing
end
reset_timings()

# (This is split into a function so that it can be called both in this module, at the top
# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
# the operation and called `reset_timings()`.)
@inline function close_current_timer()
stop_time = _time_ns()
parent_timer = _timings[end]
accum_time = Core.Compiler.:(-)(stop_time, parent_timer.cur_start_time)

# Add in accum_time ("modify" the immutable struct)
@inbounds begin
_timings[end] = Timings.Timing(
parent_timer.mi_info,
parent_timer.start_time,
parent_timer.cur_start_time,
parent_timer.time + accum_time,
parent_timer.children,
)
end
return nothing
end

@inline function enter_new_timer(frame)
# Very first thing, stop the active timer: get the current time and add in the
# time since it was last started to its aggregate exclusive time.
close_current_timer()

mi_info = _typeinf_identifier(frame)

# Start the new timer right before returning
push!(_timings, Timings.Timing(mi_info, UInt64(0)))
len = length(_timings)
new_timer = @inbounds _timings[len]
# Set the current time _after_ appending the node, to try to exclude the
# overhead from measurement.
start = Timings._time_ns()

@inbounds begin
_timings[len] = Timings.Timing(
new_timer.mi_info,
start,
start,
new_timer.time,
new_timer.children,
)
end

return nothing
end

# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
# assert that indeed we are always returning to a parent after finishing all of its
# children (that is, asserting that inference proceeds via depth-first-search).
@inline function exit_current_timer(_expected_frame_)
# Finish the new timer
stop_time = Timings._time_ns()

expected_mi_info = _typeinf_identifier(_expected_frame_)

# Grab the new timer again because it might have been modified in _timings
# (since it's an immutable struct)
# And remove it from the current timings stack
_timings = Timings._timings
new_timer = pop!(_timings)
Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi

accum_time = stop_time - new_timer.cur_start_time
# Add in accum_time ("modify" the immutable struct)
new_timer = Timings.Timing(
new_timer.mi_info,
new_timer.start_time,
new_timer.cur_start_time,
new_timer.time + accum_time,
new_timer.children
)
# Record the final timing with the original parent timer
parent_timer = _timings[end]
push!(parent_timer.children, new_timer)

# And finally restart the parent timer:
len = length(_timings)
@inbounds begin
_timings[len] = Timings.Timing(
parent_timer.mi_info,
parent_timer.start_time,
Timings._time_ns(),
parent_timer.time,
parent_timer.children,
)
end

return nothing
end

end # module Timings

"""
Core.Compiler.__set_measure_typeinf(onoff::Bool)
If set to `true`, record per-method-instance timings within type inference in the Compiler.
"""
__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
const __measure_typeinf__ = fill(false)

# Wrapper around _typeinf that optionally records the exclusive time for each invocation.
function typeinf(interp::AbstractInterpreter, frame::InferenceState)
if __measure_typeinf__[]
Timings.enter_new_timer(frame)
v = _typeinf(interp, frame)
Timings.exit_current_timer(frame)
return v
else
return _typeinf(interp, frame)
end
end

function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
# with no active ip's, frame is done
frames = frame.callers_in_cycle
Expand Down
59 changes: 59 additions & 0 deletions test/compiler/inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2849,5 +2849,64 @@ f_apply_cglobal(args...) = cglobal(args...)
f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
@test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]

# PR #37749
# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
# usually (SnoopCompileCore).
function time_inference(f)
Core.Compiler.Timings.reset_timings()
Core.Compiler.__set_measure_typeinf(true)
f()
Core.Compiler.__set_measure_typeinf(false)
Core.Compiler.Timings.close_current_timer()
return Core.Compiler.Timings._timings[1]
end
function depth(t::Core.Compiler.Timings.Timing)
maximum(depth.(t.children), init=0) + 1
end
function flatten_times(t::Core.Compiler.Timings.Timing)
collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
end
# Some very limited testing of timing the type inference (#37749).
@testset "Core.Compiler.Timings" begin
# Functions that call each other
@eval module M
i(x) = x+5
i2(x) = x+2
h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
g(y::Integer, x) = h(Any[y]) + Int(x)
end
timing1 = time_inference() do
@eval M.g(2, 3.0)
end
# The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
# their concrete types.
@test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
# Test that inference has cached some of the Method Instances
timing2 = time_inference() do
@eval M.g(2, 3.0)
end
@test length(flatten_times(timing2)) < length(flatten_times(timing1))

# Recursive function
@eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
timing = time_inference() do
@eval _Recursive.f(5)
end
@test depth(timing) == 3 # root -> f -> +
@test length(flatten_times(timing)) == 3 # root, f, +

# Functions inferred with multiple constants
@eval module C
i(x) = x === 0 ? 0 : 1 / x
a(x) = i(0) * i(x)
b() = i(0) * i(1) * i(0)
end
timing = time_inference() do
@eval C.a(2)
@eval C.b()
end
@test !isempty(flatten_times(timing))
end

# issue #37638
@test !(Core.Compiler.return_type(() -> (nothing, Any[]...)[2], Tuple{}) <: Vararg)

0 comments on commit ebc63b9

Please sign in to comment.