Report per-MethodInstance inference timings during a single inference…

… call, via SnoopCompile (#37749) This allows us to measure the _nested_ per-method-instance inference timings, which allows us to produce both a profile view over the inference data, as well as compute _exclusive_ timings per-method-instance.
JuliaLang · Oct 12, 2020 · ebc63b9 · ebc63b9
1 parent 705e8cb
commit ebc63b9
Show file tree

Hide file tree

Showing 2 changed files with 252 additions and 0 deletions.
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
@@ -8,7 +8,200 @@ function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::B
  return typeinf(interp, frame)
 end
 
+"""
+The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
+can be used to measure the exclusive time spent inferring each method instance that is
+recursively inferred during type inference.
+
+This is meant to be internal to the compiler, and makes some specific assumptions about
+being used for this purpose alone.
+"""
+module Timings
+
+using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
+ @inbounds, copy
+
+# What we record for any given frame we infer during type inference.
+struct InferenceFrameInfo
+ mi::Core.MethodInstance
+ world::UInt64
+ sptypes::Vector{Any}
+ slottypes::Vector{Any}
+end
+
+function _typeinf_identifier(frame::Core.Compiler.InferenceState)
+ mi_info = InferenceFrameInfo(
+ frame.linfo,
+ frame.world,
+ copy(frame.sptypes),
+ frame.slottypes[2:end], # Skip repeating the method instance
+ )
+ return mi_info
+end
+
+"""
+ Core.Compiler.Timing(mi_info, start_time, ...)
+
+Internal type containing the timing result for running type inference on a single
+MethodInstance.
+"""
+struct Timing
+ mi_info::InferenceFrameInfo
+ start_time::UInt64
+ cur_start_time::UInt64
+ time::UInt64
+ children::Core.Array{Timing,1}
+ Timing(mi_info, start_time, cur_start_time, time, children) = new(mi_info, start_time, cur_start_time, time, children)
+ Timing(mi_info, start_time) = new(mi_info, start_time, start_time, UInt64(0), Timing[])
+end
+
+_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base not yet available.
+
+# We keep a stack of the Timings for each of the MethodInstances currently being timed.
+# Since type inference currently operates via a depth-first search (during abstract
+# evaluation), this vector operates like a call stack. The last node in _timings is the
+# node currently being inferred, and its parent is directly before it, etc.
+# Each Timing also contains its own vector for all of its children, so that the tree
+# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
+# because we create a new node for duplicates.)
+const _timings = Timing[]
+# ROOT() is an empty function used as the top-level Timing node to measure all time spent
+# *not* in type inference during a given recording trace. It is used as a "dummy" node.
+function ROOT() end
+const ROOTmi = Core.Compiler.specialize_method(
+ first(Core.Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec())
+"""
+ Core.Compiler.reset_timings()
+
+Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
+start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
+"""
+function reset_timings()
+ empty!(_timings)
+ push!(_timings, Timing(
+ # The MethodInstance for ROOT(), and default empty values for other fields.
+ InferenceFrameInfo(ROOTmi, 0x0, Any[], Any[]),
+ _time_ns()))
+ return nothing
+end
+reset_timings()
+
+# (This is split into a function so that it can be called both in this module, at the top
+# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
+# the operation and called `reset_timings()`.)
+@inline function close_current_timer()
+ stop_time = _time_ns()
+ parent_timer = _timings[end]
+ accum_time = Core.Compiler.:(-)(stop_time, parent_timer.cur_start_time)
+
+ # Add in accum_time ("modify" the immutable struct)
+ @inbounds begin
+ _timings[end] = Timings.Timing(
+ parent_timer.mi_info,
+ parent_timer.start_time,
+ parent_timer.cur_start_time,
+ parent_timer.time + accum_time,
+ parent_timer.children,
+ )
+ end
+ return nothing
+end
+
+@inline function enter_new_timer(frame)
+ # Very first thing, stop the active timer: get the current time and add in the
+ # time since it was last started to its aggregate exclusive time.
+ close_current_timer()
+
+ mi_info = _typeinf_identifier(frame)
+
+ # Start the new timer right before returning
+ push!(_timings, Timings.Timing(mi_info, UInt64(0)))
+ len = length(_timings)
+ new_timer = @inbounds _timings[len]
+ # Set the current time _after_ appending the node, to try to exclude the
+ # overhead from measurement.
+ start = Timings._time_ns()
+
+ @inbounds begin
+ _timings[len] = Timings.Timing(
+ new_timer.mi_info,
+ start,
+ start,
+ new_timer.time,
+ new_timer.children,
+ )
+ end
+
+ return nothing
+end
+
+# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
+# assert that indeed we are always returning to a parent after finishing all of its
+# children (that is, asserting that inference proceeds via depth-first-search).
+@inline function exit_current_timer(_expected_frame_)
+ # Finish the new timer
+ stop_time = Timings._time_ns()
+
+ expected_mi_info = _typeinf_identifier(_expected_frame_)
+
+ # Grab the new timer again because it might have been modified in _timings
+ # (since it's an immutable struct)
+ # And remove it from the current timings stack
+ _timings = Timings._timings
+ new_timer = pop!(_timings)
+ Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
+
+ accum_time = stop_time - new_timer.cur_start_time
+ # Add in accum_time ("modify" the immutable struct)
+ new_timer = Timings.Timing(
+ new_timer.mi_info,
+ new_timer.start_time,
+ new_timer.cur_start_time,
+ new_timer.time + accum_time,
+ new_timer.children
+ )
+ # Record the final timing with the original parent timer
+ parent_timer = _timings[end]
+ push!(parent_timer.children, new_timer)
+
+ # And finally restart the parent timer:
+ len = length(_timings)
+ @inbounds begin
+ _timings[len] = Timings.Timing(
+ parent_timer.mi_info,
+ parent_timer.start_time,
+ Timings._time_ns(),
+ parent_timer.time,
+ parent_timer.children,
+ )
+ end
+
+ return nothing
+end
+
+end # module Timings
+
+"""
+ Core.Compiler.__set_measure_typeinf(onoff::Bool)
+
+If set to `true`, record per-method-instance timings within type inference in the Compiler.
+"""
+__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
+const __measure_typeinf__ = fill(false)
+
+# Wrapper around _typeinf that optionally records the exclusive time for each invocation.
 function typeinf(interp::AbstractInterpreter, frame::InferenceState)
+ if __measure_typeinf__[]
+ Timings.enter_new_timer(frame)
+ v = _typeinf(interp, frame)
+ Timings.exit_current_timer(frame)
+ return v
+ else
+ return _typeinf(interp, frame)
+ end
+end
+
+function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
  typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
  # with no active ip's, frame is done
  frames = frame.callers_in_cycle

diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
@@ -2849,5 +2849,64 @@ f_apply_cglobal(args...) = cglobal(args...)
 f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
 @test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]
 
+# PR #37749
+# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
+# usually (SnoopCompileCore).
+function time_inference(f)
+ Core.Compiler.Timings.reset_timings()
+ Core.Compiler.__set_measure_typeinf(true)
+ f()
+ Core.Compiler.__set_measure_typeinf(false)
+ Core.Compiler.Timings.close_current_timer()
+ return Core.Compiler.Timings._timings[1]
+end
+function depth(t::Core.Compiler.Timings.Timing)
+ maximum(depth.(t.children), init=0) + 1
+end
+function flatten_times(t::Core.Compiler.Timings.Timing)
+ collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
+end
+# Some very limited testing of timing the type inference (#37749).
+@testset "Core.Compiler.Timings" begin
+ # Functions that call each other
+ @eval module M
+ i(x) = x+5
+ i2(x) = x+2
+ h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
+ g(y::Integer, x) = h(Any[y]) + Int(x)
+ end
+ timing1 = time_inference() do
+ @eval M.g(2, 3.0)
+ end
+ # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
+ # their concrete types.
+ @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
+ # Test that inference has cached some of the Method Instances
+ timing2 = time_inference() do
+ @eval M.g(2, 3.0)
+ end
+ @test length(flatten_times(timing2)) < length(flatten_times(timing1))
+
+ # Recursive function
+ @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
+ timing = time_inference() do
+ @eval _Recursive.f(5)
+ end
+ @test depth(timing) == 3 # root -> f -> +
+ @test length(flatten_times(timing)) == 3 # root, f, +
+
+ # Functions inferred with multiple constants
+ @eval module C
+ i(x) = x === 0 ? 0 : 1 / x
+ a(x) = i(0) * i(x)
+ b() = i(0) * i(1) * i(0)
+ end
+ timing = time_inference() do
+ @eval C.a(2)
+ @eval C.b()
+ end
+ @test !isempty(flatten_times(timing))
+end
+
 # issue #37638
 @test !(Core.Compiler.return_type(() -> (nothing, Any[]...)[2], Tuple{}) <: Vararg)