Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some Profile compatibility routines #42482

Merged
merged 7 commits into from
Oct 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 51 additions & 23 deletions stdlib/Profile/src/Profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ The keyword arguments can be any combination of:
line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
incurred by each function by itself.

- `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:threads`, `:tasks`,
`[:threads, :tasks]`, or `[:tasks, :threads]` where the last two provide nested grouping.
- `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
`[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.

- `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
Expand Down Expand Up @@ -296,7 +296,13 @@ function is_block_end(data, i)
# and we could have (though very unlikely):
# 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
# and we want to ignore the triple NULL (which is an ip).
return data[i] == 0 && data[i - 1] == 0 && data[i - 2] != 0
data[i] == 0 || return false # first block end null
data[i - 1] == 0 || return false # second block end null
data[i - 2] in 1:2 || return false # sleep state
data[i - 3] != 0 || return false # cpu_cycle_clock
data[i - 4] != 0 || return false # taskid
data[i - 5] != 0 || return false # threadid
return true
end

"""
Expand Down Expand Up @@ -519,29 +525,51 @@ function fetch(;include_meta = false)
GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len)
if include_meta || isempty(data)
return data
else
nblocks = 0
for i = 2:length(data)
if is_block_end(data, i) # detect block ends and count them
nblocks += 1
end
end
data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
j = length(data_stripped)
i = length(data)
while i > 0 && j > 0
data_stripped[j] = data[i]
if is_block_end(data, i)
i -= (nmeta + 1) # metadata fields and the extra NULL IP
end
i -= 1
j -= 1
end
return strip_meta(data)
end

"""
    strip_meta(data) -> data_stripped

Return a copy of `data` with the per-block metadata removed.

Every position for which `is_block_end(data, i)` holds marks the end of one block. For each
such block, `nmeta + 1` entries (the metadata fields and the extra NULL IP) are dropped, while
the block-end NULL itself is kept. `data` is not modified.
"""
function strip_meta(data)
    nblocks = count(Base.Fix1(is_block_end, data), eachindex(data))
    data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
    # Fill the output back to front: `i` walks `data`, `j` walks `data_stripped`.
    j = length(data_stripped)
    i = length(data)
    while i > 0 && j > 0
        data_stripped[j] = data[i]
        if is_block_end(data, i)
            i -= (nmeta + 1) # metadata fields and the extra NULL IP
        end
        i -= 1
        j -= 1
    end
    # Both cursors must run out together; anything else means the block count and the
    # backwards walk disagreed about where the metadata is.
    @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])"
    return data_stripped
end

"""
Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta

The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used
for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal
details of the metadata format.
"""
function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
    # Zero is rejected for both ids: `is_block_end` requires these fields to be non-zero.
    threadid == 0 && error("Fake threadid cannot be 0")
    taskid == 0 && error("Fake taskid cannot be 0")
    any(Base.Fix1(is_block_end, data), eachindex(data)) && error("input already has metadata")
    # Fake clock value; it is incremented before each use, so every block gets a distinct,
    # non-zero value starting at 100.
    cpu_clock_cycle = UInt64(99)
    data_with_meta = similar(data, 0)
    for val in data
        if iszero(val)
            # A NULL in the input ends a backtrace. Replace it with the fake metadata fields
            # (threadid, taskid, cpu_cycle_clock, thread_sleeping) followed by two NULLs.
            # `false + 1` evaluates to 1: the sleeping flag `false`, shifted up by one.
            push!(data_with_meta, threadid, taskid, cpu_clock_cycle += 1, false + 1, 0, 0)
        else
            push!(data_with_meta, val)
        end
    end
    return data_with_meta
end

## Print as a flat list
# Counts the number of times each line appears, at any nesting level and at the topmost level
Expand Down Expand Up @@ -807,7 +835,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
skip = false
nsleeping = 0
for i in startframe:-1:1
(startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP
(startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
ip = all[i]
if is_block_end(all, i)
# read metadata
Expand Down
46 changes: 45 additions & 1 deletion stdlib/Profile/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

using Test, Profile, Serialization, Logging
using Base.StackTraces: StackFrame

Profile.clear()
Profile.init()
Expand Down Expand Up @@ -78,7 +79,17 @@ end
data_with = Profile.fetch(include_meta = true)
@test data_without[1] == data_with[1]
@test data_without[end] == data_with[end]
@test length(data_without) < length(data_with)
nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with))
@test length(data_without) == length(data_with) - nblocks * (Profile.nmeta + 1)

data_with_fake = Profile.add_fake_meta(data_without)
@test_throws "input already has metadata" Profile.add_fake_meta(data_with)
data_stripped = Profile.strip_meta(data_with_fake)
@test data_stripped == data_without
# ideally the test below would be a test for equality, but real sample ips can be nulls, and thus
# adding metadata back in can convert those ips to new block ends, and the length is then longer
@test length(data_with_fake) >= length(data_with)

end

Profile.clear()
Expand Down Expand Up @@ -175,3 +186,36 @@ let cmd = Base.julia_cmd()
@test success(p)
@test parse(Int, s) > 100
end

@testset "FlameGraphs" begin
    # FlameGraphs relies on some of Profile's internals. Mimic a few of its tests here so that
    # possible breakage is detected. Breakage is acceptable, since these internals are not part
    # of the stable API, but it is better to know about it, and ideally it should be paired
    # with an issue or PR in FlameGraphs.
    #
    # This also improves the thoroughness of our overall Profile tests.
    mkframe(func, file, line; C=false) =
        StackFrame(Symbol(func), Symbol(file), line, nothing, C, false, 0)

    # Four NULL-terminated backtraces; within each one the callees come first, then the caller.
    raw_ips = UInt64[4, 3, 2, 1, 0,
                     6, 5, 1, 0,
                     8, 7, 0,
                     4, 3, 2, 1, 0]
    ips_with_meta = Profile.add_fake_meta(raw_ips)

    # (func, file, line) for instruction pointers 1 through 8, in that order.
    locations = ((:f1, :file1, 1), (:f2, :file1, 5), (:f3, :file2, 1), (:f2, :file1, 15),
                 (:f4, :file1, 20), (:f5, :file3, 1), (:f1, :file1, 2), (:f6, :file3, 10))
    lidict = Dict{UInt64,StackFrame}(ip => mkframe(loc...) for (ip, loc) in enumerate(locations))

    root = Profile.StackFrameTree{StackFrame}()
    Profile.tree!(root, ips_with_meta, lidict, #= C =# true, :off)

    @test length(root.down) == 2
    for frame in keys(root.down)
        @test frame.file == :file1
        @test frame.line ∈ (1, 2)
    end
    f1_line2 = root.down[mkframe(:f1, :file1, 2)]
    @test only(f1_line2.down).first == lidict[8]
end