Skip to content

Commit

Permalink
Work around a CUPTI bug in CUDA 12.4 Update 1. (#2330)
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Apr 16, 2024
1 parent a011e73 commit 9c24777
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
10 changes: 10 additions & 0 deletions src/compiler/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ function code_sass(io::IO, job::CompilerJob; raw::Bool=false)
return
end

# Workaround for NVIDIA bug #4604961: CUPTI in CUDA 12.4 Update 1 does not capture
# profiled events unless the activity API has been activated first.
# NOTE(review): the check compares against v"12.4", so it presumably applies to every
# 12.4 release rather than only Update 1 -- confirm what `runtime_version()` reports.
if runtime_version() == v"12.4"
cfg = CUPTI.ActivityConfig([CUPTI.CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
CUPTI.CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API])
CUPTI.enable!(cfg) do
# intentionally empty: no work is profiled here; the block exists only so that the
# activity API gets activated before the callback configuration below is set up
end
end

cfg = CUPTI.CallbackConfig([CUPTI.CUPTI_CB_DOMAIN_RESOURCE]) do domain, id, data
# only process relevant callbacks
id == CUPTI.CUPTI_CBID_RESOURCE_MODULE_LOADED || return
Expand Down
6 changes: 3 additions & 3 deletions test/core/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ end
CUDA.code_warntype(devnull, dummy, Tuple{})
CUDA.code_llvm(devnull, dummy, Tuple{})
CUDA.code_ptx(devnull, dummy, Tuple{})
if can_use_cupti() && CUDA.runtime_version() != v"12.4"
if can_use_cupti()
# functions defined in Julia
sass = sprint(io->CUDA.code_sass(io, dummy, Tuple{}))
@test occursin(".text._Z5dummy", sass)
Expand All @@ -96,7 +96,7 @@ end
@device_code_warntype io=devnull @cuda dummy()
@device_code_llvm io=devnull @cuda dummy()
@device_code_ptx io=devnull @cuda dummy()
if can_use_cupti() && CUDA.runtime_version() != v"12.4"
if can_use_cupti()
# functions defined in Julia
sass = sprint(io->@device_code_sass io=io @cuda dummy())
@test occursin(".text._Z5dummy", sass)
Expand All @@ -120,7 +120,7 @@ end
@test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy())))
@test occursin("dummy", sprint(io->(@device_code_llvm io=io @cuda dummy())))
@test occursin("dummy", sprint(io->(@device_code_ptx io=io @cuda dummy())))
if can_use_cupti() && CUDA.runtime_version() != v"12.4"
if can_use_cupti()
@test occursin("dummy", sprint(io->(@device_code_sass io=io @cuda dummy())))
end

Expand Down

0 comments on commit 9c24777

Please sign in to comment.