diff --git a/src/compiler/reflection.jl b/src/compiler/reflection.jl index eb4e01bb7e..513157b205 100644 --- a/src/compiler/reflection.jl +++ b/src/compiler/reflection.jl @@ -51,6 +51,16 @@ function code_sass(io::IO, job::CompilerJob; raw::Bool=false) return end + # NVIDIA bug #4604961: CUPTI in CUDA 12.4 Update 1 does not capture profiled events + # unless the activity API is first activated + if runtime_version() == v"12.4" + cfg = CUPTI.ActivityConfig([CUPTI.CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL, + CUPTI.CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API]) + CUPTI.enable!(cfg) do + # do nothing + end + end + cfg = CUPTI.CallbackConfig([CUPTI.CUPTI_CB_DOMAIN_RESOURCE]) do domain, id, data # only process relevant callbacks id == CUPTI.CUPTI_CBID_RESOURCE_MODULE_LOADED || return diff --git a/test/core/execution.jl b/test/core/execution.jl index 101f6d1194..6d78851e27 100644 --- a/test/core/execution.jl +++ b/test/core/execution.jl @@ -77,7 +77,7 @@ end CUDA.code_warntype(devnull, dummy, Tuple{}) CUDA.code_llvm(devnull, dummy, Tuple{}) CUDA.code_ptx(devnull, dummy, Tuple{}) - if can_use_cupti() && CUDA.runtime_version() != v"12.4" + if can_use_cupti() # functions defined in Julia sass = sprint(io->CUDA.code_sass(io, dummy, Tuple{})) @test occursin(".text._Z5dummy", sass) @@ -96,7 +96,7 @@ end @device_code_warntype io=devnull @cuda dummy() @device_code_llvm io=devnull @cuda dummy() @device_code_ptx io=devnull @cuda dummy() - if can_use_cupti() && CUDA.runtime_version() != v"12.4" + if can_use_cupti() # functions defined in Julia sass = sprint(io->@device_code_sass io=io @cuda dummy()) @test occursin(".text._Z5dummy", sass) @@ -120,7 +120,7 @@ end @test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy()))) @test occursin("dummy", sprint(io->(@device_code_llvm io=io @cuda dummy()))) @test occursin("dummy", sprint(io->(@device_code_ptx io=io @cuda dummy()))) - if can_use_cupti() && CUDA.runtime_version() != v"12.4" + if can_use_cupti() @test occursin("dummy", sprint(io->(@device_code_sass io=io @cuda dummy()))) end