From 9c24777ad8809bb04383fde6812eab08015bd39a Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 16 Apr 2024 14:25:01 +0200 Subject: [PATCH] Work around a CUPTI bug in CUDA 12.4 Update 1. (#2330) --- src/compiler/reflection.jl | 10 ++++++++++ test/core/execution.jl | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/compiler/reflection.jl b/src/compiler/reflection.jl index eb4e01bb7e..513157b205 100644 --- a/src/compiler/reflection.jl +++ b/src/compiler/reflection.jl @@ -51,6 +51,16 @@ function code_sass(io::IO, job::CompilerJob; raw::Bool=false) return end + # NVIDIA bug #4604961: CUPTI in CUDA 12.4 Update 1 does not capture profiled events + # unless the activity API is first activated + if runtime_version() == v"12.4" + cfg = CUPTI.ActivityConfig([CUPTI.CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL, + CUPTI.CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API]) + CUPTI.enable!(cfg) do + # do nothing + end + end + cfg = CUPTI.CallbackConfig([CUPTI.CUPTI_CB_DOMAIN_RESOURCE]) do domain, id, data # only process relevant callbacks id == CUPTI.CUPTI_CBID_RESOURCE_MODULE_LOADED || return diff --git a/test/core/execution.jl b/test/core/execution.jl index 101f6d1194..6d78851e27 100644 --- a/test/core/execution.jl +++ b/test/core/execution.jl @@ -77,7 +77,7 @@ end CUDA.code_warntype(devnull, dummy, Tuple{}) CUDA.code_llvm(devnull, dummy, Tuple{}) CUDA.code_ptx(devnull, dummy, Tuple{}) - if can_use_cupti() && CUDA.runtime_version() != v"12.4" + if can_use_cupti() # functions defined in Julia sass = sprint(io->CUDA.code_sass(io, dummy, Tuple{})) @test occursin(".text._Z5dummy", sass) @@ -96,7 +96,7 @@ end @device_code_warntype io=devnull @cuda dummy() @device_code_llvm io=devnull @cuda dummy() @device_code_ptx io=devnull @cuda dummy() - if can_use_cupti() && CUDA.runtime_version() != v"12.4" + if can_use_cupti() # functions defined in Julia sass = sprint(io->@device_code_sass io=io @cuda dummy()) @test occursin(".text._Z5dummy", sass) @@ -120,7 +120,7 @@ end @test occursin("dummy", sprint(io->(@device_code_llvm io=io optimize=false @cuda dummy()))) @test occursin("dummy", sprint(io->(@device_code_llvm io=io @cuda dummy()))) @test occursin("dummy", sprint(io->(@device_code_ptx io=io @cuda dummy()))) - if can_use_cupti() && CUDA.runtime_version() != v"12.4" + if can_use_cupti() @test occursin("dummy", sprint(io->(@device_code_sass io=io @cuda dummy()))) end