Skip to content

Commit

Permalink
Update New PassManager Pipeline (#559)
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed Apr 10, 2024
1 parent a807841 commit cb736ee
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 29 deletions.
61 changes: 32 additions & 29 deletions src/optim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ function buildNewPMPipeline!(mpm, @nospecialize(job::CompilerJob), opt_level=2)
add!(mpm, NewPMFunctionPassManager) do fpm
buildLoopOptimizerPipeline(fpm, job, opt_level)
buildScalarOptimizerPipeline(fpm, job, opt_level)
if false && opt_level >= 2
if uses_julia_runtime(job) && opt_level >= 2
# XXX: we disable vectorization, as this generally isn't useful for GPU targets
# and actually causes issues with some back-end compilers (like Metal).
# TODO: Make this not dependent on `uses_julia_runtime` (likely CPU), but on its own control
buildVectorPipeline(fpm, job, opt_level)
end
if isdebug(:optim)
Expand Down Expand Up @@ -112,6 +113,8 @@ end

function buildLoopOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
add!(fpm, NewPMLoopPassManager) do lpm
# TODO LowerSIMDLoopPass
# LoopPass since JuliaLang/julia#51883
if opt_level >= 2
add!(lpm, LoopRotatePass())
end
Expand All @@ -121,7 +124,7 @@ function buildLoopOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_le
add!(fpm, NewPMLoopPassManager, #=UseMemorySSA=#true) do lpm
add!(lpm, LICMPass())
add!(lpm, JuliaLICMPass())
add!(lpm, SimpleLoopUnswitchPass())
add!(lpm, SimpleLoopUnswitchPass(SimpleLoopUnswitchPassOptions(nontrivial=true, trivial=true)))
add!(lpm, LICMPass())
add!(lpm, JuliaLICMPass())
end
Expand Down Expand Up @@ -186,23 +189,41 @@ function buildVectorPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
end

function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), opt_level)
# lower exception handling
if uses_julia_runtime(job)
add!(mpm, RemoveNIPass())

# lower GC intrinsics
add!(mpm, NewPMFunctionPassManager) do fpm
if !uses_julia_runtime(job)
add!(legacy2newpm(lower_gc_frame!), fpm)
end
end

# lower kernel state intrinsics
# NOTE: we can only do so here, as GC lowering can introduce calls to the runtime,
# and thus additional uses of the kernel state intrinsics.
if job.config.kernel
# TODO: now that all kernel state-related passes are being run here, merge some?
add!(legacy2newpm(add_kernel_state!), mpm)
add!(mpm, NewPMFunctionPassManager) do fpm
add!(fpm, LowerExcHandlersPass())
add!(legacy2newpm(lower_kernel_state!), fpm)
end
add!(legacy2newpm(cleanup_kernel_state!), mpm)
end

add!(mpm, NewPMFunctionPassManager) do fpm
add!(fpm, GCInvariantVerifierPass())
if !uses_julia_runtime(job)
# remove dead uses of ptls
add!(mpm, NewPMFunctionPassManager) do fpm
add!(fpm, ADCEPass())
end
add!(legacy2newpm(lower_ptls!), mpm)
end
add!(mpm, RemoveNIPass())

# lower GC intrinsics
add!(mpm, NewPMFunctionPassManager) do fpm
if !uses_julia_runtime(job)
add!(legacy2newpm(lower_gc_frame!), fpm)
# lower exception handling
if uses_julia_runtime(job)
add!(fpm, LowerExcHandlersPass())
end
add!(fpm, GCInvariantVerifierPass())
add!(fpm, LateLowerGCPass())
if uses_julia_runtime(job) && VERSION >= v"1.11.0-DEV.208"
add!(fpm, FinalLowerGCPass())
Expand All @@ -220,27 +241,9 @@ function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), op
end
end

# lower kernel state intrinsics
# NOTE: we can only do so here, as GC lowering can introduce calls to the runtime,
# and thus additional uses of the kernel state intrinsics.
if job.config.kernel
# TODO: now that all kernel state-related passes are being run here, merge some?
add!(legacy2newpm(add_kernel_state!), mpm)
add!(mpm, NewPMFunctionPassManager) do fpm
add!(legacy2newpm(lower_kernel_state!), fpm)
end
add!(legacy2newpm(cleanup_kernel_state!), mpm)
end

# lower PTLS intrinsics
if uses_julia_runtime(job)
add!(mpm, LowerPTLSPass())
else
# remove dead uses of ptls
add!(mpm, NewPMFunctionPassManager) do fpm
add!(fpm, ADCEPass())
end
add!(legacy2newpm(lower_ptls!), mpm)
end

if opt_level >= 1
Expand Down
4 changes: 4 additions & 0 deletions test/gcn_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ false && @testset "GC and TLS lowering" begin

asm = sprint(io->GCN.code_native(io, mod.kernel, Tuple{Int}))
@test occursin("gpu_gc_pool_alloc", asm)
@test !occursin("julia.push_gc_frame", asm)
@test !occursin("julia.pop_gc_frame", asm)
@test !occursin("julia.get_gc_frame_slot", asm)
@test !occursin("julia.new_gc_frame", asm)

# make sure that we can still elide allocations
function ref_kernel(ptr, i)
Expand Down
5 changes: 5 additions & 0 deletions test/ptx_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ end

asm = sprint(io->PTX.code_native(io, mod.kernel, Tuple{Int}))
@test occursin("gpu_gc_pool_alloc", asm)
@test !occursin("julia.push_gc_frame", asm)
@test !occursin("julia.pop_gc_frame", asm)
@test !occursin("julia.get_gc_frame_slot", asm)
@test !occursin("julia.new_gc_frame", asm)


# make sure that we can still elide allocations
function ref_kernel(ptr, i)
Expand Down

0 comments on commit cb736ee

Please sign in to comment.