diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index a967553b30e90..14302c1c19924 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -28,9 +28,16 @@ end
 bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1
 
 function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int)
-    domtree[node] = DomTreeNode(level, domtree[node].children)
-    foreach(domtree[node].children) do child
-        update_level!(domtree, child, level+1)
+    # Walk the subtree with an explicit worklist instead of recursing, so that
+    # deep dominator trees do not exhaust the call stack.
+    worklist = Tuple{Int, Int}[(node, level)]
+    while !isempty(worklist)
+        (node, level) = pop!(worklist)
+        domtree[node] = DomTreeNode(level, domtree[node].children)
+        # A plain loop avoids a closure capturing the reassigned `level`.
+        for child in domtree[node].children
+            push!(worklist, (child, level+1))
+        end
     end
 end
 
@@ -213,6 +220,33 @@ begin
         nothing
     end
 
+    # Worklist-based counterpart of `snca_compress!` (the caller picks between
+    # the two by size): pending (ancestor, node) pairs are kept on an explicit
+    # stack instead of the call stack.
+    function snca_compress_worklist!(
+            state::Vector{Node}, ancestors::Vector{DFSNumber},
+            v::DFSNumber, last_linked::DFSNumber)
+        # TODO: There is a smarter way to do this
+        u = ancestors[v]
+        worklist = Tuple{Int, Int}[(u,v)]
+        @assert u < v
+        while !isempty(worklist)
+            # Peek rather than pop: a pair is removed only once it is finished.
+            u, v = last(worklist)
+            if u >= last_linked
+                if ancestors[u] >= last_linked
+                    push!(worklist, (ancestors[u], u))
+                    continue
+                end
+                if state[u].label < state[v].label
+                    state[v] = Node(state[v].semi, state[u].label)
+                end
+                ancestors[v] = ancestors[u]
+            end
+            pop!(worklist)
+        end
+    end
+
     """
         The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05].
         Note that the pseudocode in [LG05] is not entirely accurate.
@@ -255,7 +289,13 @@ begin
                 # `ancestor[v] != 0` check in the `eval` implementation in
                 # figure 2.6
                 if vdfs >= last_linked
-                    snca_compress!(state, ancestors, vdfs, last_linked)
+                    # For performance, if the number of ancestors is small
+                    # avoid the extra allocation of the worklist.
+                    if length(ancestors) <= 32
+                        snca_compress!(state, ancestors, vdfs, last_linked)
+                    else
+                        snca_compress_worklist!(state, ancestors, vdfs, last_linked)
+                    end
                 end
                 semi_w = min(semi_w, state[vdfs].label)
             end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 0ab20ef0c734b..bf655a8feac09 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -40,6 +40,32 @@ let m = Meta.@lower 1 + 1
     @test isa(ir.stmts[3], Core.PhiNode) && length(ir.stmts[3].edges) == 1
 end
 
+# test that we don't stack-overflow in SNCA with large functions.
+let m = Meta.@lower 1 + 1
+    @assert Meta.isexpr(m, :thunk)
+    src = m.args[1]::Core.CodeInfo
+    code = Any[]
+    N = 2^15
+    for i in 1:2:N
+        push!(code, Expr(:call, :opaque))
+        push!(code, Expr(:gotoifnot, Core.SSAValue(i), N+2)) # skip one block
+    end
+    # all goto here
+    push!(code, Expr(:call, :opaque))
+    push!(code, Expr(:return))
+    src.code = code
+
+    nstmts = length(src.code)
+    src.ssavaluetypes = nstmts
+    src.codelocs = fill(Int32(1), nstmts)
+    src.ssaflags = fill(Int32(0), nstmts)
+    ir = Core.Compiler.inflate_ir(src)
+    Core.Compiler.verify_ir(ir)
+    domtree = Core.Compiler.construct_domtree(ir.cfg)
+    ir = Core.Compiler.domsort_ssa!(ir, domtree)
+    Core.Compiler.verify_ir(ir)
+end
+
 # Tests for SROA
 
 mutable struct Foo30594; x::Float64; end