Add utilities for performing CodeInfo validation passes (enabled for debug builds only) (JuliaLang#22938)
lazarusA · Aug 11, 2017 · 1fcc47c · 1fcc47c
1 parent bbf5584
commit 1fcc47c
Show file tree

Hide file tree

Showing 5 changed files with 332 additions and 19 deletions.
diff --git a/base/codevalidation.jl b/base/codevalidation.jl
@@ -0,0 +1,155 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Every `Expr` head accepted by the validator, mapped to the inclusive range of
+# argument counts (`length(ex.args)`) that head may carry
+const VALID_EXPR_HEADS = ObjectIdDict(
+    # heads that take no arguments
+    :null => 0:0, # TODO from @vtjnash: remove this + any :null handling code in Base
+    :the_exception => 0:0,
+    :simdloop => 0:0,
+    # heads that take exactly one argument
+    :static_parameter => 1:1,
+    :(&) => 1:1,
+    :const => 1:1,
+    :return => 1:1,
+    :enter => 1:1,
+    :leave => 1:1,
+    :inbounds => 1:1,
+    :boundscheck => 1:1,
+    :copyast => 1:1,
+    :global => 1:1,
+    :isdefined => 1:1,
+    # heads that take exactly two arguments
+    :gotoifnot => 2:2,
+    :(=) => 2:2,
+    # heads with a bounded range of argument counts
+    :line => 1:3,
+    :method => 1:4,
+    # heads with a minimum argument count only
+    :meta => 0:typemax(Int),
+    :call => 1:typemax(Int),
+    :new => 1:typemax(Int),
+    :invoke => 2:typemax(Int),
+    :foreigncall => 3:typemax(Int)
+)
+
+# Bit of a `slotflags` entry that `validate_code!` requires to be set on any slot that
+# appears on the left-hand side of an assignment
+const ASSIGNED_FLAG = 0x02
+
+# Kinds of validation failure. Each string is what ends up in the `kind` field of an
+# `InvalidCodeError`, so callers compare against these constants.
+# @enum isn't defined yet, otherwise I'd use it for this
+const INVALID_EXPR_HEAD = "invalid expression head"
+const INVALID_EXPR_NARGS = "invalid number of expression args"
+const INVALID_LVALUE = "invalid LHS value"
+const INVALID_RVALUE = "invalid RHS value"
+const INVALID_CALL_ARG = "invalid :call argument"
+const EMPTY_SLOTNAMES = "slotnames field is empty"
+const SLOTFLAGS_MISMATCH = "length(slotnames) != length(slotflags)"
+const SLOTTYPES_MISMATCH = "length(slotnames) != length(slottypes)"
+const SLOTTYPES_MISMATCH_UNINFERRED = "uninferred CodeInfo slottypes field is not `nothing`"
+const SSAVALUETYPES_MISMATCH = "not all SSAValues in AST have a type in ssavaluetypes"
+const SSAVALUETYPES_MISMATCH_UNINFERRED = "uninferred CodeInfo ssavaluetypes field does not equal the number of present SSAValues"
+const INVALID_ASSIGNMENT_SLOTFLAG = "slot has wrong assignment slotflag setting (bit flag 2 not set)"
+const NON_TOP_LEVEL_METHOD = "encountered `Expr` head `:method` in non-top-level code (i.e. `nargs` > 0)"
+const SIGNATURE_NARGS_MISMATCH = "method signature does not match number of method arguments"
+const SLOTNAMES_NARGS_MISMATCH = "CodeInfo for method contains fewer slotnames than the number of method arguments"
+
+# A single validation failure. `kind` is one of the failure-kind message strings defined
+# in this file; `meta` carries whatever extra data the check attached (the offending
+# node, a tuple of counts, ...), or `nothing` when there is none.
+struct InvalidCodeError <: Exception
+    kind::String
+    meta::Any
+end
+
+# Convenience constructor for failures that carry no extra data
+function InvalidCodeError(kind)
+    return InvalidCodeError(kind, nothing)
+end
+
+"""
+    validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo,
+                   is_top_level::Bool = false)
+
+Validate `c`, logging any violation by pushing an `InvalidCodeError` into `errors`,
+and return `errors`.
+
+Unless `is_top_level` is `true`, every `Expr` with head `:method` found in `c.code` is
+reported as `NON_TOP_LEVEL_METHOD`.
+"""
+function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_level::Bool = false)
+    ssavals = IntSet()
+    lhs_slotnums = IntSet()
+    walkast(c.code) do x
+        if isa(x, Expr)
+            !is_top_level && x.head == :method && push!(errors, InvalidCodeError(NON_TOP_LEVEL_METHOD))
+            # `-1:-1` is the sentinel for a head that is missing from the table
+            narg_bounds = get(VALID_EXPR_HEADS, x.head, -1:-1)
+            nargs = length(x.args)
+            if narg_bounds == -1:-1
+                push!(errors, InvalidCodeError(INVALID_EXPR_HEAD, (x.head, x)))
+            elseif !in(nargs, narg_bounds)
+                push!(errors, InvalidCodeError(INVALID_EXPR_NARGS, (x.head, nargs, x)))
+            elseif x.head == :(=)
+                lhs, rhs = x.args
+                if !is_valid_lvalue(lhs)
+                    push!(errors, InvalidCodeError(INVALID_LVALUE, lhs))
+                elseif isa(lhs, SlotNumber) && !in(lhs.id, lhs_slotnums)
+                    # check each assigned slot only once, on its first assignment
+                    n = lhs.id
+                    if isassigned(c.slotflags, n) && !is_flag_set(c.slotflags[n], ASSIGNED_FLAG)
+                        push!(errors, InvalidCodeError(INVALID_ASSIGNMENT_SLOTFLAG, lhs))
+                    end
+                    push!(lhs_slotnums, n)
+                end
+                if !is_valid_rvalue(rhs)
+                    push!(errors, InvalidCodeError(INVALID_RVALUE, rhs))
+                end
+            elseif x.head == :call || x.head == :invoke
+                for arg in x.args
+                    if !is_valid_rvalue(arg)
+                        push!(errors, InvalidCodeError(INVALID_CALL_ARG, arg))
+                    end
+                end
+            end
+        elseif isa(x, SSAValue)
+            # shift by one so that the stored id is > 0 for use with IntSet; pushing an
+            # id that is already present is a no-op, so no membership test is needed
+            push!(ssavals, x.id + 1)
+        end
+    end
+    nslotnames = length(c.slotnames)
+    nslotflags = length(c.slotflags)
+    nssavals = length(ssavals)
+    nslotnames == 0 && push!(errors, InvalidCodeError(EMPTY_SLOTNAMES))
+    nslotnames != nslotflags && push!(errors, InvalidCodeError(SLOTFLAGS_MISMATCH, (nslotnames, nslotflags)))
+    if c.inferred
+        # inferred code: `slottypes` and `ssavaluetypes` are measured with `length`
+        nslottypes = length(c.slottypes)
+        nssavaluetypes = length(c.ssavaluetypes)
+        nslottypes != nslotnames && push!(errors, InvalidCodeError(SLOTTYPES_MISMATCH, (nslotnames, nslottypes)))
+        nssavaluetypes < nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH, (nssavals, nssavaluetypes)))
+    else
+        # uninferred code: `slottypes` must be `nothing` and `ssavaluetypes` is compared
+        # directly against the number of distinct SSAValues seen
+        c.slottypes !== nothing && push!(errors, InvalidCodeError(SLOTTYPES_MISMATCH_UNINFERRED, c.slottypes))
+        c.ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, c.ssavaluetypes)))
+    end
+    return errors
+end
+
+"""
+    validate_code!(errors::Vector{>:InvalidCodeError}, mi::MethodInstance,
+                   c::Union{Void,CodeInfo} = Core.Inference.retrieve_code_info(mi))
+
+Validate `mi`, logging any violation by pushing an `InvalidCodeError` into `errors`,
+and return `errors`.
+
+If `isa(c, CodeInfo)`, also call `validate_code!(errors, c)`. It is assumed that `c` is
+the `CodeInfo` instance associated with `mi`.
+"""
+function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInstance,
+                        c::Union{Void,CodeInfo} = Core.Inference.retrieve_code_info(mi))
+    m = mi.def::Method
+    n_sig_params = length(Core.Inference.unwrap_unionall(m.sig).parameters)
+    # a vararg method needs at least `nargs - 1` signature parameters; any other
+    # method needs exactly `nargs`
+    sig_mismatch = m.isva ? n_sig_params < (m.nargs - 1) : n_sig_params != m.nargs
+    if sig_mismatch
+        push!(errors, InvalidCodeError(SIGNATURE_NARGS_MISMATCH, (m.isva, n_sig_params, m.nargs)))
+    end
+    if isa(c, CodeInfo)
+        if m.nargs > length(c.slotnames)
+            push!(errors, InvalidCodeError(SLOTNAMES_NARGS_MISMATCH))
+        end
+        # a method with `nargs == 0` is validated as top-level code
+        validate_code!(errors, c, m.nargs == 0)
+    end
+    return errors
+end
+
+# Non-mutating entry point: run `validate_code!` on a fresh error vector and return it
+function validate_code(args...)
+    errors = Vector{InvalidCodeError}()
+    return validate_code!(errors, args...)
+end
+
+# Call `f` on every element of `stmts`, descending into the `args` of each `Expr`
+# right after visiting the `Expr` itself (pre-order traversal)
+function walkast(f, stmts::Array)
+    for node in stmts
+        f(node)
+        if isa(node, Expr)
+            walkast(f, node.args)
+        end
+    end
+end
+
+# An assignment target must be a `SlotNumber`, an `SSAValue` or a `GlobalRef`
+function is_valid_lvalue(x)
+    isa(x, SlotNumber) && return true
+    isa(x, SSAValue) && return true
+    return isa(x, GlobalRef)
+end
+
+# A value is rejected if it is an `Expr` with head `:gotoifnot`, `:line`, `:const` or
+# `:meta`, or if it is a `GotoNode`, `LabelNode` or `LineNumberNode`; anything else
+# is accepted
+function is_valid_rvalue(x)
+    if isa(x, Expr)
+        head = x.head
+        return !(head == :gotoifnot || head == :line || head == :const || head == :meta)
+    end
+    return !(isa(x, GotoNode) || isa(x, LabelNode) || isa(x, LineNumberNode))
+end
+
+# Return `true` when every bit of `flag` is also set in `byte`
+function is_flag_set(byte::UInt8, flag::UInt8)
+    masked = byte & flag
+    return masked == flag
+end
diff --git a/base/coreimg.jl b/base/coreimg.jl
@@ -65,6 +65,7 @@ include("associative.jl")
 include("docs/core.jl")
 
 # compiler
+include("codevalidation.jl")
 include("inference.jl")
 ccall(:jl_set_typeinf_func, Void, (Any,), typeinf_ext)
 

diff --git a/base/inference.jl b/base/inference.jl
@@ -289,24 +289,16 @@ end
 function InferenceState(linfo::MethodInstance,
  optimize::Bool, cached::Bool, params::InferenceParams)
  # prepare an InferenceState object for inferring lambda
- # create copies of the CodeInfo definition, and any fields that type-inference might modify
- m = linfo.def::Method
- if isdefined(m, :generator)
- try
- # user code might throw errors – ignore them
- src = get_staged(linfo)
- catch
- return nothing
- end
- else
- # TODO: post-inference see if we can swap back to the original arrays?
- if isa(m.source, Array{UInt8,1})
- src = ccall(:jl_uncompress_ast, Any, (Any, Any), m, m.source)
- else
- src = ccall(:jl_copy_code_info, Ref{CodeInfo}, (Any,), m.source)
- src.code = copy_exprargs(src.code)
- src.slotnames = copy(src.slotnames)
- src.slotflags = copy(src.slotflags)
+ src = retrieve_code_info(linfo)
+ src === nothing && return nothing
+ if JLOptions().debug_level == 2
+ # debug info level 2 was requested (NOTE(review): presumably the default for debug builds of julia -- confirm), so let's validate linfo
+ errors = validate_code(linfo, src)
+ if !isempty(errors)
+ for e in errors
+ println(STDERR, "WARNING: Encountered invalid lowered code for method ",
+ linfo.def, ": ", e)
+ end
  end
  end
  return InferenceState(linfo, src, optimize, cached, params)
@@ -332,6 +324,35 @@ end
 
 #### helper functions ####
 
+# Return a copy of `c` whose `code`, `slotnames` and `slotflags` fields are themselves
+# fresh copies, since those are the fields type-inference might modify
+function copy_code_info(c::CodeInfo)
+    dup = ccall(:jl_copy_code_info, Ref{CodeInfo}, (Any,), c)
+    dup.slotflags = copy(dup.slotflags)
+    dup.slotnames = copy(dup.slotnames)
+    dup.code = copy_exprargs(dup.code)
+    return dup
+end
+
+# Fetch the code for `linfo`: the result of `get_staged` when the method has a
+# generator (or `nothing` if that throws), otherwise the method's source, uncompressed
+# or copied as needed
+function retrieve_code_info(linfo::MethodInstance)
+    m = linfo.def::Method
+    if isdefined(m, :generator)
+        try
+            # user code might throw errors – ignore them
+            return get_staged(linfo)
+        catch
+            return nothing
+        end
+    end
+    # TODO: post-inference see if we can swap back to the original arrays?
+    if isa(m.source, Array{UInt8,1})
+        return ccall(:jl_uncompress_ast, Any, (Any, Any), m, m.source)
+    end
+    return copy_code_info(m.source)
+end
+
 @inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : (s::TypedSlot).id # using a function to ensure we can infer this
 
 # avoid cycle due to over-specializing `any` when used by inference

diff --git a/test/choosetests.jl b/test/choosetests.jl
@@ -35,7 +35,7 @@ function choosetests(choices = [])
  "enums", "cmdlineargs", "i18n", "workspace", "libdl", "int",
  "checked", "intset", "floatfuncs", "compile", "distributed", "inline",
  "boundscheck", "error", "ambiguous", "cartesian", "asmvariant", "osutils",
- "channels", "iostream", "specificity", "codegen"
+ "channels", "iostream", "specificity", "codegen", "codevalidation"
  ]
  profile_skipped = false
  if startswith(string(Sys.ARCH), "arm")

diff --git a/test/codevalidation.jl b/test/codevalidation.jl
@@ -0,0 +1,136 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Base.Test
+
+# Fixture whose lowered code the tests below inspect and corrupt. The statement
+# indices and slot numbers used by those tests depend on this exact body, so do not
+# edit it without re-checking them.
+# NOTE(review): `i` is referenced after the loop; the visible tests never call this
+# function, they only retrieve and type-infer its code -- confirm this is intentional.
+function f22938(a, b, x...)
+ d = 1
+ a = d
+ for i in 1:b
+ d += i
+ end
+ return i * a
+end
+
+# Look up the method matching `msig`, build its MethodInstance `mi`, and retrieve the
+# CodeInfo `c0` that every test below copies before corrupting it
+msig = Tuple{typeof(f22938),Int,Int,Int,Int}
+world = typemax(UInt)
+_, msp, m = Base._methods_by_ftype(msig, -1, world)[]
+mi = Core.Inference.code_for_method(m, msig, msp, world, false)
+c0 = Core.Inference.retrieve_code_info(mi)
+
+# the untouched MethodInstance and CodeInfo must validate without errors
+@test isempty(Core.Inference.validate_code(mi))
+@test isempty(Core.Inference.validate_code(c0))
+
+# INVALID_EXPR_HEAD
+# `:invalid` is not a key of VALID_EXPR_HEADS
+c = Core.Inference.copy_code_info(c0)
+insert!(c.code, 4, Expr(:(=), SlotNumber(2), Expr(:invalid, 1)))
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.INVALID_EXPR_HEAD
+
+# INVALID_LVALUE
+# a LabelNode, a Symbol and an Int are none of SlotNumber/SSAValue/GlobalRef
+c = Core.Inference.copy_code_info(c0)
+insert!(c.code, 4, Expr(:(=), LabelNode(1), 1))
+insert!(c.code, 2, Expr(:(=), :x, 1))
+insert!(c.code, 10, Expr(:(=), 3, 1))
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 3
+@test all(e.kind === Core.Inference.INVALID_LVALUE for e in errors)
+
+# INVALID_RVALUE
+# 3 inserted node RHS values + 4 pushed `Expr(h)` RHS values give 7 INVALID_RVALUE errors;
+# the argument-less `Expr(h)` also breaks the arg-count bounds of `:gotoifnot`, `:line`
+# and `:const` (`:meta` allows zero args), giving 3 INVALID_EXPR_NARGS errors
+c = Core.Inference.copy_code_info(c0)
+insert!(c.code, 2, Expr(:(=), SlotNumber(2), GotoNode(1)))
+insert!(c.code, 4, Expr(:(=), SlotNumber(2), LabelNode(2)))
+insert!(c.code, 10, Expr(:(=), SlotNumber(2), LineNumberNode(2)))
+for h in (:gotoifnot, :line, :const, :meta)
+ push!(c.code, Expr(:(=), SlotNumber(2), Expr(h)))
+end
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 10
+@test count(e.kind === Core.Inference.INVALID_RVALUE for e in errors) == 7
+@test count(e.kind === Core.Inference.INVALID_EXPR_NARGS for e in errors) == 3
+
+# INVALID_CALL_ARG/INVALID_EXPR_NARGS
+# same counts as the INVALID_RVALUE test, but with the invalid values placed as `:call`
+# arguments (including one inside a nested `:call`)
+c = Core.Inference.copy_code_info(c0)
+insert!(c.code, 2, Expr(:(=), SlotNumber(2), Expr(:call, :+, SlotNumber(2), GotoNode(1))))
+insert!(c.code, 4, Expr(:call, :-, Expr(:call, :sin, LabelNode(2)), 3))
+insert!(c.code, 10, Expr(:call, LineNumberNode(2)))
+for h in (:gotoifnot, :line, :const, :meta)
+ push!(c.code, Expr(:call, :f, Expr(h)))
+end
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 10
+@test count(e.kind === Core.Inference.INVALID_CALL_ARG for e in errors) == 7
+@test count(e.kind === Core.Inference.INVALID_EXPR_NARGS for e in errors) == 3
+
+# EMPTY_SLOTNAMES
+# emptying `slotnames` also makes its length differ from `slotflags`, hence two errors
+c = Core.Inference.copy_code_info(c0)
+empty!(c.slotnames)
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 2
+@test any(e.kind === Core.Inference.EMPTY_SLOTNAMES for e in errors)
+@test any(e.kind === Core.Inference.SLOTFLAGS_MISMATCH for e in errors)
+
+# SLOTFLAGS_MISMATCH
+# one extra slot name without a matching slot flag
+c = Core.Inference.copy_code_info(c0)
+push!(c.slotnames, :dummy)
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.SLOTFLAGS_MISMATCH
+
+# SLOTTYPES_MISMATCH
+# uses the code returned by `@code_typed`, then drops one slot type
+c = @code_typed(f22938(1,2,3,4))[1]
+pop!(c.slottypes)
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.SLOTTYPES_MISMATCH
+
+# SLOTTYPES_MISMATCH_UNINFERRED
+# uninferred code must have `slottypes === nothing`
+c = Core.Inference.copy_code_info(c0)
+c.slottypes = 1
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.SLOTTYPES_MISMATCH_UNINFERRED
+
+# SSAVALUETYPES_MISMATCH
+# uses the code returned by `@code_typed`, then removes all SSA value types
+c = @code_typed(f22938(1,2,3,4))[1]
+empty!(c.ssavaluetypes)
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.SSAVALUETYPES_MISMATCH
+
+# SSAVALUETYPES_MISMATCH_UNINFERRED
+# here `ssavaluetypes` is a number, compared against the count of distinct SSAValues
+c = Core.Inference.copy_code_info(c0)
+c.ssavaluetypes -= 1
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.SSAVALUETYPES_MISMATCH_UNINFERRED
+
+# INVALID_ASSIGNMENT_SLOTFLAG
+# clears every flag bit of slot 8, including ASSIGNED_FLAG
+# NOTE(review): presumably slot 8 is assigned somewhere in the lowered `f22938` --
+# confirm against the lowered code if the fixture changes
+c = Core.Inference.copy_code_info(c0)
+c.slotflags[8] = 0x00
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.INVALID_ASSIGNMENT_SLOTFLAG
+
+# SIGNATURE_NARGS_MISMATCH
+# temporarily replace the signature of the method behind `mi` with one that has the
+# wrong number of parameters
+old_sig = mi.def.sig
+mi.def.sig = Tuple{1,2}
+errors = try
+    Core.Inference.validate_code(mi)
+finally
+    # `mi.def` is shared state: always restore the real signature, even if validation
+    # throws, so that later tests do not run against a corrupted method
+    mi.def.sig = old_sig
+end
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.SIGNATURE_NARGS_MISMATCH
+
+# NON_TOP_LEVEL_METHOD
+# `validate_code(c)` uses the default `is_top_level = false`, so a `:method` Expr is
+# rejected; its single argument is within the allowed 1:4 range, so it is the only error
+c = Core.Inference.copy_code_info(c0)
+push!(c.code, Expr(:method, :dummy))
+errors = Core.Inference.validate_code(c)
+@test length(errors) == 1
+@test errors[1].kind === Core.Inference.NON_TOP_LEVEL_METHOD
+
+# SLOTNAMES_NARGS_MISMATCH
+# temporarily inflate `nargs` of the method behind `mi`; the test expects this to trip
+# both the slotnames check and the signature check
+mi.def.nargs += 20
+errors = try
+    Core.Inference.validate_code(mi)
+finally
+    # `mi.def` is shared state: always undo the change, even if validation throws
+    mi.def.nargs -= 20
+end
+@test length(errors) == 2
+@test count(e.kind === Core.Inference.SLOTNAMES_NARGS_MISMATCH for e in errors) == 1
+@test count(e.kind === Core.Inference.SIGNATURE_NARGS_MISMATCH for e in errors) == 1