From c79044c3d52d0689dbbcd6c7ae0e75bc8f869bfe Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Sun, 10 May 2020 19:29:26 -0400
Subject: [PATCH 1/4] Refactor cache logic for easy replacement

This is the next step in the line of work started by #33955, though a lot of
enabling work towards this was previously done by Jameson in his
codegen-norecursion branch. The basic thrust here is to allow external
packages to manage their own cache of compiled code that may have been
generated using entirely different inference or compiler options. The GPU
compilers are one such example, but there are several others, including
generating code using offload compilers, such as XLA, or compilers for
secure computation.

A lot of this is just moving code around to make it clear exactly which parts
of the code are accessing the internal code cache (which is now its own type
to make it obvious when it's being accessed), as well as providing clear
extension points for custom cache implementations. The second part is to
refactor CodeInstance construction to separate construction and insertion
into the internal cache (so it can be inserted into an external cache
instead, if desired). The last part of the change is to give cgparams another
hook that lets the caller replace the cache lookup used by codegen.
---
 base/boot.jl                            |   5 +
 base/compiler/abstractinterpretation.jl |   2 +-
 base/compiler/cicache.jl                |  52 ++++++++++
 base/compiler/compiler.jl               |   1 +
 base/compiler/ssair/inlining.jl         |   8 +-
 base/compiler/typeinfer.jl              | 124 ++++++++++++++----------
 base/compiler/types.jl                  |  22 +++++
 base/compiler/utilities.jl              |   5 -
 base/reflection.jl                      |   7 +-
 src/aotcompile.cpp                      |  43 +++++---
 src/codegen.cpp                         |   5 +-
 src/gf.c                                |  46 ++++++---
 src/julia.h                             |   8 +-
 test/compiler/inference.jl              |   2 +-
 test/reflection.jl                      |   4 +-
 15 files changed, 234 insertions(+), 100 deletions(-)
 create mode 100644 base/compiler/cicache.jl

diff --git a/base/boot.jl b/base/boot.jl
index 33dfe984e9ba5..6a2338f2094e0 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -380,6 +380,11 @@ eval(Core, :(UpsilonNode(val) = $(Expr(:new, :UpsilonNode, :val))))
 eval(Core, :(UpsilonNode() = $(Expr(:new, :UpsilonNode))))
 eval(Core, :(LineInfoNode(@nospecialize(method), file::Symbol, line::Int, inlined_at::Int) =
              $(Expr(:new, :LineInfoNode, :method, :file, :line, :inlined_at))))
+eval(Core, :(CodeInstance(mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
+                          @nospecialize(inferred), const_flags::Int32,
+                          min_world::UInt, max_world::UInt) =
+        ccall(:jl_new_codeinst, Ref{CodeInstance}, (Any, Any, Any, Any, Int32, UInt, UInt),
+            mi, rettype, inferred_const, inferred, const_flags, min_world, max_world)))

 Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 572ad43b72e51..6c59a28199ceb 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -242,7 +242,7 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, @nosp
     mi = mi::MethodInstance
     # decide if it's likely to be worthwhile
     if !force_inference
-        code = inf_for_methodinstance(interp, mi, get_world_counter(interp))
+        code = get(code_cache(interp), mi, nothing)
         declared_inline = isdefined(method, :source) && ccall(:jl_ir_flag_inlineable, Bool, (Any,), method.source)
         cache_inlineable = declared_inline
         if isdefined(code, :inferred) && !cache_inlineable
diff --git 
a/base/compiler/cicache.jl b/base/compiler/cicache.jl new file mode 100644 index 0000000000000..5e5690ff3a014 --- /dev/null +++ b/base/compiler/cicache.jl @@ -0,0 +1,52 @@ +""" + struct InternalCodeCache + +Internally, each `MethodInstance` keep a unique global cache of code instances +that have been created for the given method instance, stratified by world age +ranges. This struct abstracts over access to this cache. +""" +struct InternalCodeCache +end + +function setindex!(cache::InternalCodeCache, ci::CodeInstance, mi::MethodInstance) + ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, ci) +end + +const GLOBAL_CI_CACHE = InternalCodeCache() + +""" + struct WorldView + +Takes a given cache and provides access to the cache contents for the given +range of world ages, rather than defaulting to the current active world age. +""" +struct WorldView{Cache} + cache::Cache + min_world::UInt + max_world::UInt +end +WorldView(cache, r::UnitRange) = WorldView(cache, first(r), last(r)) +WorldView(cache, world::UInt) = WorldView(cache, world, world) +WorldView(wvc::WorldView{InternalCodeCache}, min_world::UInt, max_world::UInt) = + WorldView(wvc.cache, min_world, max_world) + +function haskey(wvc::WorldView{InternalCodeCache}, mi::MethodInstance) + ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, wvc.min_world, wvc.max_world)::Union{Nothing, CodeInstance} !== nothing +end + +function get(wvc::WorldView{InternalCodeCache}, mi::MethodInstance, default) + r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, wvc.min_world, wvc.max_world)::Union{Nothing, CodeInstance} + if r === nothing + return default + end + return r::CodeInstance +end + +function getindex(wvc::WorldView{InternalCodeCache}, mi::MethodInstance) + r = get(wvc, mi, nothing) + r === nothing && throw(KeyError(mi)) + return r::CodeInstance +end + +setindex!(wvc::WorldView{InternalCodeCache}, ci::CodeInstance, mi::MethodInstance) = + setindex!(wvc.cache, ci, mi) diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index c8356611da4ab..82634d1d06717 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -100,6 +100,7 @@ include("compiler/validation.jl") include("compiler/inferenceresult.jl") include("compiler/inferencestate.jl") +include("compiler/cicache.jl") include("compiler/typeutils.jl") include("compiler/typelimits.jl") diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 7c42385f28a98..1149f37fd310c 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -798,7 +798,7 @@ function iterate(split::UnionSplitSignature, state::Vector{Int}...) 
return (sig, state) end -function handle_single_case!(ir::IRCode, stmt::Expr, idx::Int, @nospecialize(case), isinvoke::Bool, todo::Vector{Any}, sv::OptimizationState) +function handle_single_case!(ir::IRCode, stmt::Expr, idx::Int, @nospecialize(case), isinvoke::Bool, todo::Vector{Any}) if isa(case, ConstantCase) ir[SSAValue(idx)] = case.val elseif isa(case, MethodInstance) @@ -935,7 +935,7 @@ function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, invoke_data::Invok methsp = methsp::SimpleVector result = analyze_method!(idx, sig, metharg, methsp, method, stmt, sv, true, invoke_data, calltype) - handle_single_case!(ir, stmt, idx, result, true, todo, sv) + handle_single_case!(ir, stmt, idx, result, true, todo) update_valid_age!(invoke_data.min_valid, invoke_data.max_valid, sv) return nothing end @@ -1103,7 +1103,7 @@ function assemble_inline_todo!(ir::IRCode, sv::OptimizationState) # be able to do the inlining now (for constant cases), or push it directly # onto the todo list if fully_covered && length(cases) == 1 - handle_single_case!(ir, stmt, idx, cases[1][2], false, todo, sv) + handle_single_case!(ir, stmt, idx, cases[1][2], false, todo) continue end length(cases) == 0 && continue @@ -1318,7 +1318,7 @@ function find_inferred(mi::MethodInstance, @nospecialize(atypes), sv::Optimizati end end - linfo = inf_for_methodinstance(sv.interp, mi, sv.world) + linfo = get(WorldView(code_cache(sv.interp), sv.world), mi, nothing) if linfo isa CodeInstance if invoke_api(linfo) == 2 # in this case function can be inlined to a constant diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 0e367bb3f6449..1df620436defb 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -4,7 +4,7 @@ function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::Bool) frame = InferenceState(result, cached, interp) frame === nothing && return false - cached && (result.linfo.inInference = true) + cached && lock_mi_inference(interp, result.linfo) return typeinf(interp, frame) end @@ -64,7 +64,7 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState) caller.src.min_world = min_valid caller.src.max_world = max_valid if cached - cache_result(interp, caller.result, min_valid, max_valid) + cache_result!(interp, caller.result, min_valid, max_valid) end if max_valid == typemax(UInt) # if we aren't cached, we don't need this edge @@ -79,60 +79,78 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState) return true end -# inference completed on `me` -# update the MethodInstance and notify the edges -function cache_result(interp::AbstractInterpreter, result::InferenceResult, min_valid::UInt, max_valid::UInt) - def = result.linfo.def - toplevel = !isa(result.linfo.def, Method) - - # check if the existing linfo metadata is also sufficient to describe the current inference result - # to decide if it is worth caching this - already_inferred = !result.linfo.inInference - if inf_for_methodinstance(interp, result.linfo, min_valid, max_valid) isa CodeInstance - already_inferred = true - end - # TODO: also don't store inferred code if we've previously decided to interpret this function - if !already_inferred - inferred_result = result.src - if inferred_result isa Const - # use constant calling convention - rettype_const = (result.src::Const).val - const_flags = 0x3 +function CodeInstance(result::InferenceResult, min_valid::UInt, max_valid::UInt, + may_compress=true, always_cache_tree=false) + inferred_result = result.src + local const_flags::Int32 + 
if inferred_result isa Const + # use constant calling convention + rettype_const = (result.src::Const).val + const_flags = 0x3 + else + if isa(result.result, Const) + rettype_const = (result.result::Const).val + const_flags = 0x2 + elseif isconstType(result.result) + rettype_const = result.result.parameters[1] + const_flags = 0x2 else - if isa(result.result, Const) - rettype_const = (result.result::Const).val - const_flags = 0x2 - elseif isconstType(result.result) - rettype_const = result.result.parameters[1] - const_flags = 0x2 - else - rettype_const = nothing - const_flags = 0x00 - end - if !toplevel && inferred_result isa CodeInfo - cache_the_tree = result.src.inferred && + rettype_const = nothing + const_flags = 0x00 + end + if inferred_result isa CodeInfo + def = result.linfo.def + toplevel = !isa(def, Method) + if !toplevel + cache_the_tree = always_cache_tree || (result.src.inferred && (result.src.inlineable || - ccall(:jl_isa_compileable_sig, Int32, (Any, Any), result.linfo.specTypes, def) != 0) + ccall(:jl_isa_compileable_sig, Int32, (Any, Any), result.linfo.specTypes, def) != 0)) if cache_the_tree - # compress code for non-toplevel thunks - nslots = length(inferred_result.slotflags) - resize!(inferred_result.slottypes, nslots) - resize!(inferred_result.slotnames, nslots) - inferred_result = ccall(:jl_compress_ir, Any, (Any, Any), def, inferred_result) + if may_compress + nslots = length(inferred_result.slotflags) + resize!(inferred_result.slottypes, nslots) + resize!(inferred_result.slotnames, nslots) + inferred_result = ccall(:jl_compress_ir, Any, (Any, Any), def, inferred_result) + end else inferred_result = nothing end end end - if !isa(inferred_result, Union{CodeInfo, Vector{UInt8}}) - inferred_result = nothing - end - ccall(:jl_set_method_inferred, Ref{CodeInstance}, (Any, Any, Any, Any, Int32, UInt, UInt), - result.linfo, widenconst(result.result), rettype_const, inferred_result, - const_flags, min_valid, max_valid) end - result.linfo.inInference = false + if !isa(inferred_result, Union{CodeInfo, Vector{UInt8}}) + inferred_result = nothing + end + return CodeInstance(result.linfo, + widenconst(result.result), rettype_const, inferred_result, + const_flags, min_valid, max_valid) +end + +# For the NativeInterpreter, we don't need to do an actual cache query to know +# if something was already inferred. If we reach this point, but the inference +# flag has been turned off, then it's in the cache. This is purely a performance +# optimization. 
+already_inferred_quick_test(interp::NativeInterpreter, mi::MethodInstance) = + !mi.inInference +already_inferred_quick_test(interp::AbstractInterpreter, mi::MethodInstance) = + false + +# inference completed on `me` +# update the MethodInstance and notify the edges +function cache_result!(interp::AbstractInterpreter, result::InferenceResult, min_valid::UInt, max_valid::UInt) + # check if the existing linfo metadata is also sufficient to describe the current inference result + # to decide if it is worth caching this + already_inferred = already_inferred_quick_test(interp, result.linfo) + if !already_inferred && haskey(WorldView(code_cache(interp), min_valid, max_valid), result.linfo) + already_inferred = true + end + + # TODO: also don't store inferred code if we've previously decided to interpret this function + if !already_inferred + code_cache(interp)[result.linfo] = CodeInstance(result, min_valid, max_valid) + end + unlock_mi_inference(interp, result.linfo) nothing end @@ -142,7 +160,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter) # a top parent will be cached still, but not this intermediate work # we can throw everything else away now me.cached = false - me.linfo.inInference = false + unlock_mi_inference(interp, me.linfo) me.src.inlineable = false else # annotate fulltree with type information @@ -452,7 +470,7 @@ end # compute (and cache) an inferred AST and return the current best estimate of the result type function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector, caller::InferenceState) mi = specialize_method(method, atypes, sparams)::MethodInstance - code = inf_for_methodinstance(interp, mi, get_world_counter(interp)) + code = get(code_cache(interp), mi, nothing) if code isa CodeInstance # return existing rettype if the code is already inferred update_valid_age!(min_world(code), max_world(code), caller) if isdefined(code, :rettype_const) @@ -470,12 +488,12 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize end if frame === false # completely new - mi.inInference = true + lock_mi_inference(interp, mi) result = InferenceResult(mi) frame = InferenceState(result, #=cached=#true, interp) # always use the cache for edge targets if frame === nothing # can't get the source for this, so we know nothing - mi.inInference = false + unlock_mi_inference(interp, mi) return Any, nothing end if caller.cached || caller.limited # don't involve uncached functions in cycle resolution @@ -524,7 +542,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance) method = mi.def::Method for i = 1:2 # test-and-lock-and-test i == 2 && ccall(:jl_typeinf_begin, Cvoid, ()) - code = inf_for_methodinstance(interp, mi, get_world_counter(interp)) + code = get(code_cache(interp), mi, nothing) if code isa CodeInstance # see if this code already exists in the cache inf = code.inferred @@ -565,7 +583,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance) end end end - mi.inInference = true + lock_mi_inference(interp, mi) frame = InferenceState(InferenceResult(mi), #=cached=#true, interp) frame === nothing && return nothing typeinf(interp, frame) @@ -582,7 +600,7 @@ function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize mi = specialize_method(method, atypes, sparams)::MethodInstance for i = 1:2 # test-and-lock-and-test i == 2 && ccall(:jl_typeinf_begin, Cvoid, ()) - code = inf_for_methodinstance(interp, mi, get_world_counter(interp)) + code = 
get(code_cache(interp), mi, nothing) if code isa CodeInstance # see if this rettype already exists in the cache i == 2 && ccall(:jl_typeinf_end, Cvoid, ()) diff --git a/base/compiler/types.jl b/base/compiler/types.jl index ae73c523d22ed..4a8feb261da99 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -175,3 +175,25 @@ InferenceParams(ni::NativeInterpreter) = ni.inf_params OptimizationParams(ni::NativeInterpreter) = ni.opt_params get_world_counter(ni::NativeInterpreter) = ni.world get_inference_cache(ni::NativeInterpreter) = ni.cache + +code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, ni.world) + +""" + lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) + +Locks `mi`'s inference flag to prevent infinite recursion inside inference. +While running inference, we must occaisionally compile additional code, which +may in turn request additional code to be inferred. This can happen during +bootstrap (for part of inference itself), but also during regular execution (e.g. +to expand generated functions). The inference flag lets the runtime know that +it should not attempt to re-infer the requested functions as it is being worked +on higher in the stack. Not that inference itself does not look at this flag, +instead checking its own working caches - it is only used for locking the C +runtime. +""" +lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing) + +""" + See lock_mi_inference +""" +unlock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing) diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 0fa4f776de681..84ca65834726f 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -118,11 +118,6 @@ function retrieve_code_info(linfo::MethodInstance) end end -function inf_for_methodinstance(interp::AbstractInterpreter, mi::MethodInstance, min_world::UInt, max_world::UInt=min_world) - return ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, min_world, max_world)::Union{Nothing, CodeInstance} -end - - # get a handle to the unique specialization object representing a particular instantiation of a call function specialize_method(method::Method, @nospecialize(atypes), sparams::SimpleVector, preexisting::Bool=false) if preexisting diff --git a/base/reflection.jl b/base/reflection.jl index 3b3e745559593..df4ee17efc89d 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -981,17 +981,20 @@ struct CodegenParams emit_function::Any emitted_function::Any + lookup::Ptr{Cvoid} + function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, static_alloc::Bool=true, prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), module_setup=nothing, module_activation=nothing, raise_exception=nothing, - emit_function=nothing, emitted_function=nothing) + emit_function=nothing, emitted_function=nothing, + lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred)) return new( Cint(track_allocations), Cint(code_coverage), Cint(static_alloc), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, module_setup, module_activation, raise_exception, - emit_function, emitted_function) + emit_function, emitted_function, lookup) end end diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 7ad21841ab693..bb92e185b6e7a 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -248,6 +248,32 @@ static void makeSafeName(GlobalObject &G) G.setName(StringRef(SafeName.data(), SafeName.size())); } +static 
void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world, jl_code_instance_t **ci_out, jl_code_info_t **src_out) +{ + jl_value_t *ci = cgparams.lookup(mi, world, world); + JL_GC_PROMISE_ROOTED(ci); + jl_code_instance_t *codeinst = NULL; + if (ci != jl_nothing) { + codeinst = (jl_code_instance_t*)ci; + *src_out = (jl_code_info_t*)codeinst->inferred; + jl_method_t *def = codeinst->def->def.method; + if ((jl_value_t*)*src_out == jl_nothing) + *src_out = NULL; + if (*src_out && jl_is_method(def)) + *src_out = jl_uncompress_ir(def, codeinst, (jl_array_t*)*src_out); + } + if (*src_out == NULL || !jl_is_code_info(*src_out)) { + if (cgparams.lookup != jl_rettype_inferred) { + jl_error("Refusing to automatically run type inference with custom cache lookup."); + } else { + *src_out = jl_type_infer(mi, world, 0); + codeinst = jl_get_method_inferred(mi, (*src_out)->rettype, (*src_out)->min_world, (*src_out)->max_world); + if ((*src_out)->inferred && !codeinst->inferred) + codeinst->inferred = jl_nothing; + } + } + *ci_out = codeinst; +} // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can @@ -294,23 +320,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p // then we want to compile and emit this if (mi->def.method->primary_world <= params.world && params.world <= mi->def.method->deleted_world) { // find and prepare the source code to compile - jl_value_t *ci = jl_rettype_inferred(mi, params.world, params.world); jl_code_instance_t *codeinst = NULL; - if (ci != jl_nothing) { - codeinst = (jl_code_instance_t*)ci; - src = (jl_code_info_t*)codeinst->inferred; - jl_method_t *def = codeinst->def->def.method; - if ((jl_value_t*)src == jl_nothing) - src = NULL; - if (src && jl_is_method(def)) - src = jl_uncompress_ir(def, codeinst, (jl_array_t*)src); - } - if (src == NULL || !jl_is_code_info(src)) { - src = jl_type_infer(mi, params.world, 0); - codeinst = jl_get_method_inferred(mi, src->rettype, src->min_world, src->max_world); - if (src->inferred && !codeinst->inferred) - codeinst->inferred = jl_nothing; - } + jl_ci_cache_lookup(cgparams, mi, params.world, &codeinst, &src); if (src && !emitted.count(codeinst)) { // now add it to our compilation results JL_GC_PROMISE_ROOTED(codeinst->rettype); diff --git a/src/codegen.cpp b/src/codegen.cpp index b171db9047465..ca5e6c6c7dba3 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -366,7 +366,8 @@ extern "C" { #else 1, #endif - jl_default_debug_info_kind, NULL, NULL, NULL, NULL, NULL}; + jl_default_debug_info_kind, NULL, NULL, NULL, NULL, NULL, + jl_rettype_inferred }; } template @@ -2794,7 +2795,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt) } } else { - jl_value_t *ci = jl_rettype_inferred(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here + jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr if (codeinst->invoke == jl_fptr_const_return) { diff --git a/src/gf.c b/src/gf.c index 183a6c74d7187..1f71b422bf679 100644 --- a/src/gf.c +++ b/src/gf.c @@ -209,10 +209,12 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ // ----- MethodInstance 
specialization instantiation ----- // JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*); -JL_DLLEXPORT jl_code_instance_t* jl_set_method_inferred( +JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( jl_method_instance_t *mi, jl_value_t *rettype, jl_value_t *inferred_const, jl_value_t *inferred, int32_t const_flags, size_t min_world, size_t max_world); +JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, + jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED { @@ -236,8 +238,10 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a m->unspecialized = mi; jl_gc_wb(m, mi); - jl_code_instance_t *codeinst = jl_set_method_inferred(mi, (jl_value_t*)jl_any_type, jl_nothing, jl_nothing, + jl_code_instance_t *codeinst = jl_new_codeinst(mi, + (jl_value_t*)jl_any_type, jl_nothing, jl_nothing, 0, 1, ~(size_t)0); + jl_mi_cache_insert(mi, codeinst); codeinst->specptr.fptr1 = fptr; codeinst->invoke = jl_fptr_args; @@ -344,7 +348,7 @@ JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *mi, size_t mi JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( - jl_method_instance_t *mi, jl_value_t *rettype, + jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, size_t min_world, size_t max_world) { jl_code_instance_t *codeinst = mi->cache; @@ -356,13 +360,15 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( } codeinst = codeinst->next; } - return jl_set_method_inferred( + codeinst = jl_new_codeinst( mi, rettype, NULL, NULL, 0, min_world, max_world); + jl_mi_cache_insert(mi, codeinst); + return codeinst; } -JL_DLLEXPORT jl_code_instance_t *jl_set_method_inferred( - jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, +JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( + jl_method_instance_t *mi, jl_value_t *rettype, jl_value_t *inferred_const, jl_value_t *inferred, int32_t const_flags, size_t min_world, size_t max_world /*, jl_array_t *edges, int absolute_max*/) @@ -388,15 +394,24 @@ JL_DLLEXPORT jl_code_instance_t *jl_set_method_inferred( } codeinst->specptr.fptr = NULL; codeinst->isspecsig = 0; + codeinst->next = NULL; + JL_GC_POP(); + return codeinst; +} + +JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, + jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) +{ + JL_GC_PUSH1(&ci); if (jl_is_method(mi->def.method)) JL_LOCK(&mi->def.method->writelock); - codeinst->next = mi->cache; - mi->cache = codeinst; - jl_gc_wb(mi, codeinst); + ci->next = mi->cache; + mi->cache = ci; + jl_gc_wb(mi, ci); if (jl_is_method(mi->def.method)) JL_UNLOCK(&mi->def.method->writelock); JL_GC_POP(); - return codeinst; + return; } static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) @@ -1938,20 +1953,24 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t if (jl_is_method(def) && def->unspecialized) { jl_code_instance_t *unspec = def->unspecialized->cache; if (unspec && unspec->invoke != NULL) { - jl_code_instance_t *codeinst = jl_set_method_inferred(mi, (jl_value_t*)jl_any_type, NULL, NULL, + jl_code_instance_t *codeinst = jl_new_codeinst(mi, + (jl_value_t*)jl_any_type, NULL, NULL, 0, 1, ~(size_t)0); codeinst->isspecsig = 0; codeinst->specptr = unspec->specptr; codeinst->rettype_const = unspec->rettype_const; jl_atomic_store_release(&codeinst->invoke, unspec->invoke); + jl_mi_cache_insert(mi, codeinst); return 
codeinst; } } jl_code_info_t *src = jl_code_for_interpreter(mi); if (!jl_code_requires_compiler(src)) { - jl_code_instance_t *codeinst = jl_set_method_inferred(mi, (jl_value_t*)jl_any_type, NULL, NULL, + jl_code_instance_t *codeinst = jl_new_codeinst(mi, + (jl_value_t*)jl_any_type, NULL, NULL, 0, 1, ~(size_t)0); jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call); + jl_mi_cache_insert(mi, codeinst); return codeinst; } if (jl_options.compile_enabled == JL_OPTIONS_COMPILE_OFF) { @@ -1972,12 +1991,13 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t ucache->invoke != jl_fptr_interpret_call) { return ucache; } - codeinst = jl_set_method_inferred(mi, (jl_value_t*)jl_any_type, NULL, NULL, + codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL, 0, 1, ~(size_t)0); codeinst->isspecsig = 0; codeinst->specptr = ucache->specptr; codeinst->rettype_const = ucache->rettype_const; jl_atomic_store_release(&codeinst->invoke, ucache->invoke); + jl_mi_cache_insert(mi, codeinst); } return codeinst; } diff --git a/src/julia.h b/src/julia.h index a90aa6dc8b265..d7d255b9cd19b 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2034,7 +2034,10 @@ typedef struct { #define jl_root_task (jl_get_ptls_states()->root_task) // codegen interface ---------------------------------------------------------- - +// The root propagation here doesn't have to be literal, but callers should +// ensure that the return value outlives the MethodInstance +typedef jl_value_t *(*jl_codeinstance_lookup_t)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, + size_t min_world, size_t max_world); typedef struct { int track_allocations; // can we track allocations? int code_coverage; // can we measure coverage? @@ -2072,6 +2075,9 @@ typedef struct { // parameters: MethodInstance, CodeInfo, world age as UInt // return value: none jl_value_t *emitted_function; + + // Cache access. Default: jl_rettype_inferred. + jl_codeinstance_lookup_t lookup; } jl_cgparams_t; extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams; extern JL_DLLEXPORT int jl_default_debug_info_kind; diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index fec496f321b5c..455e8ea75aef6 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -1145,7 +1145,7 @@ end function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val)) interp = Core.Compiler.NativeInterpreter() - linfo = Core.Compiler.inf_for_methodinstance(interp, get_linfo(f, t), Core.Compiler.get_world_counter())::Core.CodeInstance + linfo = Core.Compiler.getindex(Core.Compiler.code_cache(interp), get_linfo(f, t)) # If coverage is not enabled, make the check strict by requiring constant ABI # Otherwise, check the typed AST to make sure we return a constant. 
if Base.JLOptions().code_coverage == 0 diff --git a/test/reflection.jl b/test/reflection.jl index 4b77f2e2ac058..9e47a3a0b508a 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -514,14 +514,14 @@ let @test !isempty(m.specializations) # uncached, but creates the specializations entry mi = Core.Compiler.specialize_method(m, Tuple{ft}, Core.svec()) interp = Core.Compiler.NativeInterpreter(world) - @test Core.Compiler.inf_for_methodinstance(interp, mi, world) === nothing + @test !Core.Compiler.haskey(Core.Compiler.code_cache(interp), mi) @test !isdefined(mi, :cache) code_typed(f18888, Tuple{}; optimize=true) @test !isdefined(mi, :cache) Base.return_types(f18888, Tuple{}) - @test Core.Compiler.inf_for_methodinstance(interp, mi, world) === mi.cache + @test Core.Compiler.getindex(Core.Compiler.code_cache(interp), mi) === mi.cache @test mi.cache isa Core.CodeInstance @test !isdefined(mi.cache, :next) end From 770487fa94fae7a6090d0c3616af838a148f1391 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 4 Jun 2020 18:43:42 -0400 Subject: [PATCH 2/4] Update base/compiler/cicache.jl Co-authored-by: Tim Besard --- base/compiler/cicache.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/compiler/cicache.jl b/base/compiler/cicache.jl index 5e5690ff3a014..d82665b61fc9a 100644 --- a/base/compiler/cicache.jl +++ b/base/compiler/cicache.jl @@ -27,7 +27,7 @@ struct WorldView{Cache} end WorldView(cache, r::UnitRange) = WorldView(cache, first(r), last(r)) WorldView(cache, world::UInt) = WorldView(cache, world, world) -WorldView(wvc::WorldView{InternalCodeCache}, min_world::UInt, max_world::UInt) = +WorldView(wvc::WorldView, min_world::UInt, max_world::UInt) = WorldView(wvc.cache, min_world, max_world) function haskey(wvc::WorldView{InternalCodeCache}, mi::MethodInstance) From 914d7f76e829211d1d2ae9cc2f1c23db54a1a0c5 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 5 Jun 2020 14:53:57 -0400 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Jameson Nash --- base/compiler/typeinfer.jl | 2 +- base/compiler/types.jl | 10 +--------- src/aotcompile.cpp | 3 ++- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 1df620436defb..8f5960b52df5d 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -137,7 +137,7 @@ already_inferred_quick_test(interp::AbstractInterpreter, mi::MethodInstance) = false # inference completed on `me` -# update the MethodInstance and notify the edges +# update the MethodInstance function cache_result!(interp::AbstractInterpreter, result::InferenceResult, min_valid::UInt, max_valid::UInt) # check if the existing linfo metadata is also sufficient to describe the current inference result # to decide if it is worth caching this diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 4a8feb261da99..4fc49c9fa2bb0 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -181,15 +181,7 @@ code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, ni.world) """ lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) -Locks `mi`'s inference flag to prevent infinite recursion inside inference. -While running inference, we must occaisionally compile additional code, which -may in turn request additional code to be inferred. This can happen during -bootstrap (for part of inference itself), but also during regular execution (e.g. -to expand generated functions). 
The inference flag lets the runtime know that -it should not attempt to re-infer the requested functions as it is being worked -on higher in the stack. Not that inference itself does not look at this flag, -instead checking its own working caches - it is only used for locking the C -runtime. +Hint that `mi` is in inference to help accelerate bootstrapping. This helps limit the amount of wasted work we might do when inference is working on initially inferring itself by letting us detect when inference is already in progress and not running a second copy on it. This creates a data-race, but the entry point into this code from C (jl_type_infer) already includes detection and restriction on recursion, so it is hopefully mostly a benign problem (since it should really only happen during the first phase of bootstrapping that we encounter this flag). """ lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index bb92e185b6e7a..1cfe0772856ec 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -265,7 +265,8 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance if (*src_out == NULL || !jl_is_code_info(*src_out)) { if (cgparams.lookup != jl_rettype_inferred) { jl_error("Refusing to automatically run type inference with custom cache lookup."); - } else { + } + else { *src_out = jl_type_infer(mi, world, 0); codeinst = jl_get_method_inferred(mi, (*src_out)->rettype, (*src_out)->min_world, (*src_out)->max_world); if ((*src_out)->inferred && !codeinst->inferred) From 2df4e31e0d3587523210e92c65a510ffe6a40682 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 8 Jun 2020 18:22:52 -0400 Subject: [PATCH 4/4] Rename always_cache_tree -> !allow_discard_tree --- base/compiler/typeinfer.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 8f5960b52df5d..12b3da4a4e2db 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -81,7 +81,7 @@ end function CodeInstance(result::InferenceResult, min_valid::UInt, max_valid::UInt, - may_compress=true, always_cache_tree=false) + may_compress=true, allow_discard_tree=true) inferred_result = result.src local const_flags::Int32 if inferred_result isa Const @@ -103,7 +103,7 @@ function CodeInstance(result::InferenceResult, min_valid::UInt, max_valid::UInt, def = result.linfo.def toplevel = !isa(def, Method) if !toplevel - cache_the_tree = always_cache_tree || (result.src.inferred && + cache_the_tree = !allow_discard_tree || (result.src.inferred && (result.src.inlineable || ccall(:jl_isa_compileable_sig, Int32, (Any, Any), result.linfo.specTypes, def) != 0)) if cache_the_tree
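
For illustration, the extension points added in this series could be exercised from a package roughly as follows. This is a minimal sketch, not part of the patch series: `MyCache`, `MyInterp`, and the exact overload set are assumptions based on the `code_cache`/`WorldView`/`lock_mi_inference` interfaces introduced above.

# A minimal sketch of an external code cache using the hooks added in this
# patch series. `MyCache` and `MyInterp` are hypothetical names.
const CC = Core.Compiler

# A toy cache that ignores world ages entirely.
struct MyCache
    dict::IdDict{Core.MethodInstance,Core.CodeInstance}
end
MyCache() = MyCache(IdDict{Core.MethodInstance,Core.CodeInstance}())

struct MyInterp <: CC.AbstractInterpreter
    native::CC.NativeInterpreter
    cache::MyCache
end
MyInterp() = MyInterp(CC.NativeInterpreter(), MyCache())

# Forward the pre-existing AbstractInterpreter queries to the wrapped native interpreter.
CC.InferenceParams(i::MyInterp)     = CC.InferenceParams(i.native)
CC.OptimizationParams(i::MyInterp)  = CC.OptimizationParams(i.native)
CC.get_world_counter(i::MyInterp)   = CC.get_world_counter(i.native)
CC.get_inference_cache(i::MyInterp) = CC.get_inference_cache(i.native)

# The new hooks: route cache accesses to MyCache instead of GLOBAL_CI_CACHE,
# and make the inInference "locking" a no-op (it only matters for the native cache).
CC.code_cache(i::MyInterp) = CC.WorldView(i.cache, CC.get_world_counter(i))
CC.lock_mi_inference(::MyInterp, ::Core.MethodInstance) = nothing
CC.unlock_mi_inference(::MyInterp, ::Core.MethodInstance) = nothing

# WorldView access for the custom cache; since MyCache has no world bounds,
# the view simply forwards to the underlying dict.
CC.haskey(wvc::CC.WorldView{MyCache}, mi::Core.MethodInstance) = haskey(wvc.cache.dict, mi)
CC.get(wvc::CC.WorldView{MyCache}, mi::Core.MethodInstance, default) = get(wvc.cache.dict, mi, default)
CC.getindex(wvc::CC.WorldView{MyCache}, mi::Core.MethodInstance) = wvc.cache.dict[mi]
CC.setindex!(wvc::CC.WorldView{MyCache}, ci::Core.CodeInstance, mi::Core.MethodInstance) =
    setindex!(wvc.cache.dict, ci, mi)

Inference run through such an interpreter (e.g. `Core.Compiler.typeinf_ext(MyInterp(), mi)`) should then read and write `MyCache` rather than the global internal cache. On the codegen side, the analogous knob is the new `lookup` field of `CodegenParams`/`jl_cgparams_t`, which defaults to `jl_rettype_inferred` and can be pointed at a different C-callable cache lookup.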