From 0e094a818de1618b58edd56b759ed808accd3f9f Mon Sep 17 00:00:00 2001
From: Kristoffer Carlsson
Date: Wed, 2 Jun 2021 14:45:25 +0200
Subject: [PATCH] prevent doing excessive file system operations in require
 calls (#40890)

---
Review notes (free-form text in this position is not part of the commit message):

 * Purpose of the patch: while `require(into, mod)` runs, a `LoadingCache` stored in
   `LOADING_CACHE[]` memoizes `load_path()`, `dummy_uuid`, `env_project_file` and
   `project_file_manifest_path`, and `parsed_toml` skips re-checking TOML files it
   has already parsed during the same call.
 * This file had been flattened onto a few long lines. Line breaks and blank lines
   were restored so that every hunk matches its `@@ -a,b +c,d @@` counts.
   NOTE(review): indentation of context lines is reconstructed (4-space Julia style)
   and was not visible in the flattened text; if a hunk is rejected, retry with
   `git apply --ignore-whitespace`.
 * Fix in `env_project_file`: the `isdir(env)` branch used to run `project_file =true`
   after the search loop, overwriting a project file the loop had just found, so a
   directory containing a project file was reported (and cached) as an implicit
   environment. The `true` default is now assigned before the loop. The post-image
   blob id on the `index` line of base/loading.jl therefore no longer matches.
 * NOTE(review): `require` resets `LOADING_CACHE[]` to `nothing` in `finally` instead
   of restoring the previous value. If `require(into, mod)` can be re-entered while a
   package is loading, the inner call clears the outer call's cache; confirm that
   this is acceptable.

 base/initdefs.jl |   2 +
 base/loading.jl  | 168 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 118 insertions(+), 52 deletions(-)

diff --git a/base/initdefs.jl b/base/initdefs.jl
index eee4b9ef4f325..2cac786cfd194 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -313,6 +313,8 @@ Return the fully expanded value of [`LOAD_PATH`](@ref) that is searched for proj
 packages.
 """
 function load_path()
+    cache = LOADING_CACHE[]
+    cache !== nothing && return cache.load_path
     paths = String[]
     for env in LOAD_PATH
         path = load_path_expand(env)
diff --git a/base/loading.jl b/base/loading.jl
index 10a59de688b7c..a22cf2c2052bf 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -129,12 +129,21 @@ end
 const ns_dummy_uuid = UUID("fe0723d6-3a44-4c41-8065-ee0f42c8ceab")
 
 function dummy_uuid(project_file::String)
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        uuid = get(cache.dummy_uuid, project_file, nothing)
+        uuid === nothing || return uuid
+    end
     project_path = try
         realpath(project_file)
     catch
         project_file
     end
-    return uuid5(ns_dummy_uuid, project_path)
+    uuid = uuid5(ns_dummy_uuid, project_path)
+    if cache !== nothing
+        cache.dummy_uuid[project_file] = uuid
+    end
+    return uuid
 end
 
 ## package path slugs: turning UUID + SHA1 into a pair of 4-byte "slugs" ##
@@ -210,6 +219,17 @@ function get_updated_dict(p::TOML.Parser, f::CachedTOMLDict)
     return f.d
 end
 
+struct LoadingCache
+    load_path::Vector{String}
+    dummy_uuid::Dict{String, UUID}
+    env_project_file::Dict{String, Union{Bool, String}}
+    project_file_manifest_path::Dict{String, Union{Nothing, String}}
+    require_parsed::Set{String}
+end
+const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
+LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set())
+
+
 struct TOMLCache
     p::TOML.Parser
     d::Dict{String, CachedTOMLDict}
@@ -220,14 +240,25 @@ const TOML_LOCK = ReentrantLock()
 parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, TOML_LOCK)
 function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
     lock(toml_lock) do
-        if !haskey(toml_cache.d, project_file)
+        cache = LOADING_CACHE[]
+        dd = if !haskey(toml_cache.d, project_file)
             d = CachedTOMLDict(toml_cache.p, project_file)
             toml_cache.d[project_file] = d
-            return d.d
+            d.d
         else
             d = toml_cache.d[project_file]
-            return get_updated_dict(toml_cache.p, d)
+            # We are in a require call and have already parsed this TOML file
+            # assume that it is unchanged to avoid hitting disk
+            if cache !== nothing && project_file in cache.require_parsed
+                d.d
+            else
+                get_updated_dict(toml_cache.p, d)
+            end
+        end
+        if cache !== nothing
+            push!(cache.require_parsed, project_file)
         end
+        return dd
     end
 end
 
@@ -352,16 +383,29 @@ const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml"
 #  - `true`: `env` is an implicit environment
 #  - `path`: the path of an explicit project file
 function env_project_file(env::String)::Union{Bool,String}
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        project_file = get(cache.env_project_file, env, nothing)
+        project_file === nothing || return project_file
+    end
     if isdir(env)
+        project_file = true # implicit environment unless a project file is found below
         for proj in project_names
-            project_file = joinpath(env, proj)
-            isfile_casesensitive(project_file) && return project_file
+            maybe_project_file = joinpath(env, proj)
+            if isfile_casesensitive(maybe_project_file)
+                project_file = maybe_project_file
+                break
+            end
         end
-        return true
     elseif basename(env) in project_names && isfile_casesensitive(env)
-        return env
+        project_file = env
+    else
+        project_file = false
     end
-    return false
+    if cache !== nothing
+        cache.env_project_file[env] = project_file
+    end
+    return project_file
 end
 
 function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
@@ -415,10 +459,9 @@ end
 
 # find project file's top-level UUID entry (or nothing)
 function project_file_name_uuid(project_file::String, name::String)::PkgId
-    uuid = dummy_uuid(project_file)
     d = parsed_toml(project_file)
     uuid′ = get(d, "uuid", nothing)::Union{String, Nothing}
-    uuid′ === nothing || (uuid = UUID(uuid′))
+    uuid = uuid′ === nothing ? dummy_uuid(project_file) : UUID(uuid′)
     name = get(d, "name", name)::String
     return PkgId(uuid, name)
 end
@@ -430,18 +473,34 @@ end
 
 # find project file's corresponding manifest file
 function project_file_manifest_path(project_file::String)::Union{Nothing,String}
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        manifest_path = get(cache.project_file_manifest_path, project_file, missing)
+        manifest_path === missing || return manifest_path
+    end
     dir = abspath(dirname(project_file))
     d = parsed_toml(project_file)
     explicit_manifest = get(d, "manifest", nothing)::Union{String, Nothing}
+    manifest_path = nothing
     if explicit_manifest !== nothing
         manifest_file = normpath(joinpath(dir, explicit_manifest))
-        isfile_casesensitive(manifest_file) && return manifest_file
+        if isfile_casesensitive(manifest_file)
+            manifest_path = manifest_file
+        end
     end
-    for mfst in manifest_names
-        manifest_file = joinpath(dir, mfst)
-        isfile_casesensitive(manifest_file) && return manifest_file
+    if manifest_path === nothing
+        for mfst in manifest_names
+            manifest_file = joinpath(dir, mfst)
+            if isfile_casesensitive(manifest_file)
+                manifest_path = manifest_file
+                break
+            end
+        end
     end
-    return nothing
+    if cache !== nothing
+        cache.project_file_manifest_path[project_file] = manifest_path
+    end
+    return manifest_path
 end
 
 # given a directory (implicit env from LOAD_PATH) and a name,
@@ -576,10 +635,10 @@ function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::
     hash = SHA1(hash)
     # Keep the 4 since it used to be the default
     uuid = pkg.uuid::UUID # checked within `explicit_manifest_uuid_path`
-    for slug in (version_slug(uuid, hash, 4), version_slug(uuid, hash))
+    for slug in (version_slug(uuid, hash), version_slug(uuid, hash, 4))
         for depot in DEPOT_PATH
-            path = abspath(depot, "packages", pkg.name, slug)
-            ispath(path) && return path
+            path = joinpath(depot, "packages", pkg.name, slug)
+            ispath(path) && return abspath(path)
         end
     end
     return nothing
@@ -876,42 +935,47 @@ For more details regarding code loading, see the manual sections on [modules](@r
 [parallel computing](@ref code-availability).
 """
 function require(into::Module, mod::Symbol)
-    uuidkey = identify_package(into, String(mod))
-    # Core.println("require($(PkgId(into)), $mod) -> $uuidkey")
-    if uuidkey === nothing
-        where = PkgId(into)
-        if where.uuid === nothing
-            throw(ArgumentError("""
-                Package $mod not found in current path:
-                - Run `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.
-                """))
-        else
-            s = """
-            Package $(where.name) does not have $mod in its dependencies:
-            - If you have $(where.name) checked out for development and have
-              added $mod as a dependency but haven't updated your primary
-              environment's manifest file, try `Pkg.resolve()`.
-            - Otherwise you may need to report an issue with $(where.name)"""
-
-            uuidkey = identify_package(PkgId(string(into)), String(mod))
-            uuidkey === nothing && throw(ArgumentError(s))
-
-            # fall back to toplevel loading with a warning
-            if !(where in modules_warned_for)
-                @warn string(
-                    full_warning_showed[] ? "" : s, "\n",
-                    string("Loading $(mod) into $(where.name) from project dependency, ",
-                           "future warnings for $(where.name) are suppressed.")
-                ) _module = nothing _file = nothing _group = nothing
-                push!(modules_warned_for, where)
+    LOADING_CACHE[] = LoadingCache()
+    try
+        uuidkey = identify_package(into, String(mod))
+        # Core.println("require($(PkgId(into)), $mod) -> $uuidkey")
+        if uuidkey === nothing
+            where = PkgId(into)
+            if where.uuid === nothing
+                throw(ArgumentError("""
+                    Package $mod not found in current path:
+                    - Run `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.
+                    """))
+            else
+                s = """
+                Package $(where.name) does not have $mod in its dependencies:
+                - If you have $(where.name) checked out for development and have
+                  added $mod as a dependency but haven't updated your primary
+                  environment's manifest file, try `Pkg.resolve()`.
+                - Otherwise you may need to report an issue with $(where.name)"""
+
+                uuidkey = identify_package(PkgId(string(into)), String(mod))
+                uuidkey === nothing && throw(ArgumentError(s))
+
+                # fall back to toplevel loading with a warning
+                if !(where in modules_warned_for)
+                    @warn string(
+                        full_warning_showed[] ? "" : s, "\n",
+                        string("Loading $(mod) into $(where.name) from project dependency, ",
+                               "future warnings for $(where.name) are suppressed.")
+                    ) _module = nothing _file = nothing _group = nothing
+                    push!(modules_warned_for, where)
+                end
+                full_warning_showed[] = true
             end
-            full_warning_showed[] = true
         end
+        if _track_dependencies[]
+            push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
+        end
+        return require(uuidkey)
+    finally
+        LOADING_CACHE[] = nothing
     end
-    if _track_dependencies[]
-        push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
-    end
-    return require(uuidkey)
 end
 
 mutable struct PkgOrigin