Skip to content

Commit

Permalink
Merge branch 'master' into source-build--zlib
Browse files Browse the repository at this point in the history
  • Loading branch information
inkydragon committed Jul 5, 2022
2 parents 2fdc646 + fc1093f commit b363bd5
Show file tree
Hide file tree
Showing 8 changed files with 376 additions and 139 deletions.
1 change: 0 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ please remove the `backport-X.Y` tag from the originating pull request for the c
- If you see any unrelated changes to submodules like `deps/libuv`, `deps/openlibm`, etc., try running `git submodule update` first.
- Descriptive commit messages are good.
- Using `git add -p` or `git add -i` can be useful to avoid accidentally committing unrelated changes.
- GitHub does not send notifications when you push a new commit to a pull request, so please add a comment to the pull request thread to let reviewers know when you've made changes.
- When linking to specific lines of code in discussion of an issue or pull request, hit the `y` key while viewing code on GitHub to reload the page with a URL that includes the specific version that you're viewing. That way any lines of code that you refer to will still make sense in the future, even if the content of the file changes.
- Whitespace can be automatically removed from existing commits with `git rebase`.
- To remove whitespace for the previous commit, run
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ If you would rather not compile the latest Julia from source,
platform-specific tarballs with pre-compiled binaries are also
[available for download](https://julialang.org/downloads/). The
downloads page also provides details on the
[different tiers of support](https://julialang.org/downloads/#support-tiers)
[different tiers of support](https://julialang.org/downloads/#supported_platforms)
for OS and platform combinations.

If everything works correctly, you will see a Julia banner and an
Expand Down
355 changes: 240 additions & 115 deletions base/loading.jl

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions deps/checksums/llvm
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ libLLVM.v14.0.5+1.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/b6
libLLVM.v14.0.5+1.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/f60d64488d8cd332c812b0fe393287419456face8a5ab543c257fb5e5d917189e438ec16ab8d04a66645b8dde7eeec5bad2d341926546df8caf66ffbae43abc5
libLLVM.v14.0.5+1.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/ad2a9f52a8fb9e67859ac34e395f2328
libLLVM.v14.0.5+1.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/51d111cbdab11e2d598ae553a922565161b5a66333fc644a8a566040d177965ec0fa377b08e21d9a5836f71feb61d7c0194bade3d8c4b6cba028efb5a1ee03f3
llvm-julia-14.0.5-0.tar.gz/md5/c7df1a3f2cc19201ece78996582f43ce
llvm-julia-14.0.5-0.tar.gz/sha512/51c61d842cb61dab74df6d7263caa8c91e7b5e832bd8665cf40b3c2d8191a8c9665eb8b5ea1499607b6fba9013260f6b087c90ac850dd7e66f5fd37ebc407d15
llvm-julia-14.0.5-1.tar.gz/md5/7f540b9ffc21fbad6e6b349ab7dd1a41
llvm-julia-14.0.5-1.tar.gz/sha512/9842821fd19af8b9f091d8e3b600d83b77794271074b86e9d6505ab0325a40a52e38029b65c349e368e8bd985521203df00f125f93462379ea2931e5c81f9793
llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
6 changes: 3 additions & 3 deletions deps/llvm.version
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ LLVM_JLL_NAME := libLLVM
LLVM_ASSERT_JLL_VER := 14.0.5+1

## source build
LLVM_VER := 14.0.2
LLVM_BRANCH=julia-14.0.5-0
LLVM_SHA1=julia-14.0.5-0
LLVM_VER := 14.0.5
LLVM_BRANCH=julia-14.0.5-1
LLVM_SHA1=julia-14.0.5-1
98 changes: 83 additions & 15 deletions src/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,76 @@ extern "C" {
// careful to match the sequence, if necessary reserving space for something that will
// be updated later.

// It is also necessary to save & restore references to externally-defined objects,
// e.g., for package methods that call methods defined in Base or elsewhere.
// Consequently during deserialization there's a distinction between "reference"
// types, methods, and method instances (essentially like a GlobalRef),
// and "recached" version that refer to the actual entity in the running session.
// We complete deserialization before beginning the process of recaching,
// because we need the backreferences during deserialization and the actual
// objects during recaching.
// It is also necessary to save & restore references to externally-defined
// objects, e.g., for package methods that call methods defined in Base or
// elsewhere. Consequently during deserialization there's a distinction between
// "reference" types, methods, and method instances (essentially like a
// GlobalRef), and "recached" versions that refer to the actual entity in the
// running session. As a concrete example, types have a module in which they are
// defined, but once defined those types can be used by any dependent package.
// We don't store the full type definition again in that dependent package, we
// just encode a reference to that type. In the running session, such references
// are merely pointers to the type-cache, but the specific address is obviously
// not likely to be reproducible across sessions (it will differ between the
// session in which you precompile and the session in which you're using the
// package). Hence, during serialization we recode them as "verbose" references
// (that follow Julia syntax to allow them to be reconstructed), but on
// deserialization we have to replace those verbose references with the
// appropriate pointer in the user's running session. We complete
// deserialization before beginning the process of recaching, because we need
// the backreferences during deserialization and the actual objects during
// recaching.

// Finally, because our backedge graph is not bidirectional, special handling is
// required to identify backedges from external methods that call internal methods.
// These get set aside and restored at the end of deserialization.

// In broad terms, the major steps in serialization are:
// - starting from a "worklist" of modules, write the header. This stores things
// like the Julia build this was precompiled for, the package dependencies,
// the list of include files, file modification times, etc.
// - gather the collection of items to be written to this precompile file. This
// includes items accessible from the module's binding table (if they are owned by a
// worklist module), but also includes things like methods added to external
// functions, instances of external methods that were newly type-inferred
// while precompiling a worklist module, and backedges of callees that were
// called by methods in this package. By and large, these latter items are not
// referenced by the module(s) in the package, and so these have to be
// extracted by traversing the entire system searching for things that do link
// back to a module in the worklist.
// - serialize all the items. The first time we encounter an item, we serialize
// it, and on future references (pointers) to that item we replace them with
// a backreference. `jl_serialize_*` functions handle this work.
// - write source text for the files that defined the package. This is primarily
// to support Revise.jl.

// Deserialization is the mirror image of serialization, but in some ways is
// trickier:
// - we have to merge items into the running session (recaching as described
// above) and handle cases like having two dependent packages caching the same
// MethodInstance of a dependency
// - we have to check for invalidation---the user might have loaded other
// packages that define methods that supersede some of the dispatches chosen
// when the package was precompiled, or this package might define methods that
// supersede dispatches for previously-loaded packages. These two
// possibilities are checked during backedge and method insertion,
// respectively.
// Both of these mean that deserialization requires one to look up a lot of
// things in the running session; for example, for invalidation checks we have
// to do type-intersection between signatures used for MethodInstances and the
// current session's full MethodTable. In practice, such steps dominate package
// loading time (it has very little to do with I/O or deserialization
// performance). Paradoxically, sometimes storing more code in a package can
// lead to faster performance: references to things in the same .ji file can be
// precomputed, but external references have to be looked up. You can see this
// effect in the benchmarks for #43990, where storing external MethodInstances
// and CodeInstances (more code than was stored previously) actually decreased
// load times for many packages.

// Note that one should prioritize deserialization performance over serialization performance,
// since deserialization may be performed much more often than serialization.
// Certain items are preprocessed during serialization to save work when they are
// later deserialized.


// TODO: put WeakRefs on the weak_refs list during deserialization
Expand All @@ -69,9 +124,11 @@ static jl_value_t *deser_symbols[256];
// (the order in the serializer stream). the low
// bit is reserved for flagging certain entries and pos is
// left shifted by 1
static htable_t backref_table;
static htable_t backref_table; // pos = backref_table[obj]
static int backref_table_numel;
static arraylist_t backref_list;
static arraylist_t backref_list; // obj = backref_list[pos]

// set of all CodeInstances yet to be (in)validated
static htable_t new_code_instance_validate;

// list of (jl_value_t **loc, size_t pos) entries
Expand All @@ -83,24 +140,35 @@ static arraylist_t flagref_list;
// like types, methods, and method instances
static htable_t uniquing_table;

// list of (size_t pos, (void *f)(jl_value_t*)) entries
// for the serializer to mark values in need of rework by function f
// list of (size_t pos, itemkey) entries
// for the serializer to mark values in need of rework
// during deserialization later
// This includes items that need rehashing (IdDict, TypeMapLevels)
// and modules.
static arraylist_t reinit_list;

// list of stuff that is being serialized
// list of modules being serialized
// This is not quite globally rooted, but we take care to only
// ever assign rooted values here.
static jl_array_t *serializer_worklist JL_GLOBALLY_ROOTED;
// external MethodInstances we want to serialize
// The set of external MethodInstances we want to serialize
// (methods owned by other modules that were first inferred for a
// module currently being serialized)
static htable_t external_mis;
// Inference tracks newly-inferred MethodInstances during precompilation
// and registers them by calling jl_set_newly_inferred
static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED;

// New roots to add to Methods. These can't be added until after
// recaching is complete, so we have to hold on to them separately
// Stored as method => (worklist_key, roots)
// Stored as method => (worklist_key, newroots)
// The worklist_key is the uuid of the module that triggered addition
// of `newroots`. This is needed because CodeInstances reference
// their roots by "index", and we use a bipartite index
// (module_uuid, integer_index) to make indexes "relocatable"
// (meaning that users can load modules in different orders and
// so the absolute integer index of a root is not reproducible).
// See the "root blocks" section of method.c for more detail.
static htable_t queued_method_roots;

// inverse of backedges graph (caller=>callees hash)
Expand Down
45 changes: 45 additions & 0 deletions src/method.c
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,46 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,

// root blocks

// This section handles method roots. Roots are GC-preserved items needed to
// represent lowered, type-inferred, and/or compiled code. These items are
// stored in a flat list (`m.roots`), and during serialization and
// deserialization of code we replace C-pointers to these items with a
// relocatable reference. We use a bipartite reference, a `(key, index)` pair,
// where `key` identifies the module that added the root and `index` numbers
// just those roots with the same `key`.
//
// During precompilation (serialization), we save roots that were added to
// methods that are tagged with this package's module-key, even for "external"
// methods not owned by a module currently being precompiled. During
// deserialization, we load the new roots and append them to the method. When
// code is deserialized (see ircode.c), we replace the bipartite reference with
// the pointer to the memory address in the current session. The bipartite
// reference allows us to cache both roots and references in precompilation .ji
// files using a naming scheme that is independent of which packages are loaded
// in arbitrary order.
//
// To track the module-of-origin for each root, methods also have a
// `root_blocks` field that uses run-length encoding (RLE) storing `key` and the
// (absolute) integer index within `roots` at which a block of roots with that
// key begins. This makes it possible to look up an individual `(key, index)`
// pair fairly efficiently. A given `key` may possess more than one block; the
// `index` continues to increment regardless of block boundaries.
//
// Roots with `key = 0` are considered to be of unknown origin, and
// CodeInstances referencing such roots will remain unserializable unless all
// such roots were added at the time of system image creation. To track this
// additional data, we use two fields:
//
// - methods have an `nroots_sysimg` field to count the number of roots defined
// at the time of writing the system image (such roots occur first in the list of
// roots). These are the cases with `key = 0` that do not prevent
// serialization.
// - CodeInstances have a `relocatability` field which when 1 indicates that
// every root is "safe," meaning it was either added at sysimg creation or is
// tagged with a non-zero `key`. Even a single unsafe root will cause this to
// have value 0.

// Get the key of the current (final) block of roots
static uint64_t current_root_id(jl_array_t *root_blocks)
{
if (!root_blocks)
Expand All @@ -1028,6 +1068,7 @@ static uint64_t current_root_id(jl_array_t *root_blocks)
return blocks[nx2-2];
}

// Add a new block of `len` roots with key `modid` (module id)
static void add_root_block(jl_array_t *root_blocks, uint64_t modid, size_t len)
{
assert(jl_is_array(root_blocks));
Expand All @@ -1038,6 +1079,7 @@ static void add_root_block(jl_array_t *root_blocks, uint64_t modid, size_t len)
blocks[nx2-1] = len;
}

// Allocate storage for roots
static void prepare_method_for_roots(jl_method_t *m, uint64_t modid)
{
if (!m->roots) {
Expand All @@ -1050,6 +1092,7 @@ static void prepare_method_for_roots(jl_method_t *m, uint64_t modid)
}
}

// Add a single root with owner `mod` to a method
JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root)
{
JL_GC_PUSH2(&m, &root);
Expand All @@ -1066,6 +1109,7 @@ JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_
JL_GC_POP();
}

// Add a list of roots with key `modid` to a method
void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots)
{
JL_GC_PUSH2(&m, &roots);
Expand Down Expand Up @@ -1105,6 +1149,7 @@ jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index)
return jl_array_ptr_ref(m->roots, i);
}

// Count the number of roots added by module with id `key`
int nroots_with_key(jl_method_t *m, uint64_t key)
{
size_t nroots = 0;
Expand Down
4 changes: 2 additions & 2 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ precompile_test_harness(false) do dir
# the module doesn't reload from the image:
@test_warn "@ccallable was already defined for this method name" begin
@test_logs (:warn, "Replacing module `$Foo_module`") begin
ms = Base._require_from_serialized(Base.PkgId(Foo), cachefile)
@test isa(ms, Array{Any,1})
m = Base._require_from_serialized(Base.PkgId(Foo), cachefile)
@test isa(m, Module)
end
end

Expand Down

0 comments on commit b363bd5

Please sign in to comment.