Skip to content

Commit

Permalink
add --strip-metadata option (#42513)
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson committed Nov 2, 2021
1 parent e03ead0 commit d9b1a3c
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 9 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Compiler/Runtime improvements
Command-line option changes
---------------------------

* New option `--strip-metadata` to remove docstrings, source location information, and local
variable names when building a system image.

Multi-threading changes
-----------------------
Expand Down
6 changes: 3 additions & 3 deletions base/docs/Docs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ const META = gensym(:meta)
const METAType = IdDict{Any,Any}

function meta(m::Module)
if !isdefined(m, META)
if !isdefined(m, META) || getfield(m, META) === nothing
initmeta(m)
end
return getfield(m, META)::METAType
end

function initmeta(m::Module)
if !isdefined(m, META)
Core.eval(m, :(const $META = $(METAType())))
if !isdefined(m, META) || getfield(m, META) === nothing
Core.eval(m, :($META = $(METAType())))
push!(modules, m)
end
nothing
Expand Down
2 changes: 1 addition & 1 deletion base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ function _include_from_serialized(path::String, depmods::Vector{Any})
restored = sv[1]::Vector{Any}
for M in restored
M = M::Module
if isdefined(M, Base.Docs.META)
if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing
push!(Base.Docs.modules, M)
end
if parentmodule(M) === M
Expand Down
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct JLOptions
warn_scope::Int8
image_codegen::Int8
rr_detach::Int8
strip_metadata::Int8
end

# This runs early in the sysimage, when `!=` is not defined yet
Expand Down
8 changes: 5 additions & 3 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6864,7 +6864,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
};
std::vector<unsigned> current_lineinfo, new_lineinfo;
auto coverageVisitStmt = [&] (size_t dbg) {
if (dbg == 0)
if (dbg == 0 || dbg >= linetable.size())
return;
// Compute inlining stack for current line, inner frame first
while (dbg) {
Expand Down Expand Up @@ -6957,8 +6957,10 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
{ // coverage for the function definition line number
const auto &topinfo = linetable.at(0);
if (topinfo == linetable.at(1))
current_lineinfo.push_back(1);
if (linetable.size() > 1) {
if (topinfo == linetable.at(1))
current_lineinfo.push_back(1);
}
if (do_coverage(topinfo.is_user_code))
coverageVisitLine(ctx, topinfo.file, topinfo.line);
}
Expand Down
7 changes: 7 additions & 0 deletions src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ JL_DLLEXPORT void jl_init_options(void)
JL_OPTIONS_WARN_SCOPE_ON, // ambiguous scope warning
0, // image-codegen
0, // rr-detach
0, // strip-metadata
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -163,6 +164,7 @@ static const char opts_hidden[] =
// compiler output options
" --output-o name Generate an object file (including system image data)\n"
" --output-ji name Generate a system image data file (.ji)\n"
" --strip-metadata Remove docstrings and source location info from system image\n"

// compiler debugging (see the devdocs for tips on using these options)
" --output-unopt-bc name Generate unoptimized LLVM bitcode (.bc)\n"
Expand Down Expand Up @@ -212,6 +214,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_bug_report,
opt_image_codegen,
opt_rr_detach,
opt_strip_metadata,
};
static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
static const struct option longopts[] = {
Expand Down Expand Up @@ -265,6 +268,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "lisp", no_argument, 0, 1 },
{ "image-codegen", no_argument, 0, opt_image_codegen },
{ "rr-detach", no_argument, 0, opt_rr_detach },
{ "strip-metadata", no_argument, 0, opt_strip_metadata },
{ 0, 0, 0, 0 }
};

Expand Down Expand Up @@ -689,6 +693,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
case opt_rr_detach:
jl_options.rr_detach = 1;
break;
case opt_strip_metadata:
jl_options.strip_metadata = 1;
break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef struct {
int8_t warn_scope;
int8_t image_codegen;
int8_t rr_detach;
int8_t strip_metadata;
} jl_options_t;

#endif
95 changes: 93 additions & 2 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ static jl_typename_t *jl_idtable_typename = NULL;
static jl_value_t *jl_bigint_type = NULL;
static int gmp_limb_size = 0;

static jl_sym_t *jl_docmeta_sym = NULL;

enum RefTags {
DataRef,
ConstDataRef,
Expand Down Expand Up @@ -399,7 +401,10 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
jl_serialize_value(s, (jl_value_t*)table[i]);
jl_binding_t *b = (jl_binding_t*)table[i+1];
jl_serialize_value(s, b->name);
jl_serialize_value(s, jl_atomic_load_relaxed(&b->value));
if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata)
jl_serialize_value(s, jl_nothing);
else
jl_serialize_value(s, jl_atomic_load_relaxed(&b->value));
jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref));
jl_serialize_value(s, b->owner);
}
Expand Down Expand Up @@ -651,7 +656,10 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b),
((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset);
write_pointerfield(s, (jl_value_t*)b->name);
write_pointerfield(s, jl_atomic_load_relaxed(&b->value));
if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata)
write_pointerfield(s, jl_nothing);
else
write_pointerfield(s, jl_atomic_load_relaxed(&b->value));
write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref));
write_pointerfield(s, (jl_value_t*)b->owner);
size_t flag_offset = offsetof(jl_binding_t, owner) + sizeof(b->owner);
Expand Down Expand Up @@ -1519,6 +1527,81 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
}
}

// Remove source-level metadata from one piece of method code.
//
//   m     - method the code belongs to; forwarded to the IR (de)compression
//           helpers and, when `isdef` is set, its `slot_syms` is rewritten.
//   ci_   - either a CodeInfo object or (anything else) compressed IR.
//   isdef - nonzero when `ci_` is the method's own source definition, in
//           which case `m->slot_syms` is rebuilt from the scrubbed slot names.
//
// Returns the stripped code in the same representation it arrived in: the
// CodeInfo itself (modified in place), or freshly re-compressed IR.
static jl_value_t *strip_codeinfo(jl_method_t *m, jl_value_t *ci_, int isdef)
{
    // Anything that is not already a CodeInfo is handled as compressed IR.
    const int was_compressed = !jl_is_code_info(ci_);
    jl_code_info_t *ci = NULL;
    JL_GC_PUSH1(&ci);
    ci = was_compressed ? jl_uncompress_ir(m, NULL, (jl_array_t*)ci_)
                        : (jl_code_info_t*)ci_;

    // Zero the code locations rather than shrinking the array, so the
    // compiler can keep assuming its length is unchanged.
    jl_array_t *locs = (jl_array_t*)ci->codelocs;
    memset(jl_array_data(locs), 0, jl_array_len(locs)*sizeof(int32_t));

    // Drop every entry of the line table (only when it is stored as an array).
    if (jl_is_array(ci->linetable)) {
        jl_array_t *lines = (jl_array_t*)ci->linetable;
        jl_array_del_end(lines, jl_array_len(lines));
    }

    // Overwrite slot names with `?`. unused_sym is left alone because the
    // compiler inspects it.
    jl_sym_t *placeholder = jl_symbol("?");
    int nslots = jl_array_len(ci->slotnames);
    int k;
    for (k = 0; k < nslots; k++) {
        if (jl_array_ptr_ref(ci->slotnames, k) == (jl_value_t*)jl_unused_sym)
            continue;
        jl_array_ptr_set(ci->slotnames, k, placeholder);
    }

    // For a method definition, keep the method's compressed argument names in
    // sync with the scrubbed slot names.
    if (isdef) {
        m->slot_syms = jl_compress_argnames(ci->slotnames);
        jl_gc_wb(m, m->slot_syms);
    }

    // Hand the result back in the representation the caller gave us.
    jl_value_t *stripped = was_compressed ? (jl_value_t*)jl_compress_ir(m, ci)
                                          : (jl_value_t*)ci;
    JL_GC_POP();
    return stripped;
}

// Strip metadata from the inferred code of every code instance in the cache
// list of method instance `mi`.
static void strip_specializations_(jl_method_instance_t *mi)
{
    assert(jl_is_method_instance(mi));
    jl_code_instance_t *inst;
    // Follow the `next` links starting from the method instance's cache head.
    for (inst = mi->cache; inst != NULL; inst = jl_atomic_load_relaxed(&inst->next)) {
        // Skip entries with no inferred code recorded (NULL or `nothing`).
        if (!inst->inferred || inst->inferred == jl_nothing)
            continue;
        // isdef = 0: this is a specialization, not the method's own source.
        inst->inferred = strip_codeinfo(mi->def.method, inst->inferred, 0);
        jl_gc_wb(inst, inst->inferred);
    }
}

// Typemap visitor callback: strip the source of the method held by `def`,
// then the inferred code of each of its specializations. `_env` is unused.
// Always returns 1 (presumably "keep visiting" for jl_typemap_visitor --
// confirm against its contract).
static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
{
    jl_method_t *meth = def->func.method;

    // The method's own definition: isdef = 1 so slot_syms is refreshed too.
    if (meth->source) {
        meth->source = strip_codeinfo(meth, meth->source, 1);
        jl_gc_wb(meth, meth->source);
    }

    // Specialization slots holding `nothing` are skipped.
    jl_svec_t *specs = meth->specializations;
    size_t nspecs = jl_svec_len(specs);
    size_t idx;
    for (idx = 0; idx < nspecs; idx++) {
        jl_value_t *entry = jl_svecref(specs, idx);
        if (entry == jl_nothing)
            continue;
        strip_specializations_((jl_method_instance_t*)entry);
    }
    return 1;
}

// Method-table callback: run strip_all_codeinfos__ over every definition in
// `mt`. No environment is passed down; `_env` is ignored.
static void strip_all_codeinfos_(jl_methtable_t *mt, void *_env)
{
    (void)_env;
    jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL);
}

// Entry point for metadata stripping: apply strip_all_codeinfos_ to each
// method table visited by jl_foreach_reachable_mtable.
static void jl_strip_all_codeinfos(void)
{
    void *no_env = NULL; // the callbacks take no environment
    jl_foreach_reachable_mtable(strip_all_codeinfos_, no_env);
}

// --- entry points ---

Expand All @@ -1527,6 +1610,8 @@ static void jl_cleanup_serializer2(void);

static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
{
if (jl_options.strip_metadata)
jl_strip_all_codeinfos();
jl_gc_collect(JL_GC_FULL);
jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers
JL_TIMING(SYSIMG_DUMP);
Expand Down Expand Up @@ -1570,6 +1655,12 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")),
jl_symbol("BITS_PER_LIMB"))) / 8;
}
if (jl_base_module) {
jl_value_t *docs = jl_get_global(jl_base_module, jl_symbol("Docs"));
if (docs && jl_is_module(docs)) {
jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META"));
}
}

{ // step 1: record values (recursively) that need to go in the image
size_t i;
Expand Down

0 comments on commit d9b1a3c

Please sign in to comment.