Skip to content

Commit

Permalink
atexit: move hook before precompile output (#51849)
Browse files Browse the repository at this point in the history
To show how this is used, this updates the profile_printing_listener
background job to use this mechanism.
  • Loading branch information
vtjnash committed Nov 15, 2023
2 parents a26e23a + d8a410c commit 539ca89
Show file tree
Hide file tree
Showing 14 changed files with 142 additions and 82 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ New language features

Language changes
----------------
* During precompilation, the `atexit` hooks now run before saving the output file. This
allows users to safely tear down background state (such as closing Timers and sending
disconnect notifications to heartbeat tasks) and cleanup other resources when the program
wants to begin exiting.

Compiler/Runtime improvements
-----------------------------
Expand Down
31 changes: 22 additions & 9 deletions base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ include("summarysize.jl")
include("errorshow.jl")

include("initdefs.jl")
Filesystem.__postinit__()

# worker threads
include("threadcall.jl")
Expand Down Expand Up @@ -594,14 +595,12 @@ if is_primary_base_module
# Profiling helper
# triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request
# Needs to be in Base because Profile is no longer loaded on boot
const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}()
function profile_printing_listener()
function profile_printing_listener(cond::Base.AsyncCondition)
profile = nothing
try
while true
wait(PROFILE_PRINT_COND[])
profile = @something(profile, require(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))

while _trywait(cond)
# this call to require is mostly legal, only because Profile has no dependencies and is usually in LOAD_PATH
profile = @something(profile, require(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))::Module
invokelatest(profile.peek_report[])
if Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true
println(stderr, "Saving heap snapshot...")
Expand All @@ -614,10 +613,13 @@ function profile_printing_listener()
@error "Profile printing listener crashed" exception=ex,catch_backtrace()
end
end
nothing
end

function __init__()
# Base library init
global _atexit_hooks_finished = false
Filesystem.__postinit__()
reinit_stdio()
Multimedia.reinit_displays() # since Multimedia.displays uses stdout as fallback
# initialize loading
Expand All @@ -633,9 +635,20 @@ function __init__()
# triggering a profile via signals is not implemented on windows
cond = Base.AsyncCondition()
Base.uv_unref(cond.handle)
PROFILE_PRINT_COND[] = cond
ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle)
errormonitor(Threads.@spawn(profile_printing_listener()))
t = errormonitor(Threads.@spawn(profile_printing_listener(cond)))
atexit() do
# destroy this callback when exiting
ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
# this will prompt any ongoing or pending event to flush also
close(cond)
# error-propagation is not needed, since the errormonitor will handle printing that better
_wait(t)
end
finalizer(cond) do c
# if something goes south, still make sure we aren't keeping a reference in C to this
ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
end
ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle)
end
_require_world_age[] = get_world_counter()
# Prevent spawned Julia process from getting stuck waiting on Tracy to connect.
Expand Down
61 changes: 48 additions & 13 deletions base/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -521,37 +521,70 @@ const TEMP_CLEANUP = Dict{String,Bool}()
const TEMP_CLEANUP_LOCK = ReentrantLock()

function temp_cleanup_later(path::AbstractString; asap::Bool=false)
lock(TEMP_CLEANUP_LOCK)
@lock TEMP_CLEANUP_LOCK begin
# each path should only be inserted here once, but if there
# is a collision, let !asap win over asap: if any user might
# still be using the path, don't delete it until process exit
TEMP_CLEANUP[path] = get(TEMP_CLEANUP, path, true) & asap
if length(TEMP_CLEANUP) > TEMP_CLEANUP_MAX[]
temp_cleanup_purge()
temp_cleanup_purge_prelocked(false)
TEMP_CLEANUP_MAX[] = max(TEMP_CLEANUP_MIN[], 2*length(TEMP_CLEANUP))
end
unlock(TEMP_CLEANUP_LOCK)
return nothing
end
nothing
end

function temp_cleanup_purge(; force::Bool=false)
need_gc = Sys.iswindows()
for (path, asap) in TEMP_CLEANUP
function temp_cleanup_forget(path::AbstractString)
@lock TEMP_CLEANUP_LOCK delete!(TEMP_CLEANUP, path)
nothing
end

function temp_cleanup_purge_prelocked(force::Bool)
filter!(TEMP_CLEANUP) do (path, asap)
try
if (force || asap) && ispath(path)
need_gc && GC.gc(true)
need_gc = false
ispath(path) || return false
if force || asap
prepare_for_deletion(path)
rm(path, recursive=true, force=true)
end
!ispath(path) && delete!(TEMP_CLEANUP, path)
return ispath(path)
catch ex
@warn """
Failed to clean up temporary path $(repr(path))
$ex
""" _group=:file
ex isa InterruptException && rethrow()
return true
end
end
nothing
end

function temp_cleanup_purge_all()
may_need_gc = false
@lock TEMP_CLEANUP_LOCK filter!(TEMP_CLEANUP) do (path, asap)
try
ispath(path) || return false
may_need_gc = true
return true
catch ex
ex isa InterruptException && rethrow()
return true
end
end
if may_need_gc
# this is only usually required on Sys.iswindows(), but may as well do it everywhere
GC.gc(true)
end
@lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(true)
nothing
end

# deprecated internal function used by some packages
temp_cleanup_purge(; force=false) = force ? temp_cleanup_purge_all() : @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(false)

function __postinit__()
Base.atexit(temp_cleanup_purge_all)
end

const temp_prefix = "jl_"
Expand Down Expand Up @@ -733,10 +766,11 @@ temporary file upon completion.
See also: [`mktempdir`](@ref).
"""
function mktemp(fn::Function, parent::AbstractString=tempdir())
(tmp_path, tmp_io) = mktemp(parent, cleanup=false)
(tmp_path, tmp_io) = mktemp(parent)
try
fn(tmp_path, tmp_io)
finally
temp_cleanup_forget(tmp_path)
try
close(tmp_io)
ispath(tmp_path) && rm(tmp_path)
Expand All @@ -761,10 +795,11 @@ See also: [`mktemp`](@ref), [`mkdir`](@ref).
"""
function mktempdir(fn::Function, parent::AbstractString=tempdir();
prefix::AbstractString=temp_prefix)
tmpdir = mktempdir(parent; prefix=prefix, cleanup=false)
tmpdir = mktempdir(parent; prefix=prefix)
try
fn(tmpdir)
finally
temp_cleanup_forget(tmpdir)
try
if ispath(tmpdir)
prepare_for_deletion(tmpdir)
Expand Down
10 changes: 4 additions & 6 deletions base/initdefs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,7 @@ end

## atexit: register exit hooks ##

const atexit_hooks = Callable[
() -> Filesystem.temp_cleanup_purge(force=true)
]
const atexit_hooks = Callable[]
const _atexit_hooks_lock = ReentrantLock()
global _atexit_hooks_finished::Bool = false

Expand Down Expand Up @@ -413,7 +411,7 @@ function _atexit(exitcode::Cint)
# will immediately run here.
while true
local f
Base.@lock _atexit_hooks_lock begin
@lock _atexit_hooks_lock begin
# If this is the last iteration, atomically disable atexit hooks to prevent
# someone from registering a hook that will never be run.
# (We do this inside the loop, so that it is atomic: no one can have registered
Expand All @@ -434,7 +432,7 @@ function _atexit(exitcode::Cint)
end
catch ex
showerror(stderr, ex)
Base.show_backtrace(stderr, catch_backtrace())
show_backtrace(stderr, catch_backtrace())
println(stderr)
end
end
Expand All @@ -454,7 +452,7 @@ function _postoutput()
f()
catch ex
showerror(stderr, ex)
Base.show_backtrace(stderr, catch_backtrace())
show_backtrace(stderr, catch_backtrace())
println(stderr)
end
end
Expand Down
4 changes: 0 additions & 4 deletions contrib/generate_precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,6 @@ end

generate_precompile_statements()

# As a last step in system image generation,
# remove some references to build time environment for a more reproducible build.
Base.Filesystem.temp_cleanup_purge(force=true)

let stdout = Ref{IO}(stdout)
Base.PROGRAM_FILE = ""
Sys.BINDIR = ""
Expand Down
30 changes: 16 additions & 14 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,25 +246,16 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER

jl_task_t *ct = jl_get_current_task();

if (ct) {
if (exitcode == 0)
jl_write_compiler_output();
if (ct == NULL && jl_base_module) {
ct = container_of(jl_adopt_thread(), jl_task_t, gcstack);
}
else if (ct != NULL) {
// we are about to start tearing everything down, so lets try not to get
// upset by the local mess of things when we run the user's _atexit hooks
// this also forces us into a GC-unsafe region without a safepoint
jl_task_frame_noreturn(ct);
}

if (ct == NULL && jl_base_module)
ct = container_of(jl_adopt_thread(), jl_task_t, gcstack);
else if (ct != NULL)
jl_gc_safepoint_(ct->ptls);

jl_print_gc_stats(JL_STDERR);
if (jl_options.code_coverage)
jl_write_coverage_data(jl_options.output_code_coverage);
if (jl_options.malloc_log)
jl_write_malloc_log();
}

if (jl_base_module) {
jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit"));
Expand All @@ -290,6 +281,15 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
}
}

if (ct && exitcode == 0)
jl_write_compiler_output();

jl_print_gc_stats(JL_STDERR);
if (jl_options.code_coverage)
jl_write_coverage_data(jl_options.output_code_coverage);
if (jl_options.malloc_log)
jl_write_malloc_log();

// replace standard output streams with something that we can still print to
// after the finalizers from base/stream.jl close the TTY
JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
Expand Down Expand Up @@ -710,6 +710,7 @@ extern jl_mutex_t jl_modules_mutex;
extern jl_mutex_t precomp_statement_out_lock;
extern jl_mutex_t newly_inferred_mutex;
extern jl_mutex_t global_roots_lock;
extern jl_mutex_t profile_show_peek_cond_lock;

static void restore_fp_env(void)
{
Expand All @@ -729,6 +730,7 @@ static void init_global_mutexes(void) {
JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock");
JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock");
JL_MUTEX_INIT(&typecache_lock, "typecache_lock");
JL_MUTEX_INIT(&profile_show_peek_cond_lock, "profile_show_peek_cond_lock");
}

JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
Expand Down
4 changes: 1 addition & 3 deletions src/jl_uv.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,10 @@ void jl_wait_empty_begin(void)
}
JL_UV_UNLOCK();
}

void jl_wait_empty_end(void)
{
JL_UV_LOCK();
// n.b. caller must be holding jl_uv_mutex
uv_close((uv_handle_t*)&wait_empty_worker, NULL);
JL_UV_UNLOCK();
}


Expand Down
1 change: 0 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT;
// Returns time in nanosec
JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT;

JL_DLLEXPORT void jl_set_peek_cond(uintptr_t);
JL_DLLEXPORT double jl_get_profile_peek_duration(void);
JL_DLLEXPORT void jl_set_profile_peek_duration(double);

Expand Down
5 changes: 5 additions & 0 deletions src/partr.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,14 @@ void jl_task_wait_empty(void)
ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
if (f)
jl_apply_generic(f, NULL, 0);
// we are back from jl_task_get_next now
ct->world_age = lastage;
wait_empty = NULL;
// TODO: move this lock acquire-release pair to the caller, so that we ensure new work
// (from uv_unref objects) didn't unexpectedly get scheduled and start running behind our back
JL_UV_LOCK();
jl_wait_empty_end();
JL_UV_UNLOCK();
}
}

Expand Down
25 changes: 19 additions & 6 deletions src/signal-handling.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,21 +285,27 @@ void jl_set_profile_peek_duration(double t)
profile_peek_duration = t;
}

uintptr_t profile_show_peek_cond_loc;
JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond)
jl_mutex_t profile_show_peek_cond_lock;
static uv_async_t *profile_show_peek_cond_loc;
JL_DLLEXPORT void jl_set_peek_cond(uv_async_t *cond)
{
JL_LOCK_NOGC(&profile_show_peek_cond_lock);
profile_show_peek_cond_loc = cond;
JL_UNLOCK_NOGC(&profile_show_peek_cond_lock);
}

static void jl_check_profile_autostop(void)
{
if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) {
if (profile_show_peek_cond_loc != NULL && profile_autostop_time != -1.0 && jl_hrtime() > profile_autostop_time) {
profile_autostop_time = -1.0;
jl_profile_stop_timer();
jl_safe_printf("\n==============================================================\n");
jl_safe_printf("Profile collected. A report will print at the next yield point\n");
jl_safe_printf("==============================================================\n\n");
uv_async_send((uv_async_t*)profile_show_peek_cond_loc);
JL_LOCK_NOGC(&profile_show_peek_cond_lock);
if (profile_show_peek_cond_loc != NULL)
uv_async_send(profile_show_peek_cond_loc);
JL_UNLOCK_NOGC(&profile_show_peek_cond_lock);
}
}

Expand Down Expand Up @@ -425,11 +431,18 @@ void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT
ct->gcstack = NULL;
ct->eh = NULL;
ct->world_age = 1;
ct->ptls->locks.len = 0;
// Force all locks to drop. Is this a good idea? Of course not. But the alternative would probably deadlock instead of crashing.
small_arraylist_t *locks = &ct->ptls->locks;
for (size_t i = locks->len; i > 0; i--)
jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
locks->len = 0;
ct->ptls->in_pure_callback = 0;
ct->ptls->in_finalizer = 0;
ct->ptls->defer_signal = 0;
jl_atomic_store_release(&ct->ptls->gc_state, 0); // forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
// forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
jl_atomic_store_release(&ct->ptls->gc_state, 0);
// allow continuing to use a Task that should have already died--unsafe necromancy!
jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
}
}

Expand Down
Loading

0 comments on commit 539ca89

Please sign in to comment.