From f5e5a73d4afcbef9e26906c7ad48047666fe16f8 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 4 Aug 2023 21:29:42 -0300 Subject: [PATCH] Add fallback if we have make a weird GC decision. (#50682) If something odd happens during GC (the PC goes to sleep) or a very big transient the heuristics might make a bad decision. What this PR implements is if we try to make our target more than double the one we had before we fallback to a more conservative method. This fixes the new issue @vtjnash found in https://github.com/JuliaLang/julia/issues/40644 for me. --- Make.inc | 6 ++++ src/gc-debug.c | 6 ++-- src/gc.c | 81 +++++++++++++++++++++++++++++++++++---------- test/cmdlineargs.jl | 4 +-- test/testenv.jl | 7 ++++ 5 files changed, 82 insertions(+), 22 deletions(-) diff --git a/Make.inc b/Make.inc index def5f40f97926..a772b5910069c 100644 --- a/Make.inc +++ b/Make.inc @@ -1461,6 +1461,12 @@ endef # Overridable in Make.user WINE ?= wine +ifeq ($(BINARY),32) +HEAPLIM := --heap-size-hint=1000M +else +HEAPLIM := +endif + # many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later ifeq ($(BUILD_OS), WINNT) # MSYS spawn = $(1) diff --git a/src/gc-debug.c b/src/gc-debug.c index c68abf7e40b8e..b7be75aed4cb9 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1410,17 +1410,17 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect if (!gc_logging_enabled) { return; } - jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n", + jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n", pause/1e6, freed/(double)(1<<20), full ? "full" : "incr", recollect ? "recollect" : "" ); - jl_safe_printf("Heap stats: bytes_mapped %.2f MB, heap_size %.2f MB, heap_target %.2f MB, live_bytes %.2f MB\n, Fragmentation %.3f", + jl_safe_printf("Heap stats: bytes_mapped %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n", jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20), + // live_bytes/(double)(1<<20), live byes tracking is not accurate. jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20), jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20), - live_bytes/(double)(1<<20), (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size) ); } diff --git a/src/gc.c b/src/gc.c index 69537c529da60..a8a6a9cd3c742 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,6 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include "julia_gcext.h" #include "julia_assert.h" #ifdef __GLIBC__ @@ -804,8 +805,8 @@ static uint64_t old_heap_size = 0; static uint64_t old_alloc_diff = 0; static uint64_t old_freed_diff = 0; static uint64_t gc_end_time = 0; - - +static int thrash_counter = 0; +static int thrashing = 0; // global variables for GC stats // Resetting the object to a young object, this is used when marking the @@ -1337,7 +1338,10 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc - free_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); } } } @@ -3533,9 +3537,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // If the live data outgrows the suggested max_total_memory // we keep going with minimum intervals and full gcs until // we either free some space or get an OOM error. - if (live_bytes > max_total_memory) { - sweep_full = 1; - } if (gc_sweep_always_full) { sweep_full = 1; } @@ -3569,8 +3570,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t sweep_time = gc_end_time - start_sweep_time; gc_num.total_sweep_time += sweep_time; gc_num.sweep_time = sweep_time; - - int thrashing = 0; // maybe we should report this to the user or error out? + size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); double target_allocs = 0.0; double min_interval = default_collect_interval; @@ -3581,24 +3581,32 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) double collect_smooth_factor = 0.5; double tuning_factor = 0.03; double alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); - double alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); + double alloc_time = jl_gc_smooth(old_mut_time, mutator_time + sweep_time, alloc_smooth_factor); // Charge sweeping to the mutator double gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); - double gc_time = jl_gc_smooth(old_pause_time, pause, collect_smooth_factor); + double gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); old_alloc_diff = alloc_diff; old_mut_time = mutator_time; old_freed_diff = freed_diff; old_pause_time = pause; - old_heap_size = heap_size; - thrashing = gc_time > mutator_time * 98 ? 1 : 0; + old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC + if (gc_time > alloc_time * 95 && !(thrash_counter < 4)) + thrash_counter += 1; + else if (thrash_counter > 0) + thrash_counter -= 1; if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0 ) { double alloc_rate = alloc_mem/alloc_time; double gc_rate = gc_mem/gc_time; target_allocs = sqrt(((double)heap_size/min_interval * alloc_rate)/(gc_rate * tuning_factor)); // work on multiples of min interval } } - if (target_allocs == 0.0 || thrashing) // If we are thrashing go back to default - target_allocs = 2*sqrt((double)heap_size/min_interval); + if (thrashing == 0 && thrash_counter >= 3) + thrashing = 1; + else if (thrashing == 1 && thrash_counter <= 2) + thrashing = 0; // maybe we should report this to the user or error out? + int bad_result = (target_allocs*min_interval + heap_size) > 2 * jl_atomic_load_relaxed(&gc_heap_stats.heap_target); // Don't follow through on a bad decision + if (target_allocs == 0.0 || thrashing || bad_result) // If we are thrashing go back to default + target_allocs = 2*sqrt((double)heap_size/min_interval); uint64_t target_heap = (uint64_t)target_allocs*min_interval + heap_size; if (target_heap > max_total_memory && !thrashing) // Allow it to go over if we are thrashing if we die we die target_heap = max_total_memory; @@ -3852,10 +3860,10 @@ void jl_gc_init(void) total_mem = uv_get_total_memory(); uint64_t constrained_mem = uv_get_constrained_memory(); if (constrained_mem > 0 && constrained_mem < total_mem) - total_mem = constrained_mem; + jl_gc_set_max_memory(constrained_mem - 250*1024*1024); // LLVM + other libraries need some amount of memory #endif if (jl_options.heap_size_hint) - jl_gc_set_max_memory(jl_options.heap_size_hint); + jl_gc_set_max_memory(jl_options.heap_size_hint - 250*1024*1024); jl_gc_mark_sp_t sp = {NULL, NULL, NULL, NULL}; gc_mark_loop(NULL, sp); @@ -3959,7 +3967,26 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz-old); + + int64_t diff = sz - old; + if (diff < 0) { + uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + if (free_acc + diff < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + (-diff)); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + (-diff))); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } + } + else { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + if (alloc_acc + diff < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } + } } return realloc(p, sz); } @@ -4076,7 +4103,27 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz-oldsz); + + int64_t diff = allocsz - oldsz; + if (diff < 0) { + uint64_t free_acc = jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + if (free_acc + diff < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, free_acc + (-diff)); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_acc + (-diff))); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } + } + else { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + if (alloc_acc + diff < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc + diff); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc + diff); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } + } + int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 7f86534f923b3..ffe055dfbd8c9 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -858,6 +858,6 @@ end @test lines[3] == "foo" @test lines[4] == "bar" end -#heap-size-hint -@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "524288000" +#heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.) +@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)" end diff --git a/test/testenv.jl b/test/testenv.jl index 41706dd24e75e..07b81812bb078 100644 --- a/test/testenv.jl +++ b/test/testenv.jl @@ -37,6 +37,13 @@ if !@isdefined(testenv_defined) function addprocs_with_testenv(X; rr_allowed=true, kwargs...) exename = rr_allowed ? `$rr_exename $test_exename` : test_exename +<<<<<<< HEAD +======= + if X isa Integer + heap_size=round(Int,(Sys.free_memory()/(1024^2)/(X+1))) + push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M") + end +>>>>>>> ab94fadc9f (Add fallback if we have make a weird GC decision. (#50682)) addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...) end