Skip to content

Commit

Permalink
make GC counters thread-local (JuliaLang#32217)
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson committed Jun 11, 2019
1 parent 5ca47e0 commit 5335a94
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 42 deletions.
4 changes: 4 additions & 0 deletions src/atomics.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
# define jl_atomic_fetch_add(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_add_fetch(obj, arg) \
__atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_fetch_and_relaxed(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
# define jl_atomic_fetch_and(obj, arg) \
Expand Down Expand Up @@ -91,6 +93,8 @@
__atomic_load_n(obj, __ATOMIC_SEQ_CST)
# define jl_atomic_load_acquire(obj) \
__atomic_load_n(obj, __ATOMIC_ACQUIRE)
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_RELAXED)
#elif defined(_COMPILER_MICROSOFT_)
# define jl_signal_fence() _ReadWriteBarrier()

Expand Down
131 changes: 90 additions & 41 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,19 @@ static size_t max_collect_interval = 1250000000UL;
static size_t max_collect_interval = 500000000UL;
#endif

// How many bytes a thread may allocate locally before it must fold its
// count into the global allocation counter (gc_num.allocd).
// NOTE: currently the same for all threads.
static int64_t per_thread_counter_interval(jl_ptls_t ptls)
{
    if (jl_n_threads != 1) {
        // split the global interval across threads with some headroom,
        // but never hand out less than 1 MiB per thread
        size_t share = gc_num.interval / jl_n_threads / 2;
        return share < 1048576 ? 1048576 : share;
    }
    return gc_num.interval;
}

// global variables for GC stats

// Resetting the object to a young object, this is used when marking the
Expand Down Expand Up @@ -802,16 +815,21 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT
jl_gc_queue_root(v);
}

// Run a collection if this thread has exhausted its local allocation
// budget and the global budget is also spent; otherwise just pass
// through the GC safepoint.
// (The old global `should_collect()` macro is gone: each thread now
// counts allocations in ptls->gc_num.allocd, which is biased negative
// by one per-thread interval and only folded into the atomic global
// counter when it reaches zero.)
static inline void maybe_collect(jl_ptls_t ptls)
{
    int should_collect = 0;
    if (ptls->gc_num.allocd >= 0) {
        // local budget used up: re-arm the negative bias and publish the
        // bytes spent since the last flush to the global counter
        int64_t intvl = per_thread_counter_interval(ptls);
        size_t localbytes = ptls->gc_num.allocd + intvl;
        ptls->gc_num.allocd = -intvl;
        // collect once the global counter goes non-negative
        should_collect = (jl_atomic_add_fetch(&gc_num.allocd, localbytes) >= 0);
    }
    if (should_collect || gc_debug_check_other()) {
        jl_gc_collect(0);
    }
    else {
        jl_gc_safepoint_(ptls);
    }
}

// weak references
Expand Down Expand Up @@ -876,12 +894,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
jl_throw(jl_memory_exception);
gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t,
gc_cblist_notify_external_alloc, (v, allocsz));
#ifdef JULIA_ENABLE_THREADING
jl_atomic_fetch_add(&gc_num.allocd, allocsz);
#else
gc_num.allocd += allocsz;
#endif
gc_num.bigalloc++;
ptls->gc_num.allocd += allocsz;
ptls->gc_num.bigalloc++;
#ifdef MEMDEBUG
memset(v, 0xee, allocsz);
#endif
Expand Down Expand Up @@ -973,14 +987,44 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT

// Credit `sz` externally-allocated bytes to the current thread's GC
// allocation counter. Does not itself trigger a collection.
// (The stale `gc_num.allocd += sz;` left over from the pre-thread-local
// version is removed: keeping it would double-count every byte.)
void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
{
    jl_ptls_t ptls = jl_get_ptls_states();
    ptls->gc_num.allocd += sz;
}

// Fold every live thread's local GC counters into *dest.
// A thread's `allocd` is stored biased by minus one per-thread interval,
// so the interval is added back here to recover the true byte count.
static void combine_thread_gc_counts(jl_gc_num_t *dest)
{
    for (int t = 0; t < jl_n_threads; t++) {
        jl_ptls_t ptls = jl_all_tls_states[t];
        if (ptls == NULL)
            continue;
        dest->allocd    += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + per_thread_counter_interval(ptls));
        dest->freed     += jl_atomic_load_relaxed(&ptls->gc_num.freed);
        dest->malloc    += jl_atomic_load_relaxed(&ptls->gc_num.malloc);
        dest->realloc   += jl_atomic_load_relaxed(&ptls->gc_num.realloc);
        dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc);
        dest->bigalloc  += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc);
        dest->freecall  += jl_atomic_load_relaxed(&ptls->gc_num.freecall);
    }
}

static void reset_thread_gc_counts(void)
{
for (int i = 0; i < jl_n_threads; i++) {
jl_ptls_t ptls = jl_all_tls_states[i];
if (ptls) {
memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t));
ptls->gc_num.allocd = -per_thread_counter_interval(ptls);
}
}
}

// Restart allocation accounting: fold the per-thread counters into the
// global totals (so nothing is lost), credit the outstanding bytes to
// live_bytes, then re-arm both the global and per-thread budgets.
// NOTE: the order matters — combine before reset, or the per-thread
// deltas would be dropped.
void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
{
    combine_thread_gc_counts(&gc_num);
    // allocd is biased by -interval, so add the interval back to get
    // the real number of bytes allocated since the last reset
    live_bytes += (gc_num.deferred_alloc + (gc_num.allocd + gc_num.interval));
    gc_num.allocd = -(int64_t)gc_num.interval;
    gc_num.deferred_alloc = 0;
    reset_thread_gc_counts();
}

static size_t array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -1098,16 +1142,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
#ifdef MEMDEBUG
return jl_gc_big_alloc(ptls, osize);
#endif
// FIXME - need JL_ATOMIC_FETCH_AND_ADD here
if (__unlikely((gc_num.allocd += osize) >= 0) || gc_debug_check_pool()) {
//gc_num.allocd -= osize;
jl_gc_collect(0);
//gc_num.allocd += osize;
}
else {
jl_gc_safepoint_(ptls);
}
gc_num.poolalloc++;
maybe_collect(ptls);
ptls->gc_num.allocd += osize;
ptls->gc_num.poolalloc++;
// first try to use the freelist
jl_taggedvalue_t *v = p->freelist;
if (v) {
Expand Down Expand Up @@ -2603,17 +2640,21 @@ JL_DLLEXPORT int jl_gc_is_enabled(void)

// Total bytes ever allocated, including the per-thread counters that
// have not yet been folded into the global gc_num. Works on a local
// snapshot so the global state is left untouched.
// (The stale pre-commit return statement reading gc_num directly is
// removed — it ignored the per-thread counters.)
JL_DLLEXPORT int64_t jl_gc_total_bytes(void)
{
    jl_gc_num_t num = gc_num;
    combine_thread_gc_counts(&num);
    // Sync this logic with `base/util.jl:GC_Diff`
    return (num.total_allocd + num.deferred_alloc +
            num.allocd + num.interval);
}
// Accumulated collection time (gc_num.total_time is incremented by each
// pause in _jl_gc_collect). Units follow whatever clock recorded the
// pause — presumably hrtime ticks, per the name; confirm against caller.
JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
{
    return gc_num.total_time;
}
// Snapshot of the global GC counters with every thread's local counters
// folded in. Operates on a copy; the live gc_num is not modified.
// (The stale pre-commit `return gc_num;` is removed — it bypassed the
// per-thread counters.)
JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
{
    jl_gc_num_t num = gc_num;
    combine_thread_gc_counts(&num);
    return num;
}

JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void)
Expand Down Expand Up @@ -2687,6 +2728,8 @@ static void jl_gc_queue_bt_buf(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp
// Only one thread should be running in this function
static int _jl_gc_collect(jl_ptls_t ptls, int full)
{
combine_thread_gc_counts(&gc_num);

jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
jl_gc_mark_sp_t sp;
gc_mark_sp_init(gc_cache, &sp);
Expand Down Expand Up @@ -2853,6 +2896,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full)
gc_num.total_time += pause;
gc_num.since_sweep = 0;
gc_num.freed = 0;
reset_thread_gc_counts();

return recollect;
}
Expand Down Expand Up @@ -2962,6 +3006,10 @@ void jl_init_thread_heap(jl_ptls_t ptls)
gc_cache->pc_stack = (void**)malloc(init_size * sizeof(void*));
gc_cache->pc_stack_end = gc_cache->pc_stack + init_size;
gc_cache->data_stack = (jl_gc_mark_data_t *)malloc(init_size * sizeof(jl_gc_mark_data_t));

memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t));
assert(gc_num.interval == default_collect_interval);
ptls->gc_num.allocd = -per_thread_counter_interval(ptls);
}

// System-wide initializations
Expand Down Expand Up @@ -2999,8 +3047,8 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
{
jl_ptls_t ptls = jl_get_ptls_states();
maybe_collect(ptls);
gc_num.allocd += sz;
gc_num.malloc++;
ptls->gc_num.allocd += sz;
ptls->gc_num.malloc++;
void *b = malloc(sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -3011,8 +3059,8 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
{
jl_ptls_t ptls = jl_get_ptls_states();
maybe_collect(ptls);
gc_num.allocd += nm*sz;
gc_num.malloc++;
ptls->gc_num.allocd += nm*sz;
ptls->gc_num.malloc++;
void *b = calloc(nm, sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -3021,9 +3069,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)

// free() wrapper that charges `sz` freed bytes to the current thread's
// GC counters so the collector's accounting stays accurate.
// (The stale global `gc_num.freed`/`gc_num.freecall` updates left over
// from the pre-thread-local version are removed: keeping them would
// double-count every free.)
JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    free(p);
    ptls->gc_num.freed += sz;
    ptls->gc_num.freecall++;
}

// older name for jl_gc_counted_free_with_size
Expand All @@ -3037,10 +3086,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
jl_ptls_t ptls = jl_get_ptls_states();
maybe_collect(ptls);
if (sz < old)
gc_num.freed += (old - sz);
ptls->gc_num.freed += (old - sz);
else
gc_num.allocd += (sz - old);
gc_num.realloc++;
ptls->gc_num.allocd += (sz - old);
ptls->gc_num.realloc++;
void *b = realloc(p, sz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand Down Expand Up @@ -3100,8 +3149,8 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
if (allocsz < sz) // overflow in adding offs, size was "negative"
jl_throw(jl_memory_exception);
gc_num.allocd += allocsz;
gc_num.malloc++;
ptls->gc_num.allocd += allocsz;
ptls->gc_num.malloc++;
void *b = malloc_cache_align(allocsz);
if (b == NULL)
jl_throw(jl_memory_exception);
Expand All @@ -3123,10 +3172,10 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
live_bytes += allocsz - oldsz;
}
else if (allocsz < oldsz)
gc_num.freed += (oldsz - allocsz);
ptls->gc_num.freed += (oldsz - allocsz);
else
gc_num.allocd += (allocsz - oldsz);
gc_num.realloc++;
ptls->gc_num.allocd += (allocsz - oldsz);
ptls->gc_num.realloc++;

void *b;
if (isaligned)
Expand Down
3 changes: 2 additions & 1 deletion src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -742,10 +742,11 @@ void _julia_init(JL_IMAGE_SEARCH rel)
jl_error("cannot generate code-coverage or track allocation information while generating a .o or .bc output file");
}

jl_gc_init();

jl_init_threading();
jl_init_intrinsic_properties();

jl_gc_init();
jl_gc_enable(0);

jl_resolve_sysimg_location(rel);
Expand Down
11 changes: 11 additions & 0 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ typedef struct {
uint16_t osize; // size of objects in this pool
} jl_gc_pool_t;

// Per-thread GC allocation statistics (ptls->gc_num); folded into the
// global gc_num by combine_thread_gc_counts and cleared by
// reset_thread_gc_counts.
typedef struct {
    int64_t allocd;     // bytes allocated; biased negative by one interval (see reset_thread_gc_counts)
    int64_t freed;      // bytes released through the counted-free entry points
    uint64_t malloc;    // count of counted malloc/calloc-style allocations
    uint64_t realloc;   // count of counted reallocations
    uint64_t poolalloc; // count of pool allocations
    uint64_t bigalloc;  // count of big-object allocations
    uint64_t freecall;  // count of counted free calls
} jl_thread_gc_num_t;

typedef struct {
// variable for tracking weak references
arraylist_t weak_refs;
Expand Down Expand Up @@ -156,6 +166,7 @@ struct _jl_tls_states_t {
volatile int8_t in_finalizer;
int8_t disable_gc;
jl_thread_heap_t heap;
jl_thread_gc_num_t gc_num;
uv_mutex_t sleep_lock;
uv_cond_t wake_signal;
volatile sig_atomic_t defer_signal;
Expand Down

0 comments on commit 5335a94

Please sign in to comment.