Skip to content

Commit

Permalink
allow tasks to request dedicated stack space when created
Browse files Browse the repository at this point in the history
never copy over the root stack:
this is a hybrid approach to COPY_STACK where the root task is never
moved or copied, and all other task stacks are layered into the same
memory area (ptls->basestack + basesize)

several strategies exist for making new stacks:
ucontext_t (where it is available, aka linux)
unw_context_t (as an alternative to ucontext_t that avoids a syscall on task-switch)
makecontext (as a posix standard implementation)
setjmp/longjmp-based implementation (for systems where this is sufficient)
Windows Fibers (implemented here, since we can be more efficient and
    reliable than the official Fibers API)

also uses an alternate stack for collecting stack-overflow backtraces,
like posix, but managed manually
  • Loading branch information
vtjnash committed Sep 19, 2018
1 parent d28a7d5 commit 082d7d2
Show file tree
Hide file tree
Showing 26 changed files with 1,259 additions and 680 deletions.
4 changes: 3 additions & 1 deletion base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@ eval(Core, :(LineInfoNode(mod::Module, method::Symbol, file::Symbol, line::Int,

Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)

Task(@nospecialize(f)) = ccall(:jl_new_task, Ref{Task}, (Any, Int), f, 0)
# Create a Task that runs `f`. `reserved_stack` is handed unchanged to the runtime's
# `jl_new_task` as its second argument and defaults to 0.
# NOTE(review): 0 presumably means "no dedicated stack size requested" -- confirm in jl_new_task.
Task(@nospecialize(f), reserved_stack::Int=0) =
    ccall(:jl_new_task, Ref{Task}, (Any, Int), f, reserved_stack)

# simple convert for use by constructors of types in Core
# note that there is no actual conversion defined here,
Expand Down
8 changes: 4 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ SRCS := \
jltypes gf typemap ast builtins module interpreter symbol \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
threadgroup threading stackwalk gc gc-debug gc-pages method \
threadgroup threading stackwalk gc gc-debug gc-pages gc-stacks method \
jlapi signal-handling safepoint jloptions timing subtype rtutils \
crc32c processor

Expand Down Expand Up @@ -207,14 +207,14 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
$(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h

# archive library file rules
$(BUILDDIR)/support/libsupport.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'

$(BUILDDIR)/support/libsupport-debug.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'

$(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport.a
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a
$(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)'

$(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a
Expand Down
10 changes: 5 additions & 5 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,11 +595,11 @@ static void gc_scrub_task(jl_task_t *ta)
#else
jl_task_t *thread_task = ptls2->root_task;
#endif
if (ta == thread_task)
gc_scrub_range(ptls2->stack_lo, ptls2->stack_hi);
if (ta->stkbuf == (void*)(intptr_t)(-1) || !ta->stkbuf)
return;
gc_scrub_range((char*)ta->stkbuf, (char*)ta->stkbuf + ta->ssize);
void *stkbuf = ta->stkbuf;
if (ta == thread_task && ptls->copy_stack)
gc_scrub_range(ptls2->stackbase, ptls2->stacksize);
else if (stkbuf)
gc_scrub_range((char*)stkbuf, (char*)stkbuf + ta->bufsz);
}

void gc_scrub(void)
Expand Down
1 change: 0 additions & 1 deletion src/gc-pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#ifndef _OS_WINDOWS_
# include <sys/resource.h>
#endif
#include "julia_assert.h"

#ifdef __cplusplus
extern "C" {
Expand Down
177 changes: 177 additions & 0 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc.h"
#ifndef _OS_WINDOWS_
# include <sys/resource.h>
#endif

// Size in bytes (4096 * 16 = 64 KiB) of the guard region that malloc_stack()
// protects at the start (lowest address) of a freshly allocated stack buffer.
// NOTE(review): the 4096 factor presumably stands for the page size -- confirm
// this is still adequate on platforms with larger pages.
const size_t jl_guard_size = (4096 * 16);

#ifdef _OS_WINDOWS_
#define MAP_FAILED NULL
// Windows implementation: reserve and commit `bufsz` bytes of read/write memory
// for use as a task stack, then mark the first `jl_guard_size` bytes of it as a
// guard region (PAGE_GUARD).
//
// Returns the base address of the allocation, or MAP_FAILED (defined as NULL
// for this platform) if either the allocation or the protection change fails.
// On a protection failure the allocation is released before returning.
static void *malloc_stack(size_t bufsz)
{
    void *base = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
    if (base == NULL)
        return MAP_FAILED;
    DWORD dwOldProtect; // receives the previous protection; value is not used
    if (VirtualProtect(base, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &dwOldProtect))
        return base;
    // could not install the guard region: give the memory back
    VirtualFree(base, 0, MEM_RELEASE);
    return MAP_FAILED;
}


// Windows implementation: release a stack buffer obtained from malloc_stack().
// `bufsz` is accepted for signature parity with the non-Windows version but is
// not used; VirtualFree is called with a size of 0 and MEM_RELEASE.
static void free_stack(void *stkbuf, size_t bufsz)
{
    (void)bufsz;
    (void)VirtualFree(stkbuf, 0, MEM_RELEASE);
}

#else

// Non-Windows implementation: map `bufsz` bytes of private, anonymous,
// read/write memory for use as a task stack.
//
// When neither JL_HAVE_UCONTEXT nor JL_HAVE_SIGALTSTACK is defined, the first
// `jl_guard_size` bytes of the mapping are additionally made inaccessible
// (PROT_NONE) so that a stack overflow can be detected.
//
// Returns the base address of the mapping, or MAP_FAILED if mmap or mprotect
// fails (the mapping is unmapped again in the latter case).
static void *malloc_stack(size_t bufsz)
{
    void *region = mmap(NULL, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (region != MAP_FAILED) {
#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
        // install the guard region; on failure, undo the mapping and report failure
        if (mprotect(region, jl_guard_size, PROT_NONE) == -1) {
            munmap(region, bufsz);
            region = MAP_FAILED;
        }
#endif
    }
    return region;
}

// Non-Windows implementation: unmap a `bufsz`-byte stack buffer that was
// obtained from malloc_stack(). The result of munmap is not checked.
static void free_stack(void *stkbuf, size_t bufsz)
{
    (void)munmap(stkbuf, bufsz);
}
#endif


// Stack size classes, in bytes, in strictly increasing order. Each class is
// either a power of two or 1.5x a power of two, from 128 KiB up to 24 MiB.
// Requests no larger than the last entry are rounded up to the nearest class
// so that freed stacks of that exact size can be cached and reused.
// Every entry is a multiple of 4096.
const unsigned pool_sizes[] = {
    128 * 1024,
    192 * 1024,
    256 * 1024,
    384 * 1024,
    512 * 1024,
    768 * 1024,
    1024 * 1024,
    1536 * 1024, // 1.5 MiB (was mistyped as 1537 KiB, which is not a multiple of 4096)
    2048 * 1024,
    3 * 1024 * 1024,
    4 * 1024 * 1024,
    6 * 1024 * 1024,
    8 * 1024 * 1024,
    12 * 1024 * 1024,
    16 * 1024 * 1024,
    24 * 1024 * 1024,
};

// Compile-time check that pool_sizes has exactly JL_N_STACK_POOLS entries.
// Pool ids derived from pool_sizes are used to index ptls->heap.free_stacks,
// so the two must stay in sync.
// NOTE(review): presumably free_stacks is declared with JL_N_STACK_POOLS elements -- confirm.
static_assert(sizeof(pool_sizes) == JL_N_STACK_POOLS * sizeof(pool_sizes[0]), "JL_N_STACK_POOLS size mismatch");

static unsigned select_pool(size_t nb)
{
unsigned pool_id = 0;
while (pool_sizes[pool_id] < nb)
pool_id++;
return pool_id;
}


// Dispose of a stack buffer of `bufsz` bytes on behalf of thread state `ptls`.
//
// If `bufsz` is exactly one of the pool_sizes classes, the buffer is pushed
// onto that class's free list in ptls->heap.free_stacks for later reuse.
// Otherwise (too large, or not an exact class size) it is returned to the
// operating system through free_stack().
static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
{
    if (bufsz > pool_sizes[JL_N_STACK_POOLS - 1]) {
        // larger than any size class: never cached
        free_stack(stkbuf, bufsz);
        return;
    }
    unsigned cls = select_pool(bufsz);
    if (pool_sizes[cls] != bufsz) {
        // falls between two size classes: not cacheable
        free_stack(stkbuf, bufsz);
        return;
    }
    arraylist_push(&ptls->heap.free_stacks[cls], stkbuf);
}


JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
{
_jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
}


// Obtain a stack buffer of at least `*bufsz` bytes for the calling thread.
//
// bufsz: in/out. On entry, the requested size. On successful return, the
//        actual size of the buffer: the request rounded up to a pool_sizes
//        class when it fits in the largest class, otherwise rounded up with
//        LLT_ALIGN to jl_page_size.
// owner: if non-NULL, the task is appended to the thread's heap.live_tasks
//        list.
//
// Returns the buffer's base address. A cached buffer from the thread's
// free_stacks list for the chosen class is reused when one is available;
// otherwise a new one is allocated with malloc_stack(). If that allocation
// fails, jl_memory_exception is thrown and `*bufsz` is left unmodified.
JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    size_t request = *bufsz;
    void *stack = NULL;
    if (request > pool_sizes[JL_N_STACK_POOLS - 1]) {
        // too big for any pool: just round to a whole number of pages
        request = LLT_ALIGN(request, jl_page_size);
    }
    else {
        unsigned cls = select_pool(request);
        request = pool_sizes[cls];
        arraylist_t *cache = &ptls->heap.free_stacks[cls];
        if (cache->len > 0)
            stack = arraylist_pop(cache);
    }
    if (stack == NULL) {
        // TODO: allocate blocks of stacks? but need to mprotect individually anyways
        stack = malloc_stack(request);
        if (stack == MAP_FAILED)
            jl_throw(jl_memory_exception);
    }
    *bufsz = request;
    if (owner)
        arraylist_push(&ptls->heap.live_tasks, owner);
    return stack;
}

// For every thread, walk its heap.live_tasks list and drop each task whose GC
// mark bit is not set. A dropped task's stack buffer (if any) is detached
// from the task and handed to _jl_free_stack() for that thread. The list is
// compacted in place: surviving tasks keep their relative order at the front
// and the list length is reduced by the number of dropped tasks.
//
// TODO: deallocate cached stacks if too many are sitting around unused, e.g.
//   for (stk in halfof(free_stacks))
//       free_stack(stk, pool_sz);
void sweep_stack_pools(void)
{
    for (int tid = 0; tid < jl_n_threads; tid++) {
        jl_ptls_t ptls2 = jl_all_tls_states[tid];
        arraylist_t *tasks = &ptls2->heap.live_tasks;
        size_t total = tasks->len;
        if (total == 0)
            continue;
        void **items = tasks->items;
        size_t kept = 0; // number of surviving tasks settled at the front
        size_t dead = 0; // number of unmarked tasks encountered so far
        for (;;) {
            jl_task_t *task = (jl_task_t*)items[kept];
            if (!gc_marked(jl_astaggedvalue(task)->bits.gc)) {
                dead++;
                void *stkbuf = task->stkbuf;
                size_t bufsz = task->bufsz;
                if (stkbuf) {
                    // detach before releasing so the task no longer refers to the buffer
                    task->stkbuf = NULL;
                    _jl_free_stack(ptls2, stkbuf, bufsz);
                }
            }
            else {
                kept++;
            }
            if (kept >= total - dead)
                break;
            // bring the next unexamined entry into slot `kept`,
            // moving whatever was there behind it
            void *swap = items[kept];
            items[kept] = items[kept + dead];
            items[kept + dead] = swap;
        }
        tasks->len -= dead;
    }
}
35 changes: 15 additions & 20 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,

static void sweep_weak_refs(void)
{
for (int i = 0;i < jl_n_threads;i++) {
for (int i = 0; i < jl_n_threads; i++) {
jl_ptls_t ptls2 = jl_all_tls_states[i];
size_t n = 0;
size_t ndel = 0;
Expand All @@ -710,7 +710,8 @@ static void sweep_weak_refs(void)
while (1) {
jl_weakref_t *wr = (jl_weakref_t*)lst[n];
if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
// weakref itself is alive
// weakref itself is alive,
// so the user could still re-set it to a new value
if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
wr->value = (jl_value_t*)jl_nothing;
n++;
Expand All @@ -722,7 +723,7 @@ static void sweep_weak_refs(void)
break;
void *tmp = lst[n];
lst[n] = lst[n + ndel];
lst[n+ndel] = tmp;
lst[n + ndel] = tmp;
}
ptls2->heap.weak_refs.len -= ndel;
}
Expand Down Expand Up @@ -1026,7 +1027,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
int freedall = 1;
int pg_skpd = 1;
if (!pg->has_marked) {
// lazy version: (empty) if the whole page was already unused, free it
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
// eager version: (freedall) free page as soon as possible
// the eager one uses less memory.
// FIXME - need to do accounting on a per-thread basis
Expand Down Expand Up @@ -2124,19 +2125,13 @@ mark: {
objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
jl_task_t *ta = (jl_task_t*)new_obj;
gc_scrub_record_task(ta);
int stkbuf = (ta->stkbuf != (void*)(intptr_t)-1 && ta->stkbuf != NULL);
void *stkbuf = ta->stkbuf;
int16_t tid = ta->tid;
jl_ptls_t ptls2 = jl_all_tls_states[tid];
if (stkbuf) {
#ifdef COPY_STACKS
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->bufsz);
#else
// stkbuf isn't owned by julia for the root task
if (ta != ptls2->root_task) {
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->ssize);
}
if (stkbuf && ta->copy_stack)
gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
#endif
}
jl_gcframe_t *s = NULL;
size_t nroots;
uintptr_t offset = 0;
Expand All @@ -2148,9 +2143,11 @@ mark: {
else if (stkbuf) {
s = ta->gcstack;
#ifdef COPY_STACKS
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->ssize;
offset = (uintptr_t)ta->stkbuf - lb;
if (ta->copy_stack) {
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->copy_stack;
offset = (uintptr_t)stkbuf - lb;
}
#endif
}
if (s) {
Expand Down Expand Up @@ -2278,10 +2275,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp)
if (jl_all_methods != NULL)
gc_mark_queue_obj(gc_cache, sp, jl_all_methods);

#ifndef COPY_STACKS
gc_mark_queue_obj(gc_cache, sp, jl_unprotect_stack_func);
#endif

// constants
gc_mark_queue_obj(gc_cache, sp, jl_typetype_type);
gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
Expand Down Expand Up @@ -2564,6 +2557,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full)
scanned_bytes = 0;
// 5. start sweeping
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_other(ptls, sweep_full);
gc_scrub();
gc_verify_tags();
Expand Down Expand Up @@ -2687,6 +2681,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
p[i].newpages = NULL;
}
arraylist_new(&heap->weak_refs, 0);
arraylist_new(&heap->live_tasks, 0);
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
Expand Down
1 change: 1 addition & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, gc_mark_sp_t *sp);
void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp,
arraylist_t *list, size_t start);
void gc_mark_loop(jl_ptls_t ptls, gc_mark_sp_t sp);
void sweep_stack_pools(void);
void gc_debug_init(void);

extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];
Expand Down
Loading

0 comments on commit 082d7d2

Please sign in to comment.