diff --git a/base/base.jl b/base/base.jl index daf59b0f9433a..7f67b265d34e4 100644 --- a/base/base.jl +++ b/base/base.jl @@ -154,7 +154,7 @@ end finalize(o::ANY) = ccall(:jl_finalize, Void, (Any,), o) -gc() = ccall(:jl_gc_collect, Void, ()) +gc(full = true) = ccall(:jl_gc_collect, Void, (Int,), full ? 1 : 0) gc_enable() = ccall(:jl_gc_enable, Void, ()) gc_disable() = ccall(:jl_gc_disable, Void, ()) diff --git a/base/util.jl b/base/util.jl index af7ac1495332e..4834518acfff2 100644 --- a/base/util.jl +++ b/base/util.jl @@ -12,6 +12,9 @@ gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ()) # total number of bytes allocated so far gc_bytes() = ccall(:jl_gc_total_bytes, Int64, ()) +gc_num_pause() = ccall(:jl_gc_num_pause, Int64, ()) +gc_num_full_sweep() = ccall(:jl_gc_num_full_sweep, Int64, ()) + function tic() t0 = time_ns() task_local_storage(:TIMERS, (t0, get(task_local_storage(), :TIMERS, ()))) @@ -36,12 +39,17 @@ function toc() end # print elapsed time, return expression value - -function time_print(t, b, g) +const _units = ["bytes", "kB", "MB"] +function time_print(t, b, g, np, nfs) + i = 1 + while b > 1024 && i < length(_units) + b = div(b, 1024) + i += 1 + end if 0 < g - @printf("elapsed time: %s seconds (%d bytes allocated, %.2f%% gc time)\n", t/1e9, b, 100*g/t) + @printf("elapsed time: %s seconds (%d %s allocated, %.2f%% gc time in %d pauses with %d full sweep)\n", t/1e9, b, _units[i], 100*g/t, np, nfs) else - @printf("elapsed time: %s seconds (%d bytes allocated)\n", t/1e9, b) + @printf("elapsed time: %s seconds (%d %s allocated)\n", t/1e9, b, _units[i]) end end @@ -50,11 +58,15 @@ macro time(ex) local b0 = gc_bytes() local t0 = time_ns() local g0 = gc_time_ns() + local n0 = gc_num_pause() + local nfs0 = gc_num_full_sweep() local val = $(esc(ex)) + local nfs1 = gc_num_full_sweep() + local n1 = gc_num_pause() local g1 = gc_time_ns() local t1 = time_ns() local b1 = gc_bytes() - time_print(t1-t0, b1-b0, g1-g0) + time_print(t1-t0, b1-b0, g1-g0, n1-n0, nfs1-nfs0) 
val end end diff --git a/doc/manual/embedding.rst b/doc/manual/embedding.rst index 31d590f07201e..92416b125c9db 100644 --- a/doc/manual/embedding.rst +++ b/doc/manual/embedding.rst @@ -107,6 +107,23 @@ Several Julia values can be pushed at once using the ``JL_GC_PUSH2`` , ``JL_GC_P // Do something with args (e.g. call jl_... functions) JL_GC_POP(); +The garbage collector also operates under the assumption that it is aware of every old-generation object pointing to a young-generation one. Any time a pointer update breaks that assumption, it must be signaled to the collector with the ``gc_wb`` (write barrier) function, like so:: + + jl_value_t *parent = some_old_value, *child = some_young_value; + ((some_specific_type*)parent)->field = child; + gc_wb(parent, child); + +In general, it is impossible to predict which values will be old at runtime, so the write barrier must be inserted after all explicit stores. One notable exception is when the ``parent`` object was just allocated and garbage collection has not run since then. Remember that most ``jl_...`` functions can sometimes invoke garbage collection. + +The write barrier is also necessary for arrays of pointers when updating their data directly. For example:: + + jl_array_t *some_array = ...; // e.g. 
a Vector{Any} + void **data = (void**)jl_array_data(some_array); + jl_value_t *some_value = ...; + data[0] = some_value; + gc_wb(some_array, some_value); + + Manipulating the Garbage Collector --------------------------------------------------- diff --git a/src/alloc.c b/src/alloc.c index a905422062fab..0b47e89de6bfd 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -278,6 +278,7 @@ void jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs) size_t offs = jl_field_offset(st,i) + sizeof(void*); if (st->fields[i].isptr) { *(jl_value_t**)((char*)v + offs) = rhs; + if(rhs != NULL) gc_wb(v, rhs); } else { jl_assign_bits((char*)v + offs, rhs); @@ -521,7 +522,7 @@ static jl_sym_t *mk_symbol(const char *str) static void unmark_symbols_(jl_sym_t *root) { while (root != NULL) { - root->type = (jl_value_t*)(((uptrint_t)root->type)&~1UL); + root->type = (jl_value_t*)(((uptrint_t)root->type)&~3UL); unmark_symbols_(root->left); root = root->right; } @@ -529,9 +530,10 @@ static void unmark_symbols_(jl_sym_t *root) void jl_unmark_symbols(void) { unmark_symbols_(symtab); } -static jl_sym_t **symtab_lookup(jl_sym_t **ptree, const char *str) +static jl_sym_t **symtab_lookup(jl_sym_t **ptree, const char *str, jl_sym_t **parent) { int x; + if (parent != NULL) *parent = NULL; uptrint_t h = hash_symbol(str, strlen(str)); // Tree nodes sorted by major key of (int(hash)) and minor key o (str). 
@@ -542,6 +544,7 @@ static jl_sym_t **symtab_lookup(jl_sym_t **ptree, const char *str) if (x == 0) return ptree; } + if (parent != NULL) *parent = *ptree; if (x < 0) ptree = &(*ptree)->left; else @@ -553,16 +556,19 @@ static jl_sym_t **symtab_lookup(jl_sym_t **ptree, const char *str) jl_sym_t *jl_symbol(const char *str) { jl_sym_t **pnode; - - pnode = symtab_lookup(&symtab, str); - if (*pnode == NULL) + jl_sym_t *parent; + pnode = symtab_lookup(&symtab, str, &parent); + if (*pnode == NULL) { *pnode = mk_symbol(str); + if (parent != NULL) + gc_wb(parent, *pnode); + } return *pnode; } jl_sym_t *jl_symbol_lookup(const char *str) { - return *symtab_lookup(&symtab, str); + return *symtab_lookup(&symtab, str, NULL); } DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, int32_t len) @@ -696,12 +702,15 @@ jl_datatype_t *jl_new_datatype(jl_sym_t *name, jl_datatype_t *super, t = jl_new_uninitialized_datatype(jl_tuple_len(fnames)); else tn = t->name; - // init before possibly calling jl_new_typename t->super = super; + if(super != NULL) gc_wb(t, t->super); t->parameters = parameters; + gc_wb(t, t->parameters); t->names = fnames; + gc_wb(t, t->names); t->types = ftypes; + if(ftypes != NULL) gc_wb(t, t->types); t->abstract = abstract; t->mutabl = mutabl; t->pointerfree = 0; @@ -718,10 +727,13 @@ jl_datatype_t *jl_new_datatype(jl_sym_t *name, jl_datatype_t *super, else tn = jl_new_typename((jl_sym_t*)name); t->name = tn; + gc_wb(t, t->name); } - if (t->name->primary == NULL) + if (t->name->primary == NULL) { t->name->primary = (jl_value_t*)t; + gc_wb(t->name, t); + } if (abstract || jl_tuple_len(parameters) > 0) { t->uid = 0; diff --git a/src/array.c b/src/array.c index 98347cdcf8169..d6f80a720f3f6 100644 --- a/src/array.c +++ b/src/array.c @@ -99,6 +99,7 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, memset(data, 0, tot); JL_GC_POP(); } + a->pooled = tsz <= 2048; a->data = data; if (elsz == 1) ((char*)data)[tot-1] = '\0'; @@ -147,8 +148,10 @@ 
jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, jl_tuple_t *di size_t ndims = jl_tuple_len(dims); int ndimwords = jl_array_ndimwords(ndims); - a = (jl_array_t*)allocobj((sizeof(jl_array_t) + sizeof(void*) + ndimwords*sizeof(size_t) + 15)&-16); + int tsz = (sizeof(jl_array_t) + sizeof(void*) + ndimwords*sizeof(size_t) + 15)&-16; + a = (jl_array_t*)allocobj(tsz); a->type = atype; + a->pooled = tsz <= 2048; a->ndims = ndims; a->offset = 0; a->data = NULL; @@ -211,8 +214,9 @@ jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, elsz = jl_datatype_size(el_type); else elsz = sizeof(void*); - - a = (jl_array_t*)allocobj((sizeof(jl_array_t)+jl_array_ndimwords(1)*sizeof(size_t)+15)&-16); + int tsz = (sizeof(jl_array_t)+jl_array_ndimwords(1)*sizeof(size_t)+15)&-16; + a = (jl_array_t*)allocobj(tsz); + a->pooled = tsz <= 2048; a->type = atype; a->data = data; #ifdef STORE_ARRAY_LEN @@ -226,6 +230,7 @@ jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, if (own_buffer) { a->how = 2; jl_gc_track_malloced_array(a); + jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0)); } else { a->how = 0; @@ -260,7 +265,9 @@ jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_tuple_t *dims, elsz = sizeof(void*); int ndimwords = jl_array_ndimwords(ndims); - a = (jl_array_t*)allocobj((sizeof(jl_array_t) + ndimwords*sizeof(size_t)+15)&-16); + int tsz = (sizeof(jl_array_t) + ndimwords*sizeof(size_t)+15)&-16; + a = (jl_array_t*)allocobj(tsz); + a->pooled = tsz <= 2048; a->type = atype; a->data = data; #ifdef STORE_ARRAY_LEN @@ -275,6 +282,7 @@ jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_tuple_t *dims, if (own_buffer) { a->how = 2; jl_gc_track_malloced_array(a); + jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 
1 : 0)); } else { a->how = 0; @@ -502,6 +510,7 @@ void jl_arrayset(jl_array_t *a, jl_value_t *rhs, size_t i) } else { ((jl_value_t**)a->data)[i] = rhs; + gc_wb(a, rhs); } } @@ -545,7 +554,7 @@ static void array_resize_buffer(jl_array_t *a, size_t newlen, size_t oldlen, siz if (a->how == 2) { // already malloc'd - use realloc newdata = (char*)jl_gc_managed_realloc((char*)a->data - oldoffsnb, nbytes, - oldnbytes+oldoffsnb, a->isaligned); + oldnbytes+oldoffsnb, a->isaligned, (jl_value_t*)a); if (offs != a->offset) { memmove(&newdata[offsnb], &newdata[oldoffsnb], oldnbytes); } @@ -574,6 +583,8 @@ static void array_resize_buffer(jl_array_t *a, size_t newlen, size_t oldlen, siz a->isshared = 0; if (a->ptrarray || es==1) memset(newdata+offsnb+oldnbytes, 0, nbytes-oldnbytes-offsnb); + if (a->how == 1) + gc_wb_buf(a, newdata); a->maxsize = newlen; } diff --git a/src/ast.c b/src/ast.c index fb6dbf8315672..b5c8140461377 100644 --- a/src/ast.c +++ b/src/ast.c @@ -731,6 +731,7 @@ static jl_value_t *copy_ast(jl_value_t *expr, jl_tuple_t *sp, int do_sp) // of a top-level thunk that gets type inferred. 
li->def = li; li->ast = jl_prepare_ast(li, li->sparams); + gc_wb(li, li->ast); JL_GC_POP(); return (jl_value_t*)li; } @@ -749,17 +750,18 @@ static jl_value_t *copy_ast(jl_value_t *expr, jl_tuple_t *sp, int do_sp) jl_expr_t *ne = jl_exprn(e->head, jl_array_len(e->args)); JL_GC_PUSH1(&ne); if (e->head == lambda_sym) { - jl_exprarg(ne, 0) = copy_ast(jl_exprarg(e,0), sp, 0); - jl_exprarg(ne, 1) = copy_ast(jl_exprarg(e,1), sp, 0); - jl_exprarg(ne, 2) = copy_ast(jl_exprarg(e,2), sp, 1); + jl_exprargset(ne, 0, copy_ast(jl_exprarg(e,0), sp, 0)); + jl_exprargset(ne, 1, copy_ast(jl_exprarg(e,1), sp, 0)); + jl_exprargset(ne, 2, copy_ast(jl_exprarg(e,2), sp, 1)); } else if (e->head == assign_sym) { - jl_exprarg(ne, 0) = copy_ast(jl_exprarg(e,0), sp, 0); - jl_exprarg(ne, 1) = copy_ast(jl_exprarg(e,1), sp, 1); + jl_exprargset(ne, 0, copy_ast(jl_exprarg(e,0), sp, 0)); + jl_exprargset(ne, 1, copy_ast(jl_exprarg(e,1), sp, 1)); } else { - for(size_t i=0; i < jl_array_len(e->args); i++) - jl_exprarg(ne, i) = copy_ast(jl_exprarg(e,i), sp, 1); + for(size_t i=0; i < jl_array_len(e->args); i++) { + jl_exprargset(ne, i, copy_ast(jl_exprarg(e,i), sp, 1)); + } } JL_GC_POP(); return (jl_value_t*)ne; @@ -780,10 +782,12 @@ DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) ne = jl_exprn(e->head, l); if (l == 0) { ne->args = jl_alloc_cell_1d(0); + gc_wb(ne, ne->args); } else { - for(i=0; i < l; i++) - jl_exprarg(ne, i) = jl_copy_ast(jl_exprarg(e,i)); + for(i=0; i < l; i++) { + jl_exprargset(ne, i, jl_copy_ast(jl_exprarg(e,i))); + } } JL_GC_POP(); return (jl_value_t*)ne; @@ -820,17 +824,18 @@ static jl_value_t *dont_copy_ast(jl_value_t *expr, jl_tuple_t *sp, int do_sp) else if (jl_is_expr(expr)) { jl_expr_t *e = (jl_expr_t*)expr; if (e->head == lambda_sym) { - jl_exprarg(e, 0) = dont_copy_ast(jl_exprarg(e,0), sp, 0); - jl_exprarg(e, 1) = dont_copy_ast(jl_exprarg(e,1), sp, 0); - jl_exprarg(e, 2) = dont_copy_ast(jl_exprarg(e,2), sp, 1); + jl_exprargset(e, 0, dont_copy_ast(jl_exprarg(e,0), sp, 
0)); + jl_exprargset(e, 1, dont_copy_ast(jl_exprarg(e,1), sp, 0)); + jl_exprargset(e, 2, dont_copy_ast(jl_exprarg(e,2), sp, 1)); } else if (e->head == assign_sym) { - jl_exprarg(e, 0) = dont_copy_ast(jl_exprarg(e,0), sp, 0); - jl_exprarg(e, 1) = dont_copy_ast(jl_exprarg(e,1), sp, 1); + jl_exprargset(e, 0, dont_copy_ast(jl_exprarg(e,0), sp, 0)); + jl_exprargset(e, 1, dont_copy_ast(jl_exprarg(e,1), sp, 1)); } else { - for(size_t i=0; i < jl_array_len(e->args); i++) - jl_exprarg(e, i) = dont_copy_ast(jl_exprarg(e,i), sp, 1); + for(size_t i=0; i < jl_array_len(e->args); i++) { + jl_exprargset(e, i, dont_copy_ast(jl_exprarg(e,i), sp, 1)); + } } return (jl_value_t*)e; } diff --git a/src/builtins.c b/src/builtins.c index 7c1293fecdcb5..c08bb12e226d4 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -908,6 +908,7 @@ void jl_trampoline_compile_function(jl_function_t *f, int always_infer, jl_tuple if (!jl_in_inference) { if (!jl_is_expr(f->linfo->ast)) { f->linfo->ast = jl_uncompress_ast(f->linfo, f->linfo->ast); + gc_wb(f->linfo, f->linfo->ast); } if (always_infer || jl_eval_with_compiler_p(jl_lam_body((jl_expr_t*)f->linfo->ast),1)) { jl_type_infer(f->linfo, sig, f->linfo); @@ -921,6 +922,7 @@ void jl_trampoline_compile_function(jl_function_t *f, int always_infer, jl_tuple jl_generate_fptr(f); if (jl_boot_file_loaded && jl_is_expr(f->linfo->ast)) { f->linfo->ast = jl_compress_ast(f->linfo, f->linfo->ast); + gc_wb(f->linfo, f->linfo->ast); } } @@ -1185,7 +1187,7 @@ void jl_init_primitives(void) // toys for debugging --------------------------------------------------------- // comma_one prints a comma for 1 element, e.g. 
"(x,)" -static size_t jl_show_tuple(JL_STREAM *out, jl_tuple_t *t, char *opn, char *cls, int comma_one) +static size_t jl_show_tuple(JL_STREAM *out, jl_tuple_t *t, char *opn, char *cls, int comma_one, int depth) { size_t i, n=0, len = jl_tuple_len(t); n += JL_PRINTF(out, "("); @@ -1201,14 +1203,18 @@ static size_t jl_show_tuple(JL_STREAM *out, jl_tuple_t *t, char *opn, char *cls, return n; } -DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) +#define MAX_DEPTH 5 + +size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, int depth) { // mimic jl_show, but never calling a julia method size_t n = 0; + if(depth > MAX_DEPTH) return 0; // cheap way of bailing out of cycles + depth++; if (v == NULL) { n += JL_PRINTF(out, "#"); } - else if (v->type == NULL) { + else if (jl_typeof(v) == NULL) { n += JL_PRINTF(out, ""); } else if ((uptrint_t)v->type < 4096U) { @@ -1216,10 +1222,10 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) } else if (jl_is_lambda_info(v)) { jl_lambda_info_t *li = (jl_lambda_info_t*)v; - n += jl_static_show(out, (jl_value_t*)li->module); + n += jl_static_show_x(out, (jl_value_t*)li->module, depth); n += JL_PRINTF(out, ".%s", li->name->name); if (li->specTypes) { - n += jl_static_show(out, (jl_value_t*)li->specTypes); + n += jl_static_show_x(out, (jl_value_t*)li->specTypes, depth); } else { n += JL_PRINTF(out, "(?)"); @@ -1230,16 +1236,16 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) //jl_static_show(out, !jl_is_expr(li->ast) ? 
jl_uncompress_ast(li, li->ast) : li->ast); } else if (jl_is_tuple(v)) { - n += jl_show_tuple(out, (jl_tuple_t*)v, "(", ")", 1); + n += jl_show_tuple(out, (jl_tuple_t*)v, "(", ")", 1, depth); } else if (jl_is_vararg_type(v)) { - n += jl_static_show(out, jl_tparam0(v)); + n += jl_static_show_x(out, jl_tparam0(v), depth); n += JL_PRINTF(out, "..."); } else if (jl_is_datatype(v)) { jl_datatype_t *dv = (jl_datatype_t*)v; if (dv->name->module != jl_core_module) { - n += jl_static_show(out, (jl_value_t*)dv->name->module); + n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth); JL_PUTS(".", out); n += 1; } n += JL_PRINTF(out, "%s", dv->name->name->name); @@ -1249,7 +1255,7 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) n += JL_PRINTF(out, "{"); for (j = 0; j < tlen; j++) { jl_value_t *p = jl_tupleref(dv->parameters,j); - n += jl_static_show(out, p); + n += jl_static_show_x(out, p, depth); if (j != tlen-1) n += JL_PRINTF(out, ", "); } @@ -1319,10 +1325,10 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) } else if (jl_is_uniontype(v)) { n += JL_PRINTF(out, "Union"); - n += jl_static_show(out, (jl_value_t*)((jl_uniontype_t*)v)->types); + n += jl_static_show_x(out, (jl_value_t*)((jl_uniontype_t*)v)->types, depth); } else if (jl_is_typector(v)) { - n += jl_static_show(out, ((jl_typector_t*)v)->body); + n += jl_static_show_x(out, ((jl_typector_t*)v)->body, depth); } else if (jl_is_typevar(v)) { if (((jl_tvar_t*)v)->lb != jl_bottom_type) { @@ -1335,7 +1341,7 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) else if (jl_is_module(v)) { jl_module_t *m = (jl_module_t*)v; if (m->parent != m && m->parent != jl_main_module) { - n += jl_static_show(out, (jl_value_t*)m->parent); + n += jl_static_show_x(out, (jl_value_t*)m->parent, depth); n += JL_PRINTF(out, "."); } n += JL_PRINTF(out, "%s", m->name->name); @@ -1345,13 +1351,13 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) } else if (jl_is_symbolnode(v)) 
{ n += JL_PRINTF(out, "%s::", jl_symbolnode_sym(v)->name); - n += jl_static_show(out, jl_symbolnode_type(v)); + n += jl_static_show_x(out, jl_symbolnode_type(v), depth); } else if (jl_is_getfieldnode(v)) { - n += jl_static_show(out, jl_getfieldnode_val(v)); + n += jl_static_show_x(out, jl_getfieldnode_val(v), depth); n += JL_PRINTF(out, ".%s", jl_getfieldnode_name(v)->name); n += JL_PRINTF(out, "::"); - n += jl_static_show(out, jl_getfieldnode_type(v)); + n += jl_static_show_x(out, jl_getfieldnode_type(v), depth); } else if (jl_is_labelnode(v)) { n += JL_PRINTF(out, "%d:", jl_labelnode_label(v)); @@ -1362,12 +1368,17 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) else if (jl_is_quotenode(v)) { jl_value_t *qv = jl_fieldref(v,0); if (!jl_is_symbol(qv)) { n += JL_PRINTF(out, "quote "); } - n += jl_static_show(out, qv); + n += jl_static_show_x(out, jl_fieldref(v,0), depth); if (!jl_is_symbol(qv)) { n += JL_PRINTF(out, " end"); } } + else if (jl_is_newvarnode(v)) { + n += JL_PRINTF(out, ""); + } else if (jl_is_topnode(v)) { n += JL_PRINTF(out, "top("); - n += jl_static_show(out, jl_fieldref(v,0)); + n += jl_static_show_x(out, jl_fieldref(v,0), depth); n += JL_PRINTF(out, ")"); } else if (jl_is_linenode(v)) { @@ -1376,9 +1387,9 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) else if (jl_is_expr(v)) { jl_expr_t *e = (jl_expr_t*)v; if (e->head == assign_sym && jl_array_len(e->args) == 2) { - n += jl_static_show(out, jl_exprarg(e,0)); + n += jl_static_show_x(out, jl_exprarg(e,0), depth); n += JL_PRINTF(out, " = "); - n += jl_static_show(out, jl_exprarg(e,1)); + n += jl_static_show_x(out, jl_exprarg(e,1), depth); } else { char sep = ' '; @@ -1388,14 +1399,14 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) size_t i, len = jl_array_len(e->args); for (i = 0; i < len; i++) { n += JL_PRINTF(out, ",%c", sep); - n += jl_static_show(out, jl_exprarg(e,i)); + n += jl_static_show_x(out, jl_exprarg(e,i), depth); } n += 
JL_PRINTF(out, ")::"); - n += jl_static_show(out, e->etype); + n += jl_static_show_x(out, e->etype, depth); } } else if (jl_is_array(v)) { - n += jl_static_show(out, jl_typeof(v)); + n += jl_static_show_x(out, jl_typeof(v), depth); n += JL_PRINTF(out, "["); size_t j, tlen = jl_array_len(v); for (j = 0; j < tlen; j++) { @@ -1404,29 +1415,30 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) elt = jl_cellref(v, j); else elt = jl_arrayref((jl_array_t*)v,j); - n += jl_static_show(out, elt); + n += jl_static_show_x(out, elt, depth); if (j != tlen-1) n += JL_PRINTF(out, ", "); } + if(j < tlen) n += JL_PRINTF(out, " ..."); n += JL_PRINTF(out, "]"); } else if (jl_typeis(v,jl_loaderror_type)) { n += JL_PRINTF(out, "LoadError(at "); - n += jl_static_show(out, jl_fieldref(v, 0)); + n += jl_static_show_x(out, jl_fieldref(v, 0), depth); n += JL_PRINTF(out, " line "); - n += jl_static_show(out, jl_fieldref(v, 1)); + n += jl_static_show_x(out, jl_fieldref(v, 1), depth); n += JL_PRINTF(out, ": "); - n += jl_static_show(out, jl_fieldref(v, 2)); + n += jl_static_show_x(out, jl_fieldref(v, 2), depth); n += JL_PRINTF(out, ")"); } else if (jl_typeis(v,jl_errorexception_type)) { n += JL_PRINTF(out, "ErrorException("); - n += jl_static_show(out, jl_fieldref(v, 0)); + n += jl_static_show_x(out, jl_fieldref(v, 0), depth); n += JL_PRINTF(out, ")"); } else if (jl_is_datatype(jl_typeof(v))) { jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - n += jl_static_show(out, (jl_value_t*)t); + n += jl_static_show_x(out, (jl_value_t*)t, depth); n += JL_PRINTF(out, "("); size_t nb = jl_datatype_size(t); size_t tlen = jl_tuple_len(t->names); @@ -1444,7 +1456,7 @@ DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) //jl_fielddesc_t f = t->fields[i]; n += JL_PRINTF(out, "="); fldval = jl_get_nth_field(v, i); - n += jl_static_show(out, fldval); + n += jl_static_show_x(out, fldval, depth); if (i != tlen-1) n += JL_PRINTF(out, ", "); } @@ -1454,12 +1466,19 @@ DLLEXPORT size_t 
jl_static_show(JL_STREAM *out, jl_value_t *v) } else { n += JL_PRINTF(out, ""); } return n; } + +DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) +{ + return jl_static_show_x(out, v, 0); +} + + int in_jl_ = 0; DLLEXPORT void jl_(void *jl_value) { diff --git a/src/cgutils.cpp b/src/cgutils.cpp index d13e115139178..5a0384de91c7c 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -750,6 +750,8 @@ static Value *emit_typeof(Value *p) tt = builder. CreateLoad(builder.CreateGEP(tt,ConstantInt::get(T_size,0)), false); + tt = builder. + CreateIntToPtr(builder. CreateAnd(builder.CreatePtrToInt(tt, T_int64), ConstantInt::get(T_int64,~(uptrint_t)3)), jl_pvalue_llvmt); #ifdef OVERLAP_TUPLE_LEN tt = builder. CreateIntToPtr(builder. @@ -970,7 +972,8 @@ static Value *typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype, static Value *emit_unbox(Type *to, Value *x, jl_value_t *jt); static void typed_store(Value *ptr, Value *idx_0based, Value *rhs, - jl_value_t *jltype, jl_codectx_t *ctx, MDNode* tbaa) + jl_value_t *jltype, jl_codectx_t *ctx, MDNode* tbaa, + Value* parent) // for the write barrier, NULL if no barrier needed { Type *elty = julia_type_to_llvm(jltype); assert(elty != NULL); @@ -979,8 +982,10 @@ static void typed_store(Value *ptr, Value *idx_0based, Value *rhs, if (elty==T_int1) { elty = T_int8; } if (jl_isbits(jltype) && ((jl_datatype_t*)jltype)->size > 0) rhs = emit_unbox(elty, rhs, jltype); - else + else { rhs = boxed(rhs,ctx); + if(parent != NULL) emit_write_barrier(ctx, parent, rhs); + } Value *data; if (ptr->getType()->getContainedType(0) != elty) data = builder.CreateBitCast(ptr, PointerType::get(elty, 0)); @@ -1722,3 +1727,17 @@ static void emit_cpointercheck(Value *x, const std::string &msg, ctx->f->getBasicBlockList().push_back(passBB); builder.SetInsertPoint(passBB); } + +// allocation for known size object +static Value* emit_allocobj(size_t static_size) +{ + if (static_size == sizeof(void*)*2) + return 
builder.CreateCall(prepare_call(jlalloc2w_func)); + else if (static_size == sizeof(void*)*3) + return builder.CreateCall(prepare_call(jlalloc3w_func)); + else if (static_size == sizeof(void*)*4) + return builder.CreateCall(prepare_call(jlalloc4w_func)); + else + return builder.CreateCall(prepare_call(jlallocobj_func), + ConstantInt::get(T_size, static_size)); +} diff --git a/src/codegen.cpp b/src/codegen.cpp index 1451992146205..de62ac039f132 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -286,6 +286,7 @@ static Function *jlegal_func; static Function *jlallocobj_func; static Function *jlalloc2w_func; static Function *jlalloc3w_func; +static Function *jlalloc4w_func; static Function *jl_alloc_tuple_func; static Function *jlsubtype_func; static Function *setjmp_func; @@ -305,6 +306,9 @@ static Function *box16_func; static Function *box32_func; static Function *box64_func; static Function *jlputs_func; +static Function *wbfunc; +static Function *queuerootfun; +static Function *expect_func; static Function *jldlsym_func; static Function *jlnewbits_func; //static Function *jlgetnthfield_func; @@ -319,6 +323,11 @@ static Function *show_execution_point_func; static std::vector two_pvalue_llvmt; static std::vector three_pvalue_llvmt; +extern "C" DLLEXPORT void gc_wb_slow(void* parent, void* ptr) +{ + gc_wb(parent, ptr); +} + // --- code generation --- // per-local-variable information @@ -527,6 +536,8 @@ jl_value_t *jl_get_cpu_name(void) return jl_pchar_to_string(HostCPUName.data(), HostCPUName.size()); } +static void emit_write_barrier(jl_codectx_t*,Value*,Value*); + #include "cgutils.cpp" static void jl_rethrow_with_add(const char *fmt, ...) 
@@ -1449,6 +1460,50 @@ static Value *emit_boxed_rooted(jl_value_t *e, jl_codectx_t *ctx) return v; } +// if ptr is NULL this emits a write barrier _back_ +static void emit_write_barrier(jl_codectx_t* ctx, Value *parent, Value *ptr) +{ + /* builder.CreateCall2(wbfunc, builder.CreateBitCast(parent, jl_pvalue_llvmt), builder.CreateBitCast(ptr, jl_pvalue_llvmt)); + return;*/ + parent = builder.CreateBitCast(parent, T_psize); + Value* parent_type = builder.CreateLoad(parent); + Value* parent_mark_bits = builder.CreateAnd(parent_type, 1); + + // the branch hint does not seem to make it to the generated code + //builder.CreateCall2(expect_func, parent_marked, ConstantInt::get(T_int1, 0)); + Value* parent_marked = builder.CreateICmpEQ(parent_mark_bits, ConstantInt::get(T_size, 1)); + + BasicBlock* cont = BasicBlock::Create(getGlobalContext(), "cont"); + BasicBlock* barrier_may_trigger = BasicBlock::Create(getGlobalContext(), "wb_may_trigger", ctx->f); + BasicBlock* barrier_trigger = BasicBlock::Create(getGlobalContext(), "wb_trigger", ctx->f); + builder.CreateCondBr(parent_marked, barrier_may_trigger, cont); + + builder.SetInsertPoint(barrier_may_trigger); + Value* ptr_mark_bit = builder.CreateAnd(builder.CreateLoad(builder.CreateBitCast(ptr, T_psize)), 1); + Value* ptr_not_marked = builder.CreateICmpEQ(ptr_mark_bit, ConstantInt::get(T_size, 0)); + builder.CreateCondBr(ptr_not_marked, barrier_trigger, cont); + builder.SetInsertPoint(barrier_trigger); + builder.CreateCall(prepare_call(queuerootfun), builder.CreateBitCast(parent, jl_pvalue_llvmt)); + builder.CreateBr(cont); + ctx->f->getBasicBlockList().push_back(cont); + builder.SetInsertPoint(cont); +} + +static void emit_checked_write_barrier(jl_codectx_t *ctx, Value *parent, Value *ptr) +{ + BasicBlock *cont; + Value *not_null = builder.CreateICmpNE(ptr, V_null); + BasicBlock *if_not_null = BasicBlock::Create(getGlobalContext(), "wb_not_null", ctx->f); + cont = BasicBlock::Create(getGlobalContext(), "cont"); + 
builder.CreateCondBr(not_null, if_not_null, cont); + builder.SetInsertPoint(if_not_null); + emit_write_barrier(ctx, parent, ptr); + builder.CreateBr(cont); + ctx->f->getBasicBlockList().push_back(cont); + builder.SetInsertPoint(cont); +} + + // --- lambda --- static void jl_add_linfo_root(jl_lambda_info_t *li, jl_value_t *val) @@ -1456,7 +1511,10 @@ static void jl_add_linfo_root(jl_lambda_info_t *li, jl_value_t *val) JL_GC_PUSH1(&val); li = li->def; if (li->roots == NULL) { + JL_GC_PUSH1(&val); li->roots = jl_alloc_cell_1d(1); + JL_GC_POP(); + gc_wb(li, li->roots); jl_cellset(li->roots, 0, val); } else { @@ -1648,7 +1706,7 @@ static Value *emit_getfield(jl_value_t *expr, jl_sym_t *name, jl_codectx_t *ctx) } static void emit_setfield(jl_datatype_t *sty, Value *strct, size_t idx, - Value *rhs, jl_codectx_t *ctx, bool checked=true) + Value *rhs, jl_codectx_t *ctx, bool checked, bool wb) { if (sty->mutabl || !checked) { Value *addr = @@ -1656,11 +1714,13 @@ static void emit_setfield(jl_datatype_t *sty, Value *strct, size_t idx, ConstantInt::get(T_size, sty->fields[idx].offset + sizeof(void*))); jl_value_t *jfty = jl_tupleref(sty->types, idx); if (sty->fields[idx].isptr) { - builder.CreateStore(boxed(rhs,ctx), + rhs = boxed(rhs, ctx); + builder.CreateStore(rhs, builder.CreateBitCast(addr, jl_ppvalue_llvmt)); + if (wb) emit_checked_write_barrier(ctx, strct, rhs); } else { - typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx, sty->mutabl ? tbaa_user : tbaa_immut); + typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx, sty->mutabl ? 
tbaa_user : tbaa_immut, strct); } } else { @@ -2053,13 +2113,12 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs, #else size_t nwords = nargs+2; #endif - Value *tup = - builder.CreateCall(prepare_call(jlallocobj_func), - ConstantInt::get(T_size, sizeof(void*)*nwords)); + Value *tup = emit_allocobj(sizeof(void*)*nwords); #ifdef OVERLAP_TUPLE_LEN builder.CreateStore(arg1, emit_nthptr_addr(tup, 1)); #else builder.CreateStore(arg1, emit_nthptr_addr(tup, 2)); + emit_write_barrier(ctx, tup, arg1); #endif ctx->argDepth = last_depth; #ifdef OVERLAP_TUPLE_LEN @@ -2096,6 +2155,7 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs, } Value *argi = boxed(argval,ctx); builder.CreateStore(argi, emit_nthptr_addr(tup, i+offs)); + emit_write_barrier(ctx, tup, argi); } ctx->argDepth = last_depth; JL_GC_POP(); @@ -2222,9 +2282,9 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs, emit_expr(args[2],ctx,false); } else { - typed_store(emit_arrayptr(ary,args[1],ctx), idx, - ety==(jl_value_t*)jl_any_type ? emit_expr(args[2],ctx) : emit_unboxed(args[2],ctx), - ety, ctx, tbaa_user); + Value* v = ety==(jl_value_t*)jl_any_type ? emit_expr(args[2],ctx) : emit_unboxed(args[2],ctx); + typed_store(emit_arrayptr(ary,args[1],ctx), idx, v, + ety, ctx, tbaa_user, ety == (jl_value_t*)jl_any_type ? 
ary : NULL); } JL_GC_POP(); return ary; @@ -2342,7 +2402,7 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs, rhs = emit_expr(args[3], ctx); else rhs = emit_unboxed(args[3], ctx); - emit_setfield(sty, strct, idx, rhs, ctx); + emit_setfield(sty, strct, idx, rhs, ctx, true, true); JL_GC_POP(); return rhs; } @@ -2633,6 +2693,14 @@ static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s, return julia_binding_gv(b); } +static bool is_stack(Value *v) +{ + if (isa(v)) return true; + GetElementPtrInst *i = dyn_cast(v); + if (i && is_stack(i->getOperand(0))) return true; + return false; +} + // yields a jl_value_t** giving the binding location of a variable static Value *var_binding_pointer(jl_sym_t *s, jl_binding_t **pbnd, bool assign, jl_codectx_t *ctx) @@ -2796,7 +2864,11 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx) rval = emit_unbox(vt->getContainedType(0), emit_unboxed(r, ctx), vi.declType); } else { - rval = boxed(emit_expr(r, ctx, true),ctx,rt); + rval = boxed(emit_expr(r, ctx, true), ctx, rt); + if (!is_stack(bp)) { + Value* box = builder.CreateGEP(bp, ConstantInt::get(T_size, -1)); + emit_write_barrier(ctx, box, rval); + } } if (builder.GetInsertBlock()->getTerminator() == NULL) { builder.CreateStore(rval, bp, vi.isVolatile); @@ -3011,7 +3083,7 @@ static Value *emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, int last_depth = ctx->argDepth; Value *name = literal_pointer_val(mn); jl_binding_t *bnd = NULL; - Value *bp; + Value *bp, *bp_owner = V_null; if (theF != NULL) { bp = make_gcroot(theF, ctx); } @@ -3019,6 +3091,7 @@ static Value *emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, if (is_global((jl_sym_t*)mn, ctx)) { bnd = jl_get_binding_for_method_def(ctx->module, (jl_sym_t*)mn); bp = julia_binding_gv(bnd); + bp_owner = literal_pointer_val((jl_value_t*)ctx->module); } else { bp = var_binding_pointer((jl_sym_t*)mn, &bnd, false, ctx); @@ -3028,12 +3101,12 @@ static 
Value *emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, make_gcroot(a1, ctx); Value *a2 = boxed(emit_expr(args[2], ctx),ctx); make_gcroot(a2, ctx); - Value *mdargs[8] = - { name, bp, literal_pointer_val(bnd), a1, a2, literal_pointer_val(args[3]), + Value *mdargs[9] = + { name, bp, bp_owner, literal_pointer_val(bnd), a1, a2, literal_pointer_val(args[3]), literal_pointer_val((jl_value_t*)jl_module_call_func(ctx->module)), ConstantInt::get(T_int32, (int)iskw) }; ctx->argDepth = last_depth; - return builder.CreateCall(prepare_call(jlmethod_func), ArrayRef(&mdargs[0], 8)); + return builder.CreateCall(prepare_call(jlmethod_func), ArrayRef(&mdargs[0], 9)); } else if (head == const_sym) { jl_sym_t *sym = (jl_sym_t*)args[0]; @@ -3111,16 +3184,13 @@ static Value *emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, if (might_need_root(args[1]) || fval->getType() != jl_pvalue_llvmt) make_gcroot(f1, ctx); } - Value *strct = - builder.CreateCall(prepare_call(jlallocobj_func), - ConstantInt::get(T_size, - sizeof(void*)+sty->size)); + Value *strct = emit_allocobj(sizeof(void*)+sty->size); builder.CreateStore(literal_pointer_val((jl_value_t*)ty), emit_nthptr_addr(strct, (size_t)0)); if (f1) { if (!jl_subtype(expr_type(args[1],ctx), jl_t0(sty->types), 0)) emit_typecheck(f1, jl_t0(sty->types), "new", ctx); - emit_setfield(sty, strct, 0, f1, ctx, false); + emit_setfield(sty, strct, 0, f1, ctx, false, false); ctx->argDepth = fieldStart; if (nf > 1 && needroots) make_gcroot(strct, ctx); @@ -3130,7 +3200,7 @@ static Value *emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, } for(size_t i=j; i < nf; i++) { if (sty->fields[i].isptr) { - emit_setfield(sty, strct, i, V_null, ctx, false); + emit_setfield(sty, strct, i, V_null, ctx, false, false); } } for(size_t i=j+1; i < nargs; i++) { @@ -3146,7 +3216,7 @@ static Value *emit_expr(jl_value_t *expr, jl_codectx_t *ctx, bool isboxed, if (!jl_subtype(expr_type(args[i],ctx), jl_tupleref(sty->types,i-1), 0)) 
emit_typecheck(rhs, jl_tupleref(sty->types,i-1), "new", ctx); } - emit_setfield(sty, strct, i-1, rhs, ctx, false); + emit_setfield(sty, strct, i-1, rhs, ctx, false, false); } ctx->argDepth = fieldStart; return strct; @@ -4227,6 +4297,7 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle) ctx.dbuilder->finalize(); JL_GC_POP(); + return f; } @@ -4319,6 +4390,7 @@ extern "C" DLLEXPORT jl_value_t *jl_new_box(jl_value_t *v) #else box->type = jl_box_any_type; #endif + if(v) gc_wb(box, v); ((jl_value_t**)box)[1] = v; return box; } @@ -4603,6 +4675,22 @@ static void init_julia_llvm_env(Module *m) jlcall_func_to_llvm("jl_apply_generic", (void*)&jl_apply_generic, m); jlgetfield_func = jlcall_func_to_llvm("jl_f_get_field", (void*)&jl_f_get_field, m); + std::vector wbargs(0); + wbargs.push_back(jl_pvalue_llvmt); + wbargs.push_back(jl_pvalue_llvmt); + queuerootfun = Function::Create(FunctionType::get(T_void, args_1ptr, false), + Function::ExternalLinkage, + "gc_queue_root", m); + add_named_global(queuerootfun, (void*)&gc_queue_root); + wbfunc = Function::Create(FunctionType::get(T_void, wbargs, false), + Function::ExternalLinkage, + "gc_wb_slow", m); + add_named_global(wbfunc, (void*)&gc_wb_slow); + + std::vector exp_args(0); + exp_args.push_back(T_int1); + expect_func = Intrinsic::getDeclaration(m, Intrinsic::expect, exp_args); + std::vector args3(0); args3.push_back(jl_pvalue_llvmt); jlbox_func = @@ -4650,6 +4738,7 @@ static void init_julia_llvm_env(Module *m) mdargs.push_back(jl_pvalue_llvmt); mdargs.push_back(jl_pvalue_llvmt); mdargs.push_back(jl_pvalue_llvmt); + mdargs.push_back(jl_pvalue_llvmt); mdargs.push_back(T_int32); jlmethod_func = Function::Create(FunctionType::get(jl_pvalue_llvmt, mdargs, false), @@ -4740,6 +4829,12 @@ static void init_julia_llvm_env(Module *m) "alloc_3w", m); add_named_global(jlalloc3w_func, (void*)&alloc_3w); + jlalloc4w_func = + Function::Create(FunctionType::get(jl_pvalue_llvmt, empty_args, false), + Function::ExternalLinkage, 
+ "alloc_4w", m); + add_named_global(jlalloc4w_func, (void*)&alloc_4w); + std::vector atargs(0); atargs.push_back(T_size); jl_alloc_tuple_func = diff --git a/src/dump.c b/src/dump.c index e291d171b5bdc..7c965643d67c1 100644 --- a/src/dump.c +++ b/src/dump.c @@ -473,8 +473,10 @@ static int is_ast_node(jl_value_t *v) { if (jl_is_lambda_info(v)) { jl_lambda_info_t *li = (jl_lambda_info_t*)v; - if (jl_is_expr(li->ast)) + if (jl_is_expr(li->ast)) { li->ast = jl_compress_ast(li, li->ast); + gc_wb(li, li->ast); + } return 0; } return jl_is_symbol(v) || jl_is_expr(v) || jl_is_newvarnode(v) || @@ -876,7 +878,9 @@ static jl_value_t *jl_deserialize_datatype(ios_t *s, int pos, jl_value_t **loc) dt->alignment = read_int32(s); ios_read(s, (char*)&dt->fields[0], nf*sizeof(jl_fielddesc_t)); dt->names = (jl_tuple_t*)jl_deserialize_value(s, (jl_value_t**)&dt->names); + gc_wb(dt, dt->names); dt->types = (jl_tuple_t*)jl_deserialize_value(s, (jl_value_t**)&dt->types); + gc_wb(dt, dt->types); } else { dt->alignment = dt->size; @@ -885,8 +889,11 @@ static jl_value_t *jl_deserialize_datatype(ios_t *s, int pos, jl_value_t **loc) dt->names = dt->types = jl_null; } dt->parameters = (jl_tuple_t*)jl_deserialize_value(s, (jl_value_t**)&dt->parameters); + gc_wb(dt, dt->parameters); dt->name = (jl_typename_t*)jl_deserialize_value(s, (jl_value_t**)&dt->name); + gc_wb(dt, dt->name); dt->super = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&dt->super); + gc_wb(dt, dt->super); if (datatype_list) { if (dt->name == jl_array_type->name || dt->name == jl_pointer_type->name || dt->name == jl_type_type->name || dt->name == jl_vararg_type->name || @@ -1018,10 +1025,11 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t jl_value_t** data = (jl_value_t**)jl_array_data(a); for(i=0; i < jl_array_len(a); i++) { data[i] = jl_deserialize_value(s, &data[i]); + if (data[i]) gc_wb(a, data[i]); } } if (mode == MODE_MODULE) { - aty = jl_deserialize_value(s, &jl_typeof(a)); + aty 
= jl_deserialize_value(s, &a->type); assert(aty == jl_typeof(a)); } return (jl_value_t*)a; @@ -1040,6 +1048,7 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t if (usetable) backref_list.items[pos] = e; e->etype = jl_deserialize_value(s, &e->etype); + gc_wb(e, e->etype); jl_value_t **data = (jl_value_t**)(e->args->data); for(i=0; i < len; i++) { data[i] = jl_deserialize_value(s, &data[i]); @@ -1051,8 +1060,11 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t if (usetable) arraylist_push(&backref_list, tv); tv->name = (jl_sym_t*)jl_deserialize_value(s, NULL); + gc_wb(tv, tv->name); tv->lb = jl_deserialize_value(s, &tv->lb); + gc_wb(tv, tv->lb); tv->ub = jl_deserialize_value(s, &tv->ub); + gc_wb(tv, tv->ub); tv->bound = read_int8(s); return (jl_value_t*)tv; } @@ -1062,7 +1074,9 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t if (usetable) arraylist_push(&backref_list, f); f->linfo = (jl_lambda_info_t*)jl_deserialize_value(s, (jl_value_t**)&f->linfo); + if(f->linfo != NULL) gc_wb(f, f->linfo); f->env = jl_deserialize_value(s, &f->env); + gc_wb(f, f->env); f->fptr = jl_deserialize_fptr(s); return (jl_value_t*)f; } @@ -1073,18 +1087,29 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t if (usetable) arraylist_push(&backref_list, li); li->ast = jl_deserialize_value(s, &li->ast); + gc_wb(li, li->ast); li->sparams = (jl_tuple_t*)jl_deserialize_value(s, (jl_value_t**)&li->sparams); + gc_wb(li, li->sparams); li->tfunc = jl_deserialize_value(s, (jl_value_t**)&li->tfunc); + gc_wb(li, li->tfunc); li->name = (jl_sym_t*)jl_deserialize_value(s, NULL); + gc_wb(li, li->name); li->specTypes = (jl_tuple_t*)jl_deserialize_value(s, (jl_value_t**)&li->specTypes); + if(li->specTypes) gc_wb(li, li->specTypes); li->specializations = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&li->specializations); + if(li->specializations) gc_wb(li, 
li->specializations); li->inferred = read_int8(s); li->file = (jl_sym_t*)jl_deserialize_value(s, NULL); + gc_wb(li, li->file); li->line = read_int32(s); li->module = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&li->module); + gc_wb(li, li->module); li->roots = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&li->roots); + if(li->roots) gc_wb(li, li->roots); li->def = (jl_lambda_info_t*)jl_deserialize_value(s, (jl_value_t**)&li->def); + gc_wb(li, li->def); li->capt = jl_deserialize_value(s, &li->capt); + if(li->capt) gc_wb(li, li->capt); li->fptr = &jl_trampoline; li->functionObject = NULL; li->cFunctionObject = NULL; @@ -1121,14 +1146,20 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t if (usetable) backref_list.items[pos] = m; m->parent = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->parent); + gc_wb(m, m->parent); + while (1) { jl_sym_t *name = (jl_sym_t*)jl_deserialize_value(s, NULL); if (name == NULL) break; jl_binding_t *b = jl_get_binding_wr(m, name); b->value = jl_deserialize_value(s, &b->value); + gc_wb_buf(m, b); + if(b->value != NULL) gc_wb(m, b->value); b->type = jl_deserialize_value(s, &b->type); + gc_wb(m, b->type); b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner); + if(b->owner != NULL) gc_wb(m, b->owner); int8_t flags = read_int8(s); b->constp = (flags>>2) & 1; b->exportp = (flags>>1) & 1; @@ -1144,6 +1175,7 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t i++; } m->constant_table = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->constant_table); + if(m->constant_table != NULL) gc_wb(m, m->constant_table); return (jl_value_t*)m; } else if (vtag == (jl_value_t*)SmallInt64_tag) { @@ -1233,7 +1265,7 @@ static jl_value_t *jl_deserialize_value_(ios_t *s, jl_value_t *vtag, jl_value_t } // TODO: put WeakRefs on the weak_refs list if (mode == MODE_MODULE) { - dt = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&jl_typeof(v)); + dt = 
(jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&v->type); assert((jl_value_t*)dt == jl_typeof(v)); } return v; @@ -1288,8 +1320,8 @@ extern jl_array_t *jl_module_init_order; DLLEXPORT void jl_save_system_image(const char *fname) { - jl_gc_collect(); - jl_gc_collect(); + jl_gc_collect(1); + jl_gc_collect(0); int en = jl_gc_is_enabled(); jl_gc_disable(); htable_reset(&backref_table, 250000); @@ -1496,10 +1528,13 @@ jl_value_t *jl_compress_ast(jl_lambda_info_t *li, jl_value_t *ast) int en = jl_gc_is_enabled(); jl_gc_disable(); - if (li->module->constant_table == NULL) + if (li->module->constant_table == NULL) { li->module->constant_table = jl_alloc_cell_1d(0); + gc_wb(li->module, li->module->constant_table); + } tree_literal_values = li->module->constant_table; li->capt = (jl_value_t*)jl_lam_capt((jl_expr_t*)ast); + gc_wb(li, li->capt); if (jl_array_len(li->capt) == 0) li->capt = NULL; jl_serialize_value(&dest, jl_lam_body((jl_expr_t*)ast)->etype); @@ -1531,10 +1566,8 @@ jl_value_t *jl_uncompress_ast(jl_lambda_info_t *li, jl_value_t *data) src.size = jl_array_len(bytes); int en = jl_gc_is_enabled(); jl_gc_disable(); - jl_gc_ephemeral_on(); (void)jl_deserialize_value(&src, NULL); // skip ret type jl_value_t *v = jl_deserialize_value(&src, NULL); - jl_gc_ephemeral_off(); if (en) jl_gc_enable(); tree_literal_values = NULL; @@ -1555,7 +1588,6 @@ int jl_save_new_module(const char *fname, jl_module_t *mod) int en = jl_gc_is_enabled(); jl_gc_disable(); - jl_gc_ephemeral_on(); DUMP_MODES last_mode = mode; mode = MODE_MODULE; jl_module_t *lastmod = jl_current_module; @@ -1570,7 +1602,6 @@ int jl_save_new_module(const char *fname, jl_module_t *mod) jl_current_module = lastmod; mode = last_mode; - jl_gc_ephemeral_off(); if (en) jl_gc_enable(); htable_reset(&backref_table, 0); @@ -1628,14 +1659,14 @@ jl_module_t *jl_restore_new_module(const char *fname) jl_value_t **loc = (jl_value_t**)flagref_list.items[i++]; int offs = (int)(intptr_t)flagref_list.items[i++]; if (t != dt) 
{ - jl_typeof(dt) = (jl_value_t*)(ptrint_t)2; // invalidate the old value to help catch errors + dt->type = (jl_value_t*)(ptrint_t)2; // invalidate the old value to help catch errors if ((jl_value_t*)dt == o) { if (loc) *loc = (jl_value_t*)t; if (offs > 0) backref_list.items[offs] = t; } } if (t->instance != v) { - jl_typeof(v) = (jl_value_t*)(ptrint_t)1; // invalidate the old value to help catch errors + v->type = (jl_value_t*)(ptrint_t)1; // invalidate the old value to help catch errors if (v == o) { if (loc) *loc = v; if (offs > 0) backref_list.items[offs] = v; diff --git a/src/gc.c b/src/gc.c index 42b6bbad33aff..c2d9bd202e04d 100644 --- a/src/gc.c +++ b/src/gc.c @@ -10,79 +10,148 @@ // will wait for the next GC, to allow the space to be reused more // efficiently. default = on. #define FREE_PAGES_EAGER - #include #include +#include #include -#ifdef USE_MMAP -# include -# include -#endif #include "julia.h" #include "julia_internal.h" - -#ifdef _P64 -# ifdef USE_MMAP -# define GC_PAGE_SZ 16384//bytes -# else -# define GC_PAGE_SZ 12288//bytes -# endif -#else -#define GC_PAGE_SZ 8192//bytes +#ifndef _OS_WINDOWS_ +#include +#ifdef _OS_DARWIN_ +#define MAP_ANONYMOUS MAP_ANON +#endif #endif #ifdef __cplusplus extern "C" { #endif -typedef struct _gcpage_t { - char data[GC_PAGE_SZ]; +#pragma pack(push, 1) + +typedef struct { union { - struct _gcpage_t *next; - char _pad[8]; + uintptr_t header; + struct { + uintptr_t gc_bits:2; + uintptr_t pooled:1; + }; }; -} gcpage_t; + char data[]; +} buff_t; typedef struct _gcval_t { union { struct _gcval_t *next; uptrint_t flags; - uptrint_t data0; // overlapped - uptrint_t marked:1; + uptrint_t gc_bits:2; }; } gcval_t; -typedef struct _pool_t { - size_t osize; - gcpage_t *pages; - gcval_t *freelist; -} pool_t; +// layout for small (<2k) objects + +#define GC_PAGE_LG2 14 // log2(size of a page) +#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k + +// pool page metadata +typedef struct _gcpage_t { + struct { + uint16_t pool_n : 8; + 
uint16_t allocd : 1; // true if an allocation happened in this page since last sweep + uint16_t gc_bits : 2; // this is a bitwise | of all gc_bits in this page + }; + uint16_t nfree; + uint16_t osize; + uint16_t fl_begin_offset; + uint16_t fl_end_offset; + char *data; + char *ages; +} gcpage_t; + +#define PAGE_PFL_BEG(p) ((gcval_t**)((p->data) + (p)->fl_begin_offset)) +#define PAGE_PFL_END(p) ((gcval_t**)((p->data) + (p)->fl_end_offset)) +// round an address inside a gcpage's data to its begining +#define GC_PAGE_DATA(x) ((char*)((uintptr_t)(x) >> GC_PAGE_LG2 << GC_PAGE_LG2)) +// contiguous storage for up to REGION_PG_COUNT naturally aligned GC_PAGE_SZ blocks +// uses a very naive allocator (see malloc_page & free_page) #ifdef _P64 -# define BVOFFS 2 +#define REGION_PG_COUNT 16*8*4096 // 8G because virtual memory is cheap #else -# define BVOFFS 4 +#define REGION_PG_COUNT 8*4096 // 512M #endif +#define HEAP_COUNT 8 + +typedef struct { + uint32_t freemap[REGION_PG_COUNT/32]; + char pages[REGION_PG_COUNT][GC_PAGE_SZ]; + gcpage_t meta[REGION_PG_COUNT]; +} region_t; +static region_t *heaps[HEAP_COUNT] = {NULL}; +// store a lower bound of the first free block in each region +static int heaps_lb[HEAP_COUNT] = {0}; +// an upper bound of the last non-free block +static int heaps_ub[HEAP_COUNT] = {REGION_PG_COUNT/32-1}; + +typedef struct _pool_t { + gcval_t *freelist; + gcval_t *newpages; + uint16_t end_offset; // avoid to compute this at each allocation + uint16_t osize; + uint16_t nfree; +} pool_t; + +static region_t *find_region(void *ptr) +{ + // on 64bit systems we could probably use a single region and get rid of this loop + for (int i = 0; i < HEAP_COUNT && heaps[i]; i++) { + if ((char*)ptr >= (char*)heaps[i] && (char*)ptr <= (char*)heaps[i] + sizeof(region_t)) + return heaps[i]; + } + return NULL; +} +gcpage_t *page_metadata(void *data) +{ + region_t *r = find_region(data); + int pg_idx = (GC_PAGE_DATA(data) - &r->pages[0][0])/GC_PAGE_SZ; + return &r->meta[pg_idx]; 
+} + +char *page_age(gcpage_t *pg) +{ + return pg->ages; +} + +#define GC_POOL_END_OFS(osize) (((GC_PAGE_SZ/osize) - 1)*osize) + + +// layout for big (>2k) objects + typedef struct _bigval_t { struct _bigval_t *next; + struct _bigval_t **prev; // pointer to the next field of the prev entry size_t sz; -#ifndef _P64 - uptrint_t _pad0; - uptrint_t _pad1; -#endif + union { + uptrint_t _pad0; + uptrint_t age : 2; + }; + // must be 16-aligned here, in 32 & 64b union { uptrint_t flags; - uptrint_t marked:1; + uptrint_t gc_bits:2; char _data[1]; }; } bigval_t; +#define BVOFFS (offsetof(bigval_t, _data)/sizeof(void*)) +#define bigval_header(data) ((bigval_t*)((char*)(data) - BVOFFS*sizeof(void*))) + +#pragma pack(pop) + // GC knobs and self-measurement variables -static size_t allocd_bytes = 0; -static int64_t total_allocd_bytes = 0; static int64_t last_gc_total_bytes = 0; -static size_t freed_bytes = 0; -static uint64_t total_gc_time=0; + +static int gc_inc_steps = 1; #ifdef _P64 #define default_collect_interval (5600*1024*sizeof(void*)) static size_t max_collect_interval = 1250000000UL; @@ -90,23 +159,276 @@ static size_t max_collect_interval = 1250000000UL; #define default_collect_interval (3200*1024*sizeof(void*)) static size_t max_collect_interval = 500000000UL; #endif -static size_t collect_interval = default_collect_interval; +static size_t collect_interval; +static int64_t allocd_bytes; + +#define N_POOLS 42 +static __attribute__((aligned (64))) pool_t norm_pools[N_POOLS]; +#define pools norm_pools + +static bigval_t *big_objects = NULL; +static bigval_t *big_objects_marked = NULL; + +static int64_t total_allocd_bytes = 0; +static int64_t allocd_bytes_since_sweep = 0; +static int64_t freed_bytes = 0; +static uint64_t total_gc_time = 0; +#define NS_TO_S(t) ((double)(t/1000)/(1000*1000)) +#define NS2MS(t) ((double)(t/1000)/1000) +static int64_t live_bytes = 0; +static int64_t promoted_bytes = 0; +static size_t current_pg_count = 0; +static size_t max_pg_count = 0; + 
int jl_in_gc; // referenced from switchto task.c #ifdef OBJPROFILE -static htable_t obj_counts; +static htable_t obj_counts[3]; +static htable_t obj_sizes[3]; #endif #ifdef GC_FINAL_STATS static size_t total_freed_bytes=0; +static uint64_t max_pause = 0; +static uint64_t total_sweep_time=0; +static uint64_t total_mark_time=0; +static uint64_t total_fin_time=0; #endif +static int n_pause = 0; +static int n_full_sweep = 0; +int sweeping = 0; // manipulating mark bits -#define gc_marked(o) (((gcval_t*)(o))->marked) -#define gc_setmark(o) (((gcval_t*)(o))->marked=1) -#define gc_val_buf(o) ((gcval_t*)(((void**)(o))-1)) -#define gc_setmark_buf(o) gc_setmark(gc_val_buf(o)) -#define gc_typeof(v) ((jl_value_t*)(((uptrint_t)jl_typeof(v))&~1UL)) + +#define GC_CLEAN 0 // freshly allocated +#define GC_MARKED 1 // reachable and old +#define GC_QUEUED 2 // if it is reachable it will be marked as old +#define GC_MARKED_NOESC (GC_MARKED | GC_QUEUED) // reachable and young + +/* + The state transition looks like : + + <-[quicksweep]-- + <-[sweep]--- | + | | + ---> GC_QUEUED <---[sweep && age>promotion]-------- + | | ^ | + | [mark] | | + [sweep] | [write barrier] | + | v | | + ----- GC_MARKED <-------- | + | | | + --[quicksweep]-- | + | === above this line objects are old + ----[new]------> GC_CLEAN ------[mark]--------> GC_MARKED_NOESC + | ^ ^ | | + | | | | | + <---[sweep]-------- | ------[sweep && age<=promotion]--- | + | | + --[quicksweep && age<=promotion]------ + */ + +// A quick sweep is a sweep where sweep_mask == GC_MARKED_NOESC. It means we won't touch GC_MARKED objects. + +// When a reachable object has survived more than PROMOTE_AGE+1 collections +// it is tagged with GC_QUEUED during sweep and will be promoted on next mark +// because at that point we can know easily if it references young objects. +// Marked old objects that reference young ones are kept in the remset. 
+ +// When a write barrier triggers, the offending marked object is both queued, +// so as not to trigger the barrier again, and put in the remset. + + +#define PROMOTE_AGE 1 +// this cannot be increased as is without changing : +// - sweep_page which is specialized for 1bit age +// - the size of the age storage in region_t + + +static int64_t scanned_bytes; // young bytes scanned while marking +static int64_t perm_scanned_bytes; // old bytes scanned while marking +static int prev_sweep_mask = GC_MARKED; +static size_t scanned_bytes_goal; + +#define gc_bits(o) (((gcval_t*)(o))->gc_bits) +#define gc_marked(o) (((gcval_t*)(o))->gc_bits & GC_MARKED) +#define _gc_setmark(o, mark_mode) (((gcval_t*)(o))->gc_bits = mark_mode) + +// mark verification +#ifdef GC_VERIFY +static jl_value_t* lostval = 0; +static arraylist_t lostval_parents; +static arraylist_t lostval_parents_done; +static int verifying; + +static void add_lostval_parent(jl_value_t* parent) +{ + for(int i = 0; i < lostval_parents_done.len; i++) { + if((jl_value_t*)lostval_parents_done.items[i] == parent) + return; + } + for(int i = 0; i < lostval_parents.len; i++) { + if((jl_value_t*)lostval_parents.items[i] == parent) + return; + } + arraylist_push(&lostval_parents, parent); +} + +#define verify_val(v) do { \ + if(lostval == (jl_value_t*)(v) && (v) != 0) { \ + JL_PRINTF(JL_STDOUT, \ + "Found lostval 0x%lx at %s:%d oftype: ", \ + (uintptr_t)(lostval), __FILE__, __LINE__); \ + jl_static_show(JL_STDOUT, jl_typeof(v)); \ + JL_PRINTF(JL_STDOUT, "\n"); \ + } \ + } while(0); + + +#define verify_parent(ty, obj, slot, args...) 
do { \ + if(*(jl_value_t**)(slot) == lostval && (obj) != lostval) { \ + JL_PRINTF(JL_STDOUT, "Found parent %s 0x%lx at %s:%d\n", \ + ty, (uintptr_t)(obj), __FILE__, __LINE__); \ + JL_PRINTF(JL_STDOUT, "\tloc 0x%lx : ", (uintptr_t)(slot)); \ + JL_PRINTF(JL_STDOUT, args); \ + JL_PRINTF(JL_STDOUT, "\n"); \ + JL_PRINTF(JL_STDOUT, "\ttype: "); \ + jl_static_show(JL_STDOUT, jl_typeof(obj)); \ + JL_PRINTF(JL_STDOUT, "\n"); \ + add_lostval_parent((jl_value_t*)(obj)); \ + } \ + } while(0); + +#else +#define verify_val(v) +#define verify_parent(ty,obj,slot,args...) +#endif + +#ifdef OBJPROFILE +static void *BUFFTY = (void*)0xdeadb00f; +#endif +static void *MATY = (void*)0xdeadaa01; +static size_t array_nbytes(jl_array_t*); +static inline void objprofile_count(void* ty, int old, int sz) +{ +#ifdef OBJPROFILE +#ifdef GC_VERIFY + if (verifying) return; +#endif + if ((intptr_t)ty <= 0x10) + ty = BUFFTY; + void **bp = ptrhash_bp(&obj_counts[old], ty); + if (*bp == HT_NOTFOUND) + *bp = (void*)2; + else + (*((ptrint_t*)bp))++; + bp = ptrhash_bp(&obj_sizes[old], ty); + if (*bp == HT_NOTFOUND) + *bp = (void*)(1 + sz); + else + *((ptrint_t*)bp) += sz; +#endif +} + +static inline void gc_setmark_other(void *o, int mark_mode) +{ + _gc_setmark(o, mark_mode); + verify_val(o); +} + +#define inc_sat(v,s) v = (v) >= s ? 
s : (v)+1 + +static inline int gc_setmark_big(void *o, int mark_mode) +{ +#ifdef GC_VERIFY + if (verifying) { + _gc_setmark(o, mark_mode); + return 0; + } +#endif + bigval_t* hdr = bigval_header(o); + int bits = gc_bits(o); + if (bits == GC_QUEUED || bits == GC_MARKED) + mark_mode = GC_MARKED; + if ((mark_mode == GC_MARKED) & (bits != GC_MARKED)) { + *hdr->prev = hdr->next; + if (hdr->next) + hdr->next->prev = hdr->prev; + hdr->next = big_objects_marked; + hdr->prev = &big_objects_marked; + if (big_objects_marked) + big_objects_marked->prev = &hdr->next; + big_objects_marked = hdr; + } + if (!(bits & GC_MARKED)) { + if (mark_mode == GC_MARKED) + perm_scanned_bytes += hdr->sz; + else + scanned_bytes += hdr->sz; +#ifdef OBJPROFILE + objprofile_count(jl_typeof(o), mark_mode == GC_MARKED, hdr->sz); +#endif + } + _gc_setmark(o, mark_mode); + verify_val(o); + return mark_mode; +} + +static inline int gc_setmark_pool(void *o, int mark_mode) +{ +#ifdef GC_VERIFY + if (verifying) { + _gc_setmark(o, mark_mode); + return mark_mode; + } +#endif + gcpage_t* page = page_metadata(o); + int bits = gc_bits(o); + if (bits == GC_QUEUED || bits == GC_MARKED) { + mark_mode = GC_MARKED; + } + if (!(bits & GC_MARKED)) { + if (mark_mode == GC_MARKED) + perm_scanned_bytes += page->osize; + else + scanned_bytes += page->osize; +#ifdef OBJPROFILE + objprofile_count(jl_typeof(o), mark_mode == GC_MARKED, page->osize); +#endif + } + _gc_setmark(o, mark_mode); + page->gc_bits |= mark_mode; + verify_val(o); + return mark_mode; +} + + +static inline int gc_setmark(void *o, int sz, int mark_mode) +{ +#ifdef MEMDEBUG + return gc_setmark_big(o, mark_mode); +#endif + if (sz <= 2048) + return gc_setmark_pool(o, mark_mode); + else + return gc_setmark_big(o, mark_mode); +} + +#define gc_typeof(v) ((jl_value_t*)(((uptrint_t)jl_typeof(v))&(~(uintptr_t)3))) +#define gc_val_buf(o) ((buff_t*)(((void**)(o))-1)) + +inline void gc_setmark_buf(void *o, int mark_mode) +{ + buff_t *buf = gc_val_buf(o); +#ifdef 
MEMDEBUG + gc_setmark_big(buf, mark_mode); + return; +#endif + if (buf->pooled) + gc_setmark_pool(buf, mark_mode); + else + gc_setmark_big(buf, mark_mode); +} // malloc wrappers, aligned allocation @@ -134,10 +456,107 @@ static inline void *malloc_a16(size_t sz) #endif +static __attribute__((noinline)) void *malloc_page(void) +{ + void *ptr = (void*)0; + int i; + region_t* heap; + int heap_i = 0; + while(heap_i < HEAP_COUNT) { + heap = heaps[heap_i]; + if (heap == NULL) { +#ifdef _OS_WINDOWS_ + char* mem = VirtualAlloc(NULL, sizeof(region_t) + GC_PAGE_SZ, MEM_RESERVE, PAGE_READWRITE); +#else + char* mem = mmap(0, sizeof(region_t) + GC_PAGE_SZ, PROT_READ | PROT_WRITE, MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + mem = mem == MAP_FAILED ? NULL : mem; +#endif + if (mem == NULL) { + jl_printf(JL_STDERR, "could not allocate pools\n"); + abort(); + } + heap = (region_t*)((char*)GC_PAGE_DATA(mem + REGION_PG_COUNT/8 + GC_PAGE_SZ - 1) - REGION_PG_COUNT/8); + heaps[heap_i] = heap; +#ifdef _OS_WINDOWS_ + VirtualAlloc(heap->freemap, REGION_PG_COUNT/8, MEM_COMMIT, PAGE_READWRITE); + VirtualAlloc(heap->meta, REGION_PG_COUNT*sizeof(gcpage_t), MEM_COMMIT, PAGE_READWRITE); +#endif + memset(heap->freemap, 0xff, REGION_PG_COUNT/8); + } + for(i = heaps_lb[heap_i]; i < REGION_PG_COUNT/32; i++) { + if (heap->freemap[i]) break; + } + if (i == REGION_PG_COUNT/32) { + // heap full + heap_i++; + continue; + } + break; + } + if (heap_i >= HEAP_COUNT) { + jl_printf(JL_STDERR, "increase HEAP_COUNT or allocate less memory\n"); + abort(); + } + if (heaps_lb[heap_i] < i) + heaps_lb[heap_i] = i; + if (heaps_ub[heap_i] < i) + heaps_ub[heap_i] = i; + +#ifdef __MINGW32__ + int j = __builtin_ffs(heap->freemap[i]) - 1; +#elif _MSC_VER + int j; + _BitScanForward(&j, heap->freemap[i]); +#else + int j = ffs(heap->freemap[i]) - 1; +#endif + + heap->freemap[i] &= ~(uint32_t)(1 << j); + ptr = heap->pages[i*32 + j]; +#ifdef _OS_WINDOWS_ + VirtualAlloc(ptr, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); 
+#endif + current_pg_count++; + max_pg_count = max_pg_count < current_pg_count ? current_pg_count : max_pg_count; + return ptr; +} + +static void free_page(void *p) +{ + int pg_idx = -1; + int i; + for(i = 0; i < HEAP_COUNT && heaps[i] != NULL; i++) { + pg_idx = ((char*)p - (char*)&heaps[i]->pages[0])/GC_PAGE_SZ; + if (pg_idx >= 0 && pg_idx < REGION_PG_COUNT) break; + } + assert(i < HEAP_COUNT && heaps[i] != NULL); + region_t *heap = heaps[i]; + uint32_t msk = (uint32_t)(1 << ((pg_idx % 32))); + assert(!(heap->freemap[pg_idx/32] & msk)); + heap->freemap[pg_idx/32] ^= msk; + free(heap->meta[pg_idx].ages); +#ifdef _OS_WINDOWS_ + VirtualFree(p, GC_PAGE_SZ, MEM_DECOMMIT); +#else + madvise(p, GC_PAGE_SZ, MADV_DONTNEED); +#endif + if (heaps_lb[i] > pg_idx/32) heaps_lb[i] = pg_idx/32; + current_pg_count--; +} + +#define should_collect() (__unlikely(allocd_bytes>0)) +static inline int maybe_collect(void) +{ + if (should_collect()) { + jl_gc_collect(0); + return 1; + } + return 0; +} + DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { - if (allocd_bytes > collect_interval) - jl_gc_collect(); + maybe_collect(); allocd_bytes += sz; void *b = malloc(sz); if (b == NULL) @@ -153,10 +572,8 @@ DLLEXPORT void jl_gc_counted_free(void *p, size_t sz) DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) { - if (allocd_bytes > collect_interval) - jl_gc_collect(); - if (sz > old) - allocd_bytes += (sz-old); + maybe_collect(); + allocd_bytes += (sz-old); void *b = realloc(p, sz); if (b == NULL) jl_throw(jl_memory_exception); @@ -165,20 +582,18 @@ DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t void *jl_gc_managed_malloc(size_t sz) { - if (allocd_bytes > collect_interval) - jl_gc_collect(); + maybe_collect(); + allocd_bytes += sz; sz = (sz+15) & -16; void *b = malloc_a16(sz); if (b == NULL) jl_throw(jl_memory_exception); - allocd_bytes += sz; return b; } -void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int 
isaligned) +void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t* owner) { - if (allocd_bytes > collect_interval) - jl_gc_collect(); + maybe_collect(); sz = (sz+15) & -16; void *b; #ifdef _P64 @@ -200,7 +615,10 @@ void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned) #endif if (b == NULL) jl_throw(jl_memory_exception); - allocd_bytes += sz; + if (gc_bits(owner) == GC_MARKED) + perm_scanned_bytes += (sz - oldsz); + else + allocd_bytes += (sz - oldsz); return b; } @@ -262,49 +680,47 @@ static void sweep_weak_refs(void) } // finalization - -static htable_t finalizer_table; +static arraylist_t finalizer_list; +static arraylist_t finalizer_list_marked; static arraylist_t to_finalize; -static void schedule_finalization(void *o) +static void schedule_finalization(void *o, void *f) { arraylist_push(&to_finalize, o); + arraylist_push(&to_finalize, f); } static void run_finalizer(jl_value_t *o, jl_value_t *ff) { - jl_function_t *f; - while (1) { - if (jl_is_tuple(ff)) - f = (jl_function_t*)jl_t0(ff); - else - f = (jl_function_t*)ff; - assert(jl_is_function(f)); - JL_TRY { - jl_apply(f, (jl_value_t**)&o, 1); - } - JL_CATCH { - JL_PRINTF(JL_STDERR, "error in running finalizer: "); - jl_static_show(JL_STDERR, jl_exception_in_transit); - JL_PUTC('\n',JL_STDERR); - } - if (jl_is_tuple(ff)) - ff = jl_t1(ff); - else - break; + jl_function_t *f = (jl_function_t*)ff; + assert(jl_is_function(f)); + JL_TRY { + jl_apply(f, (jl_value_t**)&o, 1); + } + JL_CATCH { + JL_PRINTF(JL_STDERR, "error in running finalizer: "); + jl_static_show(JL_STDERR, jl_exception_in_transit); + JL_PUTC('\n',JL_STDERR); } } static int finalize_object(jl_value_t *o) { - jl_value_t *ff = NULL; int success = 0; - JL_GC_PUSH1(&ff); - ff = (jl_value_t*)ptrhash_get(&finalizer_table, o); - if (ff != HT_NOTFOUND) { - ptrhash_remove(&finalizer_table, o); - run_finalizer((jl_value_t*)o, ff); - success = 1; + jl_value_t *f = NULL; + JL_GC_PUSH1(&f); + for(int i = 
0; i < finalizer_list.len; i+=2) { + if (o == (jl_value_t*)finalizer_list.items[i]) { + f = (jl_value_t*)finalizer_list.items[i+1]; + if (i < finalizer_list.len - 2) { + finalizer_list.items[i] = finalizer_list.items[finalizer_list.len-2]; + finalizer_list.items[i+1] = finalizer_list.items[finalizer_list.len-1]; + i -= 2; + } + finalizer_list.len -= 2; + run_finalizer(o, f); + success = 1; + } } JL_GC_POP(); return success; @@ -312,11 +728,12 @@ static int finalize_object(jl_value_t *o) static void run_finalizers(void) { - void *o = NULL; - JL_GC_PUSH1(&o); + void *o = NULL, *f = NULL; + JL_GC_PUSH2(&o, &f); while (to_finalize.len > 0) { + f = arraylist_pop(&to_finalize); o = arraylist_pop(&to_finalize); - int ok = finalize_object((jl_value_t*)o); + int ok = 1;run_finalizer((jl_value_t*)o, (jl_value_t*)f); assert(ok); (void)ok; } JL_GC_POP(); @@ -324,10 +741,10 @@ static void run_finalizers(void) void jl_gc_run_all_finalizers(void) { - for(size_t i=0; i < finalizer_table.size; i+=2) { - jl_value_t *f = (jl_value_t*)finalizer_table.table[i+1]; + for(size_t i=0; i < finalizer_list.len; i+=2) { + jl_value_t *f = (jl_value_t*)finalizer_list.items[i+1]; if (f != HT_NOTFOUND && !jl_is_cpointer(f)) { - schedule_finalization(finalizer_table.table[i]); + schedule_finalization(finalizer_list.items[i], finalizer_list.items[i+1]); } } run_finalizers(); @@ -335,13 +752,8 @@ void jl_gc_run_all_finalizers(void) void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) { - jl_value_t **bp = (jl_value_t**)ptrhash_bp(&finalizer_table, v); - if (*bp == HT_NOTFOUND) { - *bp = (jl_value_t*)f; - } - else { - *bp = (jl_value_t*)jl_tuple2((jl_value_t*)f, *bp); - } + arraylist_push(&finalizer_list, (void*)v); + arraylist_push(&finalizer_list, (void*)f); } void jl_finalize(jl_value_t *o) @@ -351,12 +763,9 @@ void jl_finalize(jl_value_t *o) // big value list -static bigval_t *big_objects = NULL; - -static void *alloc_big(size_t sz) +static __attribute__((noinline)) void *alloc_big(size_t sz) 
{ - if (allocd_bytes > collect_interval) - jl_gc_collect(); + maybe_collect(); size_t offs = BVOFFS*sizeof(void*); if (sz+offs+15 < offs+15) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); @@ -366,35 +775,78 @@ static void *alloc_big(size_t sz) if (v == NULL) jl_throw(jl_memory_exception); #ifdef MEMDEBUG - //memset(v, 0xee, allocsz); + memset(v, 0xee, allocsz); #endif - v->sz = sz; + v->sz = allocsz; v->flags = 0; + v->age = 0; v->next = big_objects; + v->prev = &big_objects; + if (v->next) + v->next->prev = &v->next; big_objects = v; - return &v->_data[0]; + void* ptr = &v->_data[0]; + return ptr; } -static void sweep_big(void) +static int big_total; +static int big_freed; +static int big_reset; + +static bigval_t** sweep_big_list(int sweep_mask, bigval_t** pv) { - bigval_t *v = big_objects; - bigval_t **pv = &big_objects; + bigval_t *v = *pv; while (v != NULL) { bigval_t *nxt = v->next; - if (v->marked) { + if (gc_marked(&v->_data)) { pv = &v->next; - v->marked = 0; + int age = v->age; + int bits = gc_bits(&v->_data); + if (age >= PROMOTE_AGE) { + if (sweep_mask == GC_MARKED || bits == GC_MARKED_NOESC) { + bits = GC_QUEUED; + } + } + else { + inc_sat(age, PROMOTE_AGE); + v->age = age; + if ((sweep_mask & bits) == sweep_mask) { + bits = GC_CLEAN; + big_reset++; + } + } + gc_bits(&v->_data) = bits; } else { *pv = nxt; + if (nxt) + nxt->prev = pv; freed_bytes += v->sz; #ifdef MEMDEBUG - memset(v, 0xbb, v->sz+BVOFFS*sizeof(void*)); + memset(v, 0xbb, v->sz); #endif free_a16(v); + big_freed++; } + big_total++; v = nxt; } + return pv; +} + +static void sweep_big(int sweep_mask) +{ + sweep_big_list(sweep_mask, &big_objects); + if (sweep_mask == GC_MARKED) { + bigval_t** last_next = sweep_big_list(sweep_mask, &big_objects_marked); + if (big_objects) + big_objects->prev = last_next; + *last_next = big_objects; + big_objects = big_objects_marked; + if (big_objects) + big_objects->prev = &big_objects; + big_objects_marked = NULL; + } } // 
tracking Arrays with malloc'd storage @@ -422,12 +874,19 @@ void jl_gc_track_malloced_array(jl_array_t *a) mallocarrays = ma; } +void jl_gc_count_allocd(size_t sz) +{ + allocd_bytes += sz; +} + static size_t array_nbytes(jl_array_t *a) { + size_t sz = 0; if (jl_array_ndims(a)==1) - return a->elsize * a->maxsize; + sz = a->elsize * a->maxsize + (a->elsize == 1 ? 1 : 0); else - return a->elsize * jl_array_len(a); + sz = a->elsize * jl_array_len(a); + return sz; } void jl_gc_free_array(jl_array_t *a) @@ -442,7 +901,11 @@ void jl_gc_free_array(jl_array_t *a) } } -static void sweep_malloced_arrays() +static int mallocd_array_total; +static int mallocd_array_freed; + + +static void sweep_malloced_arrays(void) { mallocarray_t *ma = mallocarrays; mallocarray_t **pma = &mallocarrays; @@ -457,60 +920,112 @@ static void sweep_malloced_arrays() jl_gc_free_array(ma->a); ma->next = mafreelist; mafreelist = ma; + mallocd_array_freed++; } + mallocd_array_total++; ma = nxt; } } // pool allocation +static inline gcval_t *reset_page(pool_t *p, gcpage_t *pg, gcval_t *fl) +{ + pg->gc_bits = 0; + pg->nfree = GC_PAGE_SZ/p->osize; + pg->pool_n = p - norm_pools; + memset(page_age(pg), 0, (GC_PAGE_SZ/p->osize + 7)/8); + gcval_t *beg = (gcval_t*)pg->data; + gcval_t *end = (gcval_t*)((char*)beg + (pg->nfree - 1)*p->osize); + end->next = fl; + pg->allocd = 0; + pg->fl_begin_offset = 0; + pg->fl_end_offset = (char*)end - (char*)beg; + return beg; +} -#define N_POOLS 42 -static pool_t norm_pools[N_POOLS]; -static pool_t ephe_pools[N_POOLS]; -static pool_t *pools = &norm_pools[0]; - -static void add_page(pool_t *p) +static __attribute__((noinline)) void add_page(pool_t *p) { -#ifdef USE_MMAP - gcpage_t *pg = (gcpage_t*)mmap(NULL, sizeof(gcpage_t), PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); -#else - gcpage_t *pg = (gcpage_t*)malloc_a16(sizeof(gcpage_t)); -#endif - if (pg == NULL) + char *data = malloc_page(); + if (data == NULL) jl_throw(jl_memory_exception); - gcval_t *v = 
(gcval_t*)&pg->data[0]; - char *lim = (char*)v + GC_PAGE_SZ - p->osize; - gcval_t *fl; - gcval_t **pfl = &fl; - while ((char*)v <= lim) { - *pfl = v; - pfl = &v->next; - v = (gcval_t*)((char*)v + p->osize); - } - // these statements are ordered so that interrupting after any of them - // leaves the system in a valid state - *pfl = p->freelist; - pg->next = p->pages; - p->pages = pg; - p->freelist = fl; + gcpage_t *pg = page_metadata(data); + pg->data = data; + pg->osize = p->osize; + pg->ages = malloc((GC_PAGE_SZ/p->osize + 7)/8); + gcval_t *fl = reset_page(p, pg, p->newpages); + p->newpages = fl; } -static inline void *pool_alloc(pool_t *p) +static inline void *__pool_alloc(pool_t* p, int osize, int end_offset) { - if (allocd_bytes > collect_interval) - jl_gc_collect(); - allocd_bytes += p->osize; - if (p->freelist == NULL) { + gcval_t *v, *end; + if (__unlikely((allocd_bytes += osize) >= 0)) { + //allocd_bytes -= osize; + jl_gc_collect(0); + //allocd_bytes += osize; + } + // first try to use the freelist + v = p->freelist; + if (v) { + gcval_t* next = v->next; + v->flags = 0; + p->nfree--; + p->freelist = next; + if (__unlikely(GC_PAGE_DATA(v) != GC_PAGE_DATA(next))) { + // we only update pg's fields when the freelist changes page + // since pg's metadata is likely not in cache + gcpage_t* pg = page_metadata(v); + pg->nfree = 0; + pg->allocd = 1; + if (next) + p->nfree = page_metadata(next)->nfree; + } + return v; + } + // if the freelist is empty we reuse empty but not freed pages + v = p->newpages; + if (__unlikely(!v)) { add_page(p); + v = p->newpages; + } + end = (gcval_t*)&(GC_PAGE_DATA(v)[end_offset]); + if (__likely(v != end)) { + p->newpages = (gcval_t*)((char*)v + osize); + } else { + // like in the freelist case, only update the page metadata when it is full + gcpage_t* pg = page_metadata(v); + pg->nfree = 0; + pg->allocd = 1; + p->newpages = v->next; } - assert(p->freelist != NULL); - gcval_t *v = p->freelist; - p->freelist = p->freelist->next; 
v->flags = 0; return v; } +// use this variant when osize is statically known +// GC_POOL_END_OFS uses an integer division +static inline void *_pool_alloc(pool_t *p, int osize) +{ + return __pool_alloc(p, osize, GC_POOL_END_OFS(osize)); +} + +static inline void *pool_alloc(pool_t *p) +{ + return __pool_alloc(p, p->osize, p->end_offset); +} + +static const int sizeclasses[N_POOLS] = { + 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, + 64, 72, 80, 88, 96, //#=18 + + 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, + + 288, 320, 352, 384, 416, 448, 480, 512, + + 640, 768, 896, 1024, + + 1536, 2048 }; + static int szclass(size_t sz) { #ifndef _P64 @@ -527,124 +1042,328 @@ static int szclass(size_t sz) return 41; } -static void sweep_pool(pool_t *p) +int check_timeout = 0; +#define should_timeout() 0 + +// sweep phase + +static int skipped_pages = 0; +static int total_pages = 0; +static int freed_pages = 0; +static int lazy_freed_pages = 0; +static int page_done = 0; +static gcval_t** sweep_page(pool_t* p, gcpage_t* pg, gcval_t **pfl,int,int); +static void sweep_pool_region(int heap_i, int sweep_mask) +{ + region_t* heap = heaps[heap_i]; + gcval_t **pfl[N_POOLS]; + + // update metadata of pages that were pointed to by freelist or newpages from a pool + // i.e. 
pages being the current allocation target + for (int i = 0; i < N_POOLS; i++) { + gcval_t* last = norm_pools[i].freelist; + if (last) { + gcpage_t* pg = page_metadata(last); + pg->allocd = 1; + pg->nfree = norm_pools[i].nfree; + } + norm_pools[i].freelist = NULL; + pfl[i] = &norm_pools[i].freelist; + + last = norm_pools[i].newpages; + if (last) { + gcpage_t* pg = page_metadata(last); + pg->nfree = (GC_PAGE_SZ - ((char*)last - GC_PAGE_DATA(last)))/norm_pools[i].osize; + pg->allocd = 1; + } + norm_pools[i].newpages = NULL; + } + + // the actual sweeping + int ub = 0; + int lb = heaps_lb[heap_i]; + for (int pg_i = 0; pg_i <= heaps_ub[heap_i]; pg_i++) { + uint32_t line = heap->freemap[pg_i]; + if (!!~line) { + ub = pg_i; + for (int j = 0; j < 32; j++) { + if (!((line >> j) & 1)) { + gcpage_t *pg = &heap->meta[pg_i*32 + j]; + int p_n = pg->pool_n; + pool_t *p = &norm_pools[p_n]; + int osize = pg->osize; + pfl[p_n] = sweep_page(p, pg, pfl[p_n], sweep_mask, osize); + } + } + } else if (pg_i < lb) lb = pg_i; + } + heaps_ub[heap_i] = ub; + heaps_lb[heap_i] = lb; + + // cache back pg->nfree in the pool_t + int i = 0; + for (pool_t* p = norm_pools; p < norm_pools + N_POOLS; p++) { + *pfl[i++] = NULL; + if (p->freelist) { + p->nfree = page_metadata(p->freelist)->nfree; + } + } +} + +static gcval_t** sweep_page(pool_t* p, gcpage_t* pg, gcval_t **pfl, int sweep_mask, int osize) { #ifdef FREE_PAGES_EAGER int freedall; #else int empty; #endif - gcval_t **prev_pfl; + gcval_t **prev_pfl = pfl; gcval_t *v; - gcpage_t *pg = p->pages; - gcpage_t **ppg = &p->pages; - gcval_t **pfl = &p->freelist; - size_t osize = p->osize; - size_t nfreed = 0; - - size_t old_nfree = 0; - gcval_t *ofl = p->freelist; - while (ofl != NULL) { - old_nfree++; - ofl = ofl->next; + size_t old_nfree = 0, nfree = 0; + int pg_freedall = 0, pg_total = 0, pg_skpd = 0; + int obj_per_page = GC_PAGE_SZ/osize; + char *data = pg->data; + char *ages = page_age(pg); + v = (gcval_t*)data; + char *lim = (char*)v + GC_PAGE_SZ 
- osize; + freedall = 1; + old_nfree += pg->nfree; + + if (pg->gc_bits == GC_MARKED) { + // this page only contains GC_MARKED and free cells + // if we are doing a quick sweep and nothing has been allocated inside since last sweep + // we can skip it + if (sweep_mask == GC_MARKED_NOESC && !pg->allocd) { + // the position of the freelist begin/end in this page is stored in it's metadata + if (pg->fl_begin_offset != (uint16_t)-1) { + *pfl = (gcval_t*)PAGE_PFL_BEG(pg); + pfl = prev_pfl = PAGE_PFL_END(pg); + } + pg_skpd++; + freedall = 0; + goto free_page; + } + } + else if(pg->gc_bits == GC_CLEAN) { + goto free_page; } - while (pg != NULL) { - v = (gcval_t*)&pg->data[0]; - char *lim = (char*)v + GC_PAGE_SZ - osize; -#ifdef FREE_PAGES_EAGER - freedall = 1; -#else - empty = 1; -#endif - prev_pfl = pfl; - while ((char*)v <= lim) { - if (!v->marked) { -#ifndef FREE_PAGES_EAGER - // check that all but last object points to its next object, - // which is a heuristic check for being on the freelist. - if ((char*)v->next != (char*)v + osize && v->next != NULL && - (char*)v+osize <= lim) - empty = 0; -#endif - *pfl = v; - pfl = &v->next; - nfreed++; + int pg_nfree = 0; + gcval_t **pfl_begin = NULL; + unsigned char msk = 1; // mask for the age bit in the current age byte + while ((char*)v <= lim) { + int bits = gc_bits(v); + if (!(bits & GC_MARKED)) { + *pfl = v; + pfl = &v->next; + pfl_begin = pfl_begin ? 
pfl_begin : pfl; + pg_nfree++; + *ages &= ~msk; + } + else { // marked young or old + if (*ages & msk) { // old enough + if (sweep_mask == GC_MARKED || bits == GC_MARKED_NOESC) { + gc_bits(v) = GC_QUEUED; // promote + } } - else { - v->marked = 0; -#ifdef FREE_PAGES_EAGER - freedall = 0; -#else - empty = 0; -#endif + else if ((sweep_mask & bits) == sweep_mask) { + gc_bits(v) = GC_CLEAN; // unmark } - v = (gcval_t*)((char*)v + osize); + *ages |= msk; + freedall = 0; } - gcpage_t *nextpg = pg->next; - // lazy version: (empty) if the whole page was already unused, free it - // eager version: (freedall) free page as soon as possible - // the eager one uses less memory. - if ( -#ifdef FREE_PAGES_EAGER - freedall -#else - empty -#endif - ) { + v = (gcval_t*)((char*)v + osize); + msk *= 2; + if (!msk) { + msk = 1; + ages++; + } + } + + pg->fl_begin_offset = pfl_begin ? (char*)pfl_begin - data : (uint16_t)-1; + pg->fl_end_offset = pfl_begin ? (char*)pfl - data : (uint16_t)-1; + + pg->nfree = pg_nfree; + page_done++; + pg->allocd = 0; + free_page: + pg_freedall += freedall; + + // lazy version: (empty) if the whole page was already unused, free it + // eager version: (freedall) free page as soon as possible + // the eager one uses less memory. 
+ pg_total++; + if (freedall) { + // on quick sweeps, keep a few pages empty but allocated for performance + if (sweep_mask == GC_MARKED_NOESC && lazy_freed_pages <= default_collect_interval/GC_PAGE_SZ) { + gcval_t *begin = reset_page(p, pg, 0); + gcval_t** pend = (gcval_t**)((char*)begin + ((int)pg->nfree - 1)*osize); + gcval_t* npg = p->newpages; + *pend = npg; + p->newpages = begin; + begin->next = (gcval_t*)0; + lazy_freed_pages++; + pfl = prev_pfl; + } + else { pfl = prev_pfl; - *ppg = nextpg; #ifdef MEMDEBUG - memset(pg, 0xbb, sizeof(gcpage_t)); + memset(pg->data, 0xbb, GC_PAGE_SZ); #endif -#ifdef USE_MMAP - munmap(pg, sizeof(gcpage_t)); -#else - free_a16(pg); + free_page(data); +#ifdef MEMDEBUG + memset(pg, 0xbb, sizeof(gcpage_t)); #endif - //freed_bytes += GC_PAGE_SZ; - } - else { - ppg = &pg->next; } - pg = nextpg; + freed_pages++; + nfree += obj_per_page; } - *pfl = NULL; - freed_bytes += (nfreed - old_nfree)*osize; -} - -// sweep phase + else { + if (sweep_mask == GC_MARKED) + pg->gc_bits = GC_CLEAN; + if (sweep_mask == GC_MARKED_NOESC) + pg->gc_bits = GC_MARKED; + nfree += pg->nfree; + } + + skipped_pages += pg_skpd; + total_pages += pg_total; + freed_bytes += (nfree - old_nfree)*osize; + return pfl; +} extern void jl_unmark_symbols(void); -static void gc_sweep(void) +static void gc_sweep_once(int sweep_mask) { +#ifdef GC_TIME + double t0 = clock_now(); + mallocd_array_total = 0; + mallocd_array_freed = 0; +#endif sweep_malloced_arrays(); - sweep_big(); - int i; - for(i=0; i < N_POOLS; i++) { - sweep_pool(&norm_pools[i]); - sweep_pool(&ephe_pools[i]); +#ifdef GC_TIME + JL_PRINTF(JL_STDOUT, "GC sweep arrays %.2f (freed %d/%d)\n", (clock_now() - t0)*1000, mallocd_array_freed, mallocd_array_total); + t0 = clock_now(); + big_total = 0; + big_freed = 0; + big_reset = 0; +#endif + sweep_big(sweep_mask); +#ifdef GC_TIME + JL_PRINTF(JL_STDOUT, "GC sweep big %.2f (freed %d/%d with %d rst)\n", (clock_now() - t0)*1000, big_freed, big_total, big_reset); + t0 = 
clock_now(); +#endif + if (sweep_mask == GC_MARKED) + jl_unmark_symbols(); +#ifdef GC_TIME + JL_PRINTF(JL_STDOUT, "GC sweep symbols %.2f\n", (clock_now() - t0)*1000); +#endif +} + +// returns 0 if not finished +static int gc_sweep_inc(int sweep_mask) +{ +#ifdef GC_TIME + double t0 = clock_now(); +#endif + skipped_pages = 0; + total_pages = 0; + freed_pages = 0; + lazy_freed_pages = 0; + page_done = 0; + int finished = 1; + + for (int i = 0; i < HEAP_COUNT; i++) { + if (heaps[i]) + /*finished &= */sweep_pool_region(i, sweep_mask); } - jl_unmark_symbols(); + +#ifdef GC_TIME + double sweep_pool_sec = clock_now() - t0; + double sweep_speed = ((((double)total_pages)*GC_PAGE_SZ)/(1024*1024*1024))/sweep_pool_sec; + JL_PRINTF(JL_STDOUT, "GC sweep pools %s %.2f at %.1f GB/s (skipped %d%% of %d, done %d pgs, %d freed with %d lazily) mask %d\n", finished ? "end" : "inc", sweep_pool_sec*1000, sweep_speed, total_pages ? (skipped_pages*100)/total_pages : 0, total_pages, page_done, freed_pages, lazy_freed_pages, sweep_mask); +#endif + return finished; } // mark phase -static jl_value_t **mark_stack = NULL; -static size_t mark_stack_size = 0; -static size_t mark_sp = 0; +jl_value_t **mark_stack = NULL; +jl_value_t **mark_stack_base = NULL; +size_t mark_stack_size = 0; +size_t mark_sp = 0; +size_t perm_marked = 0; -static void push_root(jl_value_t *v, int d); -#define gc_push_root(v,d) do { assert(v != NULL); if (!gc_marked(v)) { push_root((jl_value_t*)(v),d); } } while (0) +void grow_mark_stack(void) +{ + size_t newsz = mark_stack_size>0 ? 
mark_stack_size*2 : 32000; + size_t offset = mark_stack - mark_stack_base; + mark_stack_base = (jl_value_t**)realloc(mark_stack_base, newsz*sizeof(void*)); + if (mark_stack_base == NULL) { + JL_PRINTF(JL_STDERR, "Could'nt grow mark stack to : %d\n", newsz); + exit(1); + } + mark_stack = mark_stack_base + offset; + mark_stack_size = newsz; +} + +int max_msp = 0; + +static arraylist_t tasks; +static arraylist_t rem_bindings; +static arraylist_t _remset[2]; +static arraylist_t *remset = &_remset[0]; +static arraylist_t *last_remset = &_remset[1]; +void reset_remset(void) +{ + arraylist_t *tmp = remset; + remset = last_remset; + last_remset = tmp; + remset->len = 0; +} + +DLLEXPORT void gc_queue_root(void *ptr) +{ + assert(gc_bits(ptr) != GC_QUEUED); + gc_bits(ptr) = GC_QUEUED; + arraylist_push(remset, ptr); +} +void gc_queue_binding(void *bnd) +{ + assert(gc_bits(bnd) != GC_QUEUED); + gc_bits(bnd) = GC_QUEUED; + arraylist_push(&rem_bindings, (void*)((void**)bnd + 1)); +} + +static int push_root(jl_value_t *v, int d, int); +static inline int gc_push_root(void *v, int d) +{ + assert((v) != NULL); + verify_val(v); + int bits = gc_bits(v); + if (!gc_marked(v)) { + return push_root((jl_value_t*)(v),d, bits); + } + return bits; +} -void jl_gc_setmark(jl_value_t *v) +void jl_gc_setmark(jl_value_t *v) // TODO rename this as it is misleading now { - gc_setmark(v); + // int64_t s = perm_scanned_bytes; + if (!gc_marked(v)) { + // objprofile_count(jl_typeof(v), 1, 16); +#ifdef MEMDEBUG + gc_setmark_big(v, GC_MARKED_NOESC); +#else + gc_setmark_pool(v, GC_MARKED_NOESC); +#endif + } + // perm_scanned_bytes = s; } -static void gc_mark_stack(jl_gcframe_t *s, ptrint_t offset, int d) +static void gc_mark_stack(jl_value_t* ta, jl_gcframe_t *s, ptrint_t offset, int d) { while (s != NULL) { s = (jl_gcframe_t*)((char*)s + offset); @@ -659,26 +1378,36 @@ static void gc_mark_stack(jl_gcframe_t *s, ptrint_t offset, int d) } else { for(size_t i=0; i < nr; i++) { - if (rts[i] != NULL) + if 
(rts[i] != NULL) { + verify_parent("task", ta, &rts[i], "stack(%d)", i); gc_push_root(rts[i], d); + } } } s = s->prev; } } -static void gc_mark_module(jl_module_t *m, int d) +__attribute__((noinline)) static int gc_mark_module(jl_module_t *m, int d) { size_t i; + int refyoung = 0; void **table = m->bindings.table; for(i=1; i < m->bindings.size; i+=2) { if (table[i] != HT_NOTFOUND) { jl_binding_t *b = (jl_binding_t*)table[i]; - gc_setmark_buf(b); - if (b->value != NULL) - gc_push_root(b->value, d); - if (b->type != (jl_value_t*)jl_any_type) - gc_push_root(b->type, d); + gc_setmark_buf(b, gc_bits(m)); +#ifdef GC_VERIFY + void* vb = gc_val_buf(b); + verify_parent("module", m, &vb, "binding_buff"); +#endif + if (b->value != NULL) { + verify_parent("module", m, &b->value, "binding(%s)", b->name->name); + refyoung |= gc_push_root(b->value, d); + } + if (b->type != (jl_value_t*)jl_any_type) { + refyoung |= gc_push_root(b->type, d); + } } } // this is only necessary because bindings for "using" modules @@ -686,41 +1415,59 @@ static void gc_mark_module(jl_module_t *m, int d) // after "using" it but before accessing it, this array might // contain the only reference. 
for(i=0; i < m->usings.len; i++) { - gc_push_root(m->usings.items[i], d); + refyoung |= gc_push_root(m->usings.items[i], d); } - if (m->constant_table) - gc_push_root(m->constant_table, d); + if (m->constant_table) { + verify_parent("module", m, &m->constant_table, "constant_table"); + refyoung |= gc_push_root(m->constant_table, d); + } + return refyoung; } -static void gc_mark_task(jl_task_t *ta, int d) +static void gc_mark_task_stack(jl_task_t *ta, int d) { - if (ta->parent) gc_push_root(ta->parent, d); - if (ta->last) gc_push_root(ta->last, d); - gc_push_root(ta->tls, d); - gc_push_root(ta->consumers, d); - gc_push_root(ta->donenotify, d); - gc_push_root(ta->exception, d); - if (ta->start) gc_push_root(ta->start, d); - if (ta->result) gc_push_root(ta->result, d); if (ta->stkbuf != NULL || ta == jl_current_task) { - if (ta->stkbuf != NULL) - gc_setmark_buf(ta->stkbuf); + if (ta->stkbuf != NULL) { + gc_setmark_buf(ta->stkbuf, gc_bits(ta)); + } #ifdef COPY_STACKS ptrint_t offset; if (ta == jl_current_task) { offset = 0; - gc_mark_stack(jl_pgcstack, offset, d); + gc_mark_stack((jl_value_t*)ta, jl_pgcstack, offset, d); } else { offset = (char *)ta->stkbuf - ((char *)jl_stackbase - ta->ssize); - gc_mark_stack(ta->gcstack, offset, d); + gc_mark_stack((jl_value_t*)ta, ta->gcstack, offset, d); } #else - gc_mark_stack(ta->gcstack, 0, d); + gc_mark_stack((jl_value_t*)ta, ta->gcstack, 0, d); #endif } } +#if 0 +static void mark_task_stacks(void) { + for (int i = 0; i < tasks.len; i++) { + gc_mark_task_stack(tasks.items[i], 0); + } +} +#endif + +__attribute__((noinline)) static void gc_mark_task(jl_task_t *ta, int d) +{ + if (ta->parent) gc_push_root(ta->parent, d); + if (ta->last) gc_push_root(ta->last, d); + gc_push_root(ta->tls, d); + gc_push_root(ta->consumers, d); + gc_push_root(ta->donenotify, d); + gc_push_root(ta->exception, d); + if (ta->start) gc_push_root(ta->start, d); + if (ta->result) gc_push_root(ta->result, d); + gc_mark_task_stack(ta, d); +} + + // for 
chasing down unwanted references /* static jl_value_t *lookforme = NULL; @@ -728,53 +1475,85 @@ DLLEXPORT void jl_gc_lookfor(jl_value_t *v) { lookforme = v; } */ #define MAX_MARK_DEPTH 400 - -static void push_root(jl_value_t *v, int d) +// mark v and recurse on its children (or store them on the mark stack when recursion depth becomes too high) +// it does so assuming the gc bits of v are "bits" and returns the new bits of v +// if v becomes GC_MARKED (old) and some of its children are GC_MARKED_NOESC (young), v is added to the remset +static int push_root(jl_value_t *v, int d, int bits) { assert(v != NULL); jl_value_t *vt = (jl_value_t*)gc_typeof(v); + int refyoung = 0; -#ifdef OBJPROFILE - if (!gc_marked(v)) { - void **bp = ptrhash_bp(&obj_counts, vt); - if (*bp == HT_NOTFOUND) - *bp = (void*)2; - else - (*((ptrint_t*)bp))++; + if (vt == (jl_value_t*)jl_weakref_type) { + bits = gc_setmark(v, jl_datatype_size(jl_weakref_type), GC_MARKED_NOESC); + goto ret; } -#endif - - gc_setmark(v); - - if (vt == (jl_value_t*)jl_weakref_type || - (jl_is_datatype(vt) && ((jl_datatype_t*)vt)->pointerfree)) { - return; + if ((jl_is_datatype(vt) && ((jl_datatype_t*)vt)->pointerfree)) { + int sz = jl_datatype_size(vt); + bits = gc_setmark(v, sz, GC_MARKED_NOESC); + goto ret; } - - if (d >= MAX_MARK_DEPTH) - goto queue_the_root; +#define MARK(v, s) do { \ + s; \ + if (d >= MAX_MARK_DEPTH) \ + goto queue_the_root; \ + if (should_timeout()) \ + goto queue_the_root; \ + } while (0) d++; // some values have special representations if (vt == (jl_value_t*)jl_tuple_type) { size_t l = jl_tuple_len(v); + MARK(v, bits = gc_setmark(v, l*sizeof(void*) + sizeof(jl_tuple_t), GC_MARKED_NOESC)); jl_value_t **data = ((jl_tuple_t*)v)->data; for(size_t i=0; i < l; i++) { jl_value_t *elt = data[i]; - if (elt != NULL) - gc_push_root(elt, d); + if (elt != NULL) { + verify_parent("tuple", v, &data[i], "elem(%d)", i); + refyoung |= gc_push_root(elt, d); + } } } else if (((jl_datatype_t*)(vt))->name == 
jl_array_typename) { jl_array_t *a = (jl_array_t*)v; + int todo = !(bits & GC_MARKED); + if (a->pooled) + MARK(a, +#ifdef MEMDEBUG + bits = gc_setmark_big(a, GC_MARKED_NOESC); +#else + bits = gc_setmark_pool(a, GC_MARKED_NOESC); +#endif + if (a->how == 2 && todo) { + objprofile_count(MATY, gc_bits(a) == GC_MARKED, array_nbytes(a)); + if (gc_bits(a) == GC_MARKED) + perm_scanned_bytes += array_nbytes(a); + else + scanned_bytes += array_nbytes(a); + }); + else + MARK(a, + bits = gc_setmark_big(a, GC_MARKED_NOESC); + if (a->how == 2 && todo) { + objprofile_count(MATY, gc_bits(a) == GC_MARKED, array_nbytes(a)); + if (gc_bits(a) == GC_MARKED) + perm_scanned_bytes += array_nbytes(a); + else + scanned_bytes += array_nbytes(a); + }); if (a->how == 3) { jl_value_t *owner = jl_array_data_owner(a); - gc_push_root(owner, d); - return; + refyoung |= gc_push_root(owner, d); + goto ret; } else if (a->how == 1) { - gc_setmark_buf((char*)a->data - a->offset*a->elsize); +#ifdef GC_VERIFY + void* val_buf = gc_val_buf((char*)a->data - a->offset*a->elsize); + verify_parent("array", v, &val_buf, "buffer ('loc' addr is meaningless)"); +#endif + gc_setmark_buf((char*)a->data - a->offset*a->elsize, gc_bits(v)); } if (a->ptrarray && a->data!=NULL) { size_t l = jl_array_len(a); @@ -787,47 +1566,105 @@ static void push_root(jl_value_t *v, int d) void *data = a->data; for(size_t i=0; i < l; i++) { jl_value_t *elt = ((jl_value_t**)data)[i]; - if (elt != NULL) gc_push_root(elt, d); + if (elt != NULL) { + verify_parent("array", v, &((jl_value_t**)data)[i], "elem(%d)", i); + refyoung |= gc_push_root(elt, d); + } + // try to split large array marking (incremental mark TODO) + // if (should_timeout() && l > 1000) goto queue_the_root; } } } } else if (vt == (jl_value_t*)jl_module_type) { - gc_mark_module((jl_module_t*)v, d); + MARK(v, bits = gc_setmark(v, sizeof(jl_module_t), GC_MARKED_NOESC)); + refyoung |= gc_mark_module((jl_module_t*)v, d); } else if (vt == (jl_value_t*)jl_task_type) { + MARK(v, 
bits = gc_setmark(v, sizeof(jl_task_t), GC_MARKED_NOESC)); gc_mark_task((jl_task_t*)v, d); + // tasks should always be remarked since we do not trigger the write barrier + // for stores to stack slots + refyoung = GC_MARKED_NOESC; } - else { + else if(vt == (jl_value_t*)jl_symbol_type) { + gc_setmark_other(v, GC_MARKED); // symbols have their own allocator + } + else if( +#ifdef GC_VERIFY + // this check should not be needed but it helps catching corruptions early + gc_typeof(vt) == (jl_value_t*)jl_datatype_type +#else + 1 +#endif + ) { jl_datatype_t *dt = (jl_datatype_t*)vt; + MARK(v, bits = gc_setmark(v, jl_datatype_size(dt), GC_MARKED_NOESC)); int nf = (int)jl_tuple_len(dt->names); + // TODO check if there is a perf improvement for objects with a lot of fields + // int fdsz = sizeof(void*)*nf; + // void** children = alloca(fdsz); + // int ci = 0; + jl_fielddesc_t* fields = dt->fields; for(int i=0; i < nf; i++) { - if (dt->fields[i].isptr) { - jl_value_t *fld = *(jl_value_t**)((char*)v + dt->fields[i].offset + sizeof(void*)); - if (fld) - gc_push_root(fld, d); + if (fields[i].isptr) { + jl_value_t **slot = (jl_value_t**)((char*)v + fields[i].offset + sizeof(void*)); + jl_value_t *fld = *slot; + if (fld) { + verify_parent("object", v, slot, "field(%d)", i); + //children[ci++] = fld; + refyoung |= gc_push_root(fld, d); + } } } + //while(ci) + // refyoung |= gc_push_root(children[--ci], d); } - return; +#ifdef GC_VERIFY + else { + JL_PRINTF(JL_STDOUT, "GC error (probable corruption) :\n"); + jl_(vt); + abort(); + } +#endif - queue_the_root: - if (mark_sp >= mark_stack_size) { - size_t newsz = mark_stack_size>0 ? 
mark_stack_size*2 : 32000; - mark_stack = (jl_value_t**)realloc(mark_stack,newsz*sizeof(void*)); - if (mark_stack == NULL) exit(1); - mark_stack_size = newsz; + ret: +#ifdef GC_VERIFY + if (verifying) return bits; +#endif + if ((bits == GC_MARKED) && (refyoung == GC_MARKED_NOESC)) { + // v is an old object referencing young objects + arraylist_push(remset, v); } - mark_stack[mark_sp++] = v; + return bits; + +#undef MARK + + queue_the_root: + if(mark_sp >= mark_stack_size) grow_mark_stack(); + mark_stack[mark_sp++] = (jl_value_t*)v; + max_msp = max_msp > mark_sp ? max_msp : mark_sp; + return bits; } -static void visit_mark_stack() +static void visit_mark_stack_inc(int mark_mode) { - while (mark_sp > 0) { - push_root(mark_stack[--mark_sp], 0); + while(mark_sp > 0 && !should_timeout()) { + jl_value_t* v = mark_stack[--mark_sp]; + assert(gc_bits(v) == GC_QUEUED || gc_bits(v) == GC_MARKED || gc_bits(v) == GC_MARKED_NOESC); + push_root(v, 0, gc_bits(v)); } } +static void visit_mark_stack(int mark_mode) +{ + int ct = check_timeout; + check_timeout = 0; + visit_mark_stack_inc(mark_mode); + assert(!mark_sp); + check_timeout = ct; +} + void jl_mark_box_caches(void); extern jl_value_t * volatile jl_task_arg_in_transit; @@ -839,39 +1676,28 @@ extern jl_module_t *jl_old_base_module; extern jl_array_t *typeToTypeId; extern jl_array_t *jl_module_init_order; -static void gc_mark(void) -{ - // mark all roots - - // active tasks - gc_push_root(jl_root_task, 0); - gc_push_root(jl_current_task, 0); +static int inc_count = 0; +static int quick_count = 0; +// mark the initial root set +static void pre_mark(void) +{ // modules gc_push_root(jl_main_module, 0); - gc_push_root(jl_internal_main_module, 0); gc_push_root(jl_current_module, 0); if (jl_old_base_module) gc_push_root(jl_old_base_module, 0); + gc_push_root(jl_internal_main_module, 0); + gc_push_root(jl_root_task, 0); + gc_push_root(jl_current_task, 0); // invisible builtin values if (jl_an_empty_cell) gc_push_root(jl_an_empty_cell, 
0); gc_push_root(jl_exception_in_transit, 0); gc_push_root(jl_task_arg_in_transit, 0); - gc_push_root(jl_unprotect_stack_func, 0); - gc_push_root(jl_bottom_func, 0); - gc_push_root(jl_typetype_type, 0); - gc_push_root(jl_tupletype_type, 0); gc_push_root(typeToTypeId, 0); if (jl_module_init_order != NULL) gc_push_root(jl_module_init_order, 0); - // constants - gc_push_root(jl_null, 0); - gc_push_root(jl_true, 0); - gc_push_root(jl_false, 0); - - jl_mark_box_caches(); - size_t i; // stuff randomly preserved @@ -884,31 +1710,219 @@ static void gc_mark(void) gc_push_root(to_finalize.items[i], 0); } - visit_mark_stack(); + jl_mark_box_caches(); + gc_push_root(jl_unprotect_stack_func, 0); + gc_push_root(jl_bottom_func, 0); + gc_push_root(jl_typetype_type, 0); + gc_push_root(jl_tupletype_type, 0); - // find unmarked objects that need to be finalized. - // this must happen last. - for(i=0; i < finalizer_table.size; i+=2) { - if (finalizer_table.table[i+1] != HT_NOTFOUND) { - jl_value_t *v = (jl_value_t*)finalizer_table.table[i]; - if (!gc_marked(v)) { - jl_value_t *fin = (jl_value_t*)finalizer_table.table[i+1]; - if (gc_typeof(fin) == (jl_value_t*)jl_voidpointer_type) { - void *p = ((void**)fin)[1]; - if (p) - ((void (*)(void*))p)(jl_data_ptr(v)); - finalizer_table.table[i+1] = HT_NOTFOUND; - continue; + // constants + gc_push_root(jl_null, 0); + gc_push_root(jl_true, 0); + gc_push_root(jl_false, 0); +} + +static int n_finalized; + +// find unmarked objects that need to be finalized from the finalizer list "list". +// this must happen last in the mark phase. 
+// if dryrun == 1, it does not schedule any actual finalization and only marks finalizers +static void post_mark(arraylist_t *list, int dryrun) +{ + n_finalized = 0; + for(size_t i=0; i < list->len; i+=2) { + jl_value_t *v = (jl_value_t*)list->items[i]; + jl_value_t *fin = (jl_value_t*)list->items[i+1]; + int isfreed = !gc_marked(v); + int isold = list == &finalizer_list && gc_bits(v) == GC_MARKED; + if (!dryrun && (isfreed || isold)) { + // remove from this list + if (i < list->len - 2) { + list->items[i] = list->items[list->len-2]; + list->items[i+1] = list->items[list->len-1]; + i -= 2; + } + list->len -= 2; + } + if (isfreed) { + // schedule finalizer or execute right away if it is not julia code + if (!dryrun && gc_typeof(fin) == (jl_value_t*)jl_voidpointer_type) { + void *p = jl_unbox_voidpointer(fin); + if (p) + ((void (*)(void*))p)(jl_data_ptr(v)); + continue; + } + gc_push_root(v, 0); + if (!dryrun) schedule_finalization(v, fin); + n_finalized++; + } + if (!dryrun && isold) { + arraylist_push(&finalizer_list_marked, v); + arraylist_push(&finalizer_list_marked, fin); + gc_bits(fin) = GC_QUEUED; + } + gc_push_root(fin, 0); + } + visit_mark_stack(GC_MARKED_NOESC); +} + +/* + How to debug a missing write barrier : + (or rather how I do it, if you know of a better way update this) + First, reproduce it with GC_VERIFY. It does change the allocation profile so if the error + is rare enough this may not be straightforward. If the backtracking goes well you should know + which object and which of its slots was written to without being caught by the write + barrier. Most times this allows you to take a guess. If this type of object is modified + by C code directly, look for missing gc_wb() on pointer updates. Be aware that there are + innocent looking functions which allocate (and thus trigger marking) only on special cases. + + If you cant find it, you can try the following : + - Ensure that should_timeout() is deterministic instead of clock based. 
+ - Once you have a completly deterministic program which crashes on gc_verify, the addresses + should stay constant between different runs (with same binary, same environment ...). + Do not forget to turn off ASLR (linux: echo 0 > /proc/sys/kernel/randomize_va_space). + At this point you should be able to run under gdb and use a hw watch to look for writes + at the exact addr of the slot (use something like watch *slot_addr if *slot_addr == val). + - If it went well you are now stopped at the exact point the problem is happening. + Backtraces in JIT'd code wont work for me (but I'm not sure they should) so in that + case you can try to jl_throw(something) from gdb. + */ +// this does not yet detect missing writes from marked to marked_noesc +// the error is caught at the first long collection +#ifdef GC_VERIFY +static arraylist_t bits_save[4]; + +// set all mark bits to bits +// record the state of the heap and can replay it in restore() +// restore _must_ be called as this will overwrite parts of the +// freelist in pools +static void clear_mark(int bits) +{ + size_t i; + pool_t* pool; + gcval_t* pv; + if (!verifying) { + for(int i = 0; i < 4; i++) + bits_save[i].len = 0; + } + + bigval_t *bigs[] = { big_objects, big_objects_marked }; + for (int i = 0; i < 2; i++) { + bigval_t *v = bigs[i]; + while (v != NULL) { + void* gcv = &v->_data; + if (!verifying) arraylist_push(&bits_save[gc_bits(gcv)], gcv); + gc_bits(gcv) = bits; + v = v->next; + } + } + for (int h = 0; h < HEAP_COUNT; h++) { + region_t* heap = heaps[h]; + if (!heap) break; + for (int pg_i = 0; pg_i < REGION_PG_COUNT/32; pg_i++) { + uint32_t line = heap->freemap[pg_i]; + if (!!~line) { + for (int j = 0; j < 32; j++) { + if (!((line >> j) & 1)) { + gcpage_t *pg = page_metadata(heap->pages[pg_i*32 + j]); + pool_t *pool = &norm_pools[pg->pool_n]; + pv = (gcval_t*)pg->data; + char *lim = (char*)pv + GC_PAGE_SZ - pool->osize; + while ((char*)pv <= lim) { + if (!verifying) 
arraylist_push(&bits_save[gc_bits(pv)], pv); + gc_bits(pv) = bits; + pv = (gcval_t*)((char*)pv + pool->osize); + } + } } - gc_push_root(v, 0); - schedule_finalization(v); } - gc_push_root(finalizer_table.table[i+1], 0); } } +} + +static void restore(void) +{ + for(int b = 0; b < 4; b++) { + for(int i = 0; i < bits_save[b].len; i++) { + gc_bits(bits_save[b].items[i]) = b; + } + } +} + +static void gc_verify_track() +{ + do { + arraylist_push(&lostval_parents_done, lostval); + JL_PRINTF(JL_STDOUT, "Now looking for 0x%lx =======\n", lostval); + clear_mark(GC_CLEAN); + pre_mark(); + post_mark(&finalizer_list, 1); + post_mark(&finalizer_list_marked, 1); + if (lostval_parents.len == 0) { + JL_PRINTF(JL_STDOUT, "Could not find the missing link. We missed a toplevel root. This is odd.\n"); + break; + } + jl_value_t* lostval_parent = NULL; + for(int i = 0; i < lostval_parents.len; i++) { + lostval_parent = (jl_value_t*)lostval_parents.items[i]; + int clean_len = bits_save[GC_CLEAN].len; + for(int j = 0; j < clean_len + bits_save[GC_QUEUED].len; j++) { + if (bits_save[j >= clean_len ? GC_QUEUED : GC_CLEAN].items[j >= clean_len ? 
j - clean_len : j] == lostval_parent) { + lostval = lostval_parent; + lostval_parent = NULL; + break; + } + } + if (lostval_parent != NULL) break; + } + if (lostval_parent == NULL) { // all parents of lostval were also scheduled for deletion + lostval = arraylist_pop(&lostval_parents); + } + else { + JL_PRINTF(JL_STDOUT, "Missing write barrier found !\n"); + JL_PRINTF(JL_STDOUT, "0x%lx was written a reference to 0x%lx that was not recorded\n", lostval_parent, lostval); + JL_PRINTF(JL_STDOUT, "(details above)\n"); + lostval = NULL; + } + restore(); + } while(lostval != NULL); +} - visit_mark_stack(); +static void gc_verify(void) +{ + lostval = NULL; + lostval_parents.len = 0; + lostval_parents_done.len = 0; + check_timeout = 0; + clear_mark(GC_CLEAN); + verifying = 1; + pre_mark(); + post_mark(&finalizer_list, 1); + post_mark(&finalizer_list_marked, 1); + int clean_len = bits_save[GC_CLEAN].len; + for(int i = 0; i < clean_len + bits_save[GC_QUEUED].len; i++) { + gcval_t* v = (gcval_t*)bits_save[i >= clean_len ? GC_QUEUED : GC_CLEAN].items[i >= clean_len ? i - clean_len : i]; + if (gc_marked(v)) { + JL_PRINTF(JL_STDOUT, "Error. 
Early free of 0x%lx type :", (uptrint_t)v); + jl_(jl_typeof(v)); + JL_PRINTF(JL_STDOUT, "val : "); + jl_(v); + JL_PRINTF(JL_STDOUT, "Let's try to backtrack the missing write barrier :\n"); + lostval = v; + break; + } + } + if (lostval == NULL) { + verifying = 0; + restore(); // we did not miss anything + return; + } + restore(); + gc_verify_track(); + abort(); } +#endif + // collector entry point and control @@ -917,8 +1931,10 @@ DLLEXPORT void jl_gc_enable(void) { is_gc_enabled = 1; } DLLEXPORT void jl_gc_disable(void) { is_gc_enabled = 0; } DLLEXPORT int jl_gc_is_enabled(void) { return is_gc_enabled; } -DLLEXPORT int64_t jl_gc_total_bytes(void) { return total_allocd_bytes + allocd_bytes; } +DLLEXPORT int64_t jl_gc_total_bytes(void) { return total_allocd_bytes + allocd_bytes + collect_interval; } DLLEXPORT uint64_t jl_gc_total_hrtime(void) { return total_gc_time; } +DLLEXPORT int64_t jl_gc_num_pause(void) { return n_pause; } +DLLEXPORT int64_t jl_gc_num_full_sweep(void) { return n_full_sweep; } int64_t diff_gc_total_bytes(void) { @@ -929,103 +1945,314 @@ int64_t diff_gc_total_bytes(void) } void sync_gc_total_bytes(void) {last_gc_total_bytes = jl_gc_total_bytes();} -void jl_gc_ephemeral_on(void) { pools = &ephe_pools[0]; } -void jl_gc_ephemeral_off(void) { pools = &norm_pools[0]; } - #if defined(MEMPROFILE) static void all_pool_stats(void); static void big_obj_stats(void); #endif #ifdef OBJPROFILE -static void print_obj_profile(void) +static void reset_obj_profile() { - for(int i=0; i < obj_counts.size; i+=2) { - if (obj_counts.table[i+1] != HT_NOTFOUND) { - jl_printf(JL_STDERR, "%d ", obj_counts.table[i+1]-1); - jl_static_show(JL_STDERR, (jl_value_t*)obj_counts.table[i]); + for(int g=0; g < 3; g++) { + htable_reset(&obj_counts[g], 0); + htable_reset(&obj_sizes[g], 0); + } +} + +static void print_obj_profile(htable_t nums, htable_t sizes) +{ + for(int i=0; i < nums.size; i+=2) { + if (nums.table[i+1] != HT_NOTFOUND) { + void* ty = nums.table[i]; + int num = 
(int)nums.table[i+1] - 1; + size_t sz = (int)ptrhash_get(&sizes, ty) - 1; + jl_printf(JL_STDERR, " %6d : %4d kB of ", num, sz/1024); + if (ty == BUFFTY) + jl_printf(JL_STDERR, "buffer"); + else if (ty == MATY) + jl_printf(JL_STDERR, "malloc"); + else + jl_static_show(JL_STDERR, (jl_value_t*)ty); jl_printf(JL_STDERR, "\n"); } } } + +void print_obj_profiles(void) +{ + jl_printf(JL_STDERR, "Transient mark :\n"); + print_obj_profile(obj_counts[0], obj_sizes[0]); + jl_printf(JL_STDERR, "Perm mark :\n"); + print_obj_profile(obj_counts[1], obj_sizes[1]); + jl_printf(JL_STDERR, "Remset :\n"); + print_obj_profile(obj_counts[2], obj_sizes[2]); +} #endif -void jl_gc_collect(void) +int saved_mark_sp = 0; +int sweep_mask = GC_MARKED; +#define MIN_SCAN_BYTES 1024*1024 + +static void gc_mark_task_stack(jl_task_t*,int); + +void prepare_sweep(void) { - size_t actual_allocd = allocd_bytes; - total_allocd_bytes += allocd_bytes; - allocd_bytes = 0; - if (is_gc_enabled) { - JL_SIGATOMIC_BEGIN(); - jl_in_gc = 1; - uint64_t t0 = jl_hrtime(); - gc_mark(); -#ifdef GCTIME - JL_PRINTF(JL_STDERR, "mark time %.3f ms\n", (jl_hrtime()-t0)*1.0e6); +} + +#ifdef GC_VERIFY +static void clear_mark(int); #endif -#if defined(MEMPROFILE) - all_pool_stats(); - big_obj_stats(); + + +void jl_gc_collect(int full) +{ + if (!is_gc_enabled) return; + if (jl_in_gc) return; + jl_in_gc = 1; + JL_SIGATOMIC_BEGIN(); + uint64_t t0 = jl_hrtime(); + int recollect = 0; +#if defined(GC_TIME) + int wb_activations = mark_sp - saved_mark_sp; #endif -#ifdef GCTIME - uint64_t t1 = jl_hrtime(); + int64_t last_perm_scanned_bytes = perm_scanned_bytes; + if (!sweeping) { + + inc_count++; + quick_count++; + + scanned_bytes_goal = inc_count*(live_bytes/gc_inc_steps + mark_sp*sizeof(void*)); + scanned_bytes_goal = scanned_bytes_goal < MIN_SCAN_BYTES ? MIN_SCAN_BYTES : scanned_bytes_goal; + if (gc_inc_steps > 1) + check_timeout = 1; + assert(mark_sp == 0); + + // 1. 
mark every object in the remset + reset_remset(); + // avoid counting remembered objects & bindings twice in perm_scanned_bytes + for(int i = 0; i < last_remset->len; i++) { + uintptr_t item = (uintptr_t)last_remset->items[i]; + void* ptr = (void*)(item & ~(uintptr_t)1); + objprofile_count(jl_typeof(ptr), 2, 0); + gc_bits(ptr) = GC_MARKED; + } + for (int i = 0; i < rem_bindings.len; i++) { + void *ptr = rem_bindings.items[i]; + gc_bits(gc_val_buf(ptr)) = GC_MARKED; + } + + for (int i = 0; i < last_remset->len; i++) { + uintptr_t item = (uintptr_t)last_remset->items[i]; + void* ptr = (void*)(item & ~(uintptr_t)1); + push_root(ptr, 0, gc_bits(ptr)); + } + + // 2. mark every object in a remembered binding + int n_bnd_refyoung = 0; + for (int i = 0; i < rem_bindings.len; i++) { + void *ptr = rem_bindings.items[i]; + if (gc_push_root(((jl_binding_t*)ptr)->value, 0) == GC_MARKED_NOESC) { + rem_bindings.items[n_bnd_refyoung] = ptr; + n_bnd_refyoung++; + } + } + rem_bindings.len = n_bnd_refyoung; + + // 3. 
walk roots + pre_mark(); + visit_mark_stack(GC_MARKED_NOESC); + + allocd_bytes_since_sweep += allocd_bytes + (int64_t)collect_interval; + +#if defined(GC_TIME) || defined(GC_FINAL_STATS) + uint64_t mark_pause = jl_hrtime() - t0; #endif - sweep_weak_refs(); - gc_sweep(); -#ifdef GCTIME - JL_PRINTF(JL_STDERR, "sweep time %.3f ms\n", (jl_hrtime()-t1)*1.0e6); +#ifdef GC_TIME + JL_PRINTF(JL_STDOUT, "GC mark pause %.2f ms | scanned %ld kB = %ld + %ld | stack %d -> %d (wb %d) | remset %d %d\n", NS2MS(mark_pause), (scanned_bytes + perm_scanned_bytes)/1024, scanned_bytes/1024, perm_scanned_bytes/1024, saved_mark_sp, mark_sp, wb_activations, last_remset->len, allocd_bytes/1024); + saved_mark_sp = mark_sp; #endif - int nfinal = to_finalize.len; - run_finalizers(); - jl_in_gc = 0; - JL_SIGATOMIC_END(); - total_gc_time += (jl_hrtime()-t0); -#if defined(GC_FINAL_STATS) - total_freed_bytes += freed_bytes; +#ifdef GC_FINAL_STATS + total_mark_time += mark_pause; #endif -#ifdef OBJPROFILE - print_obj_profile(); - htable_reset(&obj_counts, 0); + } + #ifdef GC_TIME + int64_t bonus = -1, SAVE = -1, SAVE2 = -1, SAVE3 = -1, pct = -1; + #endif + int64_t estimate_freed = -1; + +#if defined(GC_TIME) || defined(GC_FINAL_STATS) + uint64_t post_time = 0, finalize_time = 0; +#endif + if(mark_sp == 0 || sweeping) { +#if defined(GC_TIME) || defined(GC_FINAL_STATS) + uint64_t sweep_t0 = jl_hrtime(); +#endif + int64_t actual_allocd = allocd_bytes_since_sweep; + if (!sweeping) { + // marking is over +#if defined(GC_TIME) || defined(GC_FINAL_STATS) + post_time = jl_hrtime(); +#endif + // 4. 
check for objects to finalize + post_mark(&finalizer_list, 0); + if (prev_sweep_mask == GC_MARKED) { + post_mark(&finalizer_list_marked, 0); + } +#if defined(GC_TIME) || defined(GC_FINAL_STATS) + post_time = jl_hrtime() - post_time; +#endif + estimate_freed = live_bytes - scanned_bytes - perm_scanned_bytes + actual_allocd; + +#ifdef GC_VERIFY + gc_verify(); #endif - // tune collect interval based on current live ratio #if defined(MEMPROFILE) - jl_printf(JL_STDERR, "allocd %ld, freed %ld, interval %ld, ratio %.2f\n", - actual_allocd, freed_bytes, collect_interval, - (double)freed_bytes/(double)actual_allocd); + all_pool_stats(); + big_obj_stats(); +#endif +#ifdef OBJPROFILE + print_obj_profiles(); + reset_obj_profile(); #endif - if (freed_bytes < (7*(actual_allocd/10))) { - if (collect_interval <= 2*(max_collect_interval/5)) - collect_interval = 5*(collect_interval/2); + total_allocd_bytes += allocd_bytes_since_sweep; + if (prev_sweep_mask == GC_MARKED_NOESC) + promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; + // 5. next collection decision + int not_freed_enough = estimate_freed < (7*(actual_allocd/10)); + if ((full || (not_freed_enough && (promoted_bytes >= default_collect_interval || prev_sweep_mask == GC_MARKED))) && n_pause > 1) { + if (prev_sweep_mask != GC_MARKED || full) { + if (full) recollect = 1; // TODO enable this? + } + if (not_freed_enough) { + if (collect_interval < default_collect_interval) + collect_interval = default_collect_interval; + else if (collect_interval <= 2*(max_collect_interval/5)) { + collect_interval = 5*(collect_interval/2); + } + } + sweep_mask = GC_MARKED; + promoted_bytes = 0; + quick_count = 0; + } else { + collect_interval = default_collect_interval/2; + sweep_mask = GC_MARKED_NOESC; + } + if (sweep_mask == GC_MARKED) + perm_scanned_bytes = 0; + scanned_bytes = 0; + // 5. 
start sweeping + gc_sweep_once(sweep_mask); + sweeping = 1; } - else { - collect_interval = default_collect_interval; + if (gc_sweep_inc(sweep_mask)) { + // sweeping is over + // 6. if it is a quick sweep, put back the remembered objects in queued state + // so that we don't trigger the barrier again on them. + if (sweep_mask == GC_MARKED_NOESC) { + for (int i = 0; i < remset->len; i++) { + gc_bits(((uintptr_t)remset->items[i] & ~(uintptr_t)1)) = GC_QUEUED; + } + for (int i = 0; i < rem_bindings.len; i++) { + void *ptr = rem_bindings.items[i]; + gc_bits(gc_val_buf(ptr)) = GC_QUEUED; + } + } + else { + remset->len = 0; + rem_bindings.len = 0; + n_full_sweep++; + } + + sweep_weak_refs(); + sweeping = 0; + if (sweep_mask == GC_MARKED) { + tasks.len = 0; + } +#ifdef GC_TIME + SAVE2 = freed_bytes; + SAVE3 = allocd_bytes_since_sweep; + pct = actual_allocd ? (freed_bytes*100)/actual_allocd : -1; +#endif + prev_sweep_mask = sweep_mask; + + + allocd_bytes = -(int64_t)collect_interval; + inc_count = 0; + live_bytes += -freed_bytes + allocd_bytes_since_sweep; + allocd_bytes_since_sweep = 0; + freed_bytes = 0; + +#if defined(GC_FINAL_STATS) || defined(GC_TIME) + finalize_time = jl_hrtime(); +#endif + run_finalizers(); +#if defined(GC_FINAL_STATS) || defined(GC_TIME) + finalize_time = jl_hrtime() - finalize_time; +#endif } - freed_bytes = 0; - // if a lot of objects were finalized, re-run GC to finish freeing - // their storage if possible. 
- if (nfinal > 100000) - jl_gc_collect(); +#if defined(GC_FINAL_STATS) || defined(GC_TIME) + uint64_t sweep_pause = jl_hrtime() - sweep_t0; +#endif +#ifdef GC_FINAL_STATS + total_sweep_time += sweep_pause - finalize_time - post_time; + total_fin_time += finalize_time + post_time; +#endif +#ifdef GC_TIME + JL_PRINTF(JL_STDOUT, "GC sweep pause %.2f ms live %ld kB (freed %d kB EST %d kB [error %d] = %d%% of allocd %d kB b/r %ld/%ld) (%.2f ms in post_mark, %.2f ms in %d fin) (marked in %d inc) mask %d | next in %d kB\n", NS2MS(sweep_pause), live_bytes/1024, SAVE2/1024, estimate_freed/1024, (SAVE2 - estimate_freed), pct, SAVE3/1024, bonus/1024, SAVE/1024, NS2MS(post_time), NS2MS(finalize_time), n_finalized, inc_count, sweep_mask, -allocd_bytes/1024); +#endif } + n_pause++; + uint64_t pause = jl_hrtime() - t0; + total_gc_time += pause; +#ifdef GC_FINAL_STATS + max_pause = max_pause < pause ? pause : max_pause; +#endif + JL_SIGATOMIC_END(); + jl_in_gc = 0; +#ifdef GC_TIME + if (estimate_freed != SAVE2) { + // this should not happen but it does + // mostly because of gc_counted_* allocations + } +#endif + if (recollect) + jl_gc_collect(0); } // allocator entry points void *allocb(size_t sz) { - void *b; + buff_t *b; sz += sizeof(void*); #ifdef MEMDEBUG - b = alloc_big(sz); + b = (buff_t*)alloc_big(sz); + b->pooled = 0; #else if (sz > 2048) { - b = alloc_big(sz); + b = (buff_t*)alloc_big(sz); + b->pooled = 0; } else { - b = pool_alloc(&pools[szclass(sz)]); + b = (buff_t*)pool_alloc(&pools[szclass(sz)]); + b->pooled = 1; } #endif - return (void*)((void**)b + 1); + return &b->data; +} + +void *reallocb(void *b, size_t sz) +{ + buff_t *buff = gc_val_buf(b); + if (buff->pooled) { + void* b2 = allocb(sz); + memcpy(b2, b, page_metadata(buff)->osize); + return b2; + } else { + bigval_t* bv = (bigval_t*)realloc(bigval_header(buff), sz + (BVOFFS + 1)*sizeof(void*)); + return (char*)bv + (BVOFFS + 1)*sizeof(void*); + } } DLLEXPORT void *allocobj(size_t sz) @@ -1033,9 +2260,10 @@ 
DLLEXPORT void *allocobj(size_t sz) #ifdef MEMDEBUG return alloc_big(sz); #endif - if (sz > 2048) + if (sz <= 2048) + return pool_alloc(&pools[szclass(sz)]); + else return alloc_big(sz); - return pool_alloc(&pools[szclass(sz)]); } DLLEXPORT void *alloc_2w(void) @@ -1044,9 +2272,9 @@ DLLEXPORT void *alloc_2w(void) return alloc_big(2*sizeof(void*)); #endif #ifdef _P64 - return pool_alloc(&pools[2]); + return _pool_alloc(&pools[2], 2*sizeof(void*)); #else - return pool_alloc(&pools[0]); + return _pool_alloc(&pools[0], 2*sizeof(void*)); #endif } @@ -1056,10 +2284,11 @@ DLLEXPORT void *alloc_3w(void) return alloc_big(3*sizeof(void*)); #endif #ifdef _P64 - return pool_alloc(&pools[4]); + return _pool_alloc(&pools[4], 3*sizeof(void*)); #else - return pool_alloc(&pools[1]); + return _pool_alloc(&pools[1], 3*sizeof(void*)); #endif + } DLLEXPORT void *alloc_4w(void) @@ -1068,7 +2297,7 @@ DLLEXPORT void *alloc_4w(void) return alloc_big(4*sizeof(void*)); #endif #ifdef _P64 - return pool_alloc(&pools[6]); + return _pool_alloc(&pools[6], 4*sizeof(void*)); #else return pool_alloc(&pools[2]); #endif @@ -1083,9 +2312,21 @@ void jl_print_gc_stats(JL_STREAM *s) malloc_stats(); double ptime = clock_now()-process_t0; jl_printf(s, "exec time\t%.5f sec\n", ptime); - jl_printf(s, "gc time \t%.5f sec (%2.1f%%)\n", gct, (gct/ptime)*100); + if (n_pause > 0) { + jl_printf(s, "gc time \t%.5f sec (%2.1f%%) in %d (%d full) collections\n", + NS_TO_S(total_gc_time), (NS_TO_S(total_gc_time)/ptime)*100, n_pause, n_full_sweep); + jl_printf(s, "gc pause \t%.2f ms avg\n\t\t%2.0f ms max\n", + NS2MS(total_gc_time)/n_pause, NS2MS(max_pause)); + jl_printf(s, "\t\t(%2d%% mark, %2d%% sweep, %2d%% finalizers)\n", + (total_mark_time*100)/total_gc_time, (total_sweep_time*100)/total_gc_time, + (total_fin_time*100)/total_gc_time); + } + int i = 0; + while (i < HEAP_COUNT && heaps[i]) i++; + jl_printf(s, "max allocated regions : %d\n", i); struct mallinfo mi = mallinfo(); jl_printf(s, "malloc size\t%d MB\n", 
mi.uordblks/1024/1024); + jl_printf(s, "max page alloc\t%ld MB\n", max_pg_count*GC_PAGE_SZ/1024/1024); jl_printf(s, "total freed\t%llu b\n", total_freed_bytes); jl_printf(s, "free rate\t%.1f MB/sec\n", (total_freed_bytes/gct)/1024/1024); } @@ -1095,34 +2336,40 @@ void jl_print_gc_stats(JL_STREAM *s) void jl_gc_init(void) { - int szc[N_POOLS] = { 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, - 64, 72, 80, 88, 96, //#=18 - - 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, - - 288, 320, 352, 384, 416, 448, 480, 512, - - 640, 768, 896, 1024, - - 1536, 2048 }; + const int* szc = sizeclasses; int i; + for(i=0; i < N_POOLS; i++) { + assert(szc[i] % 4 == 0); norm_pools[i].osize = szc[i]; - norm_pools[i].pages = NULL; norm_pools[i].freelist = NULL; - - ephe_pools[i].osize = szc[i]; - ephe_pools[i].pages = NULL; - ephe_pools[i].freelist = NULL; + norm_pools[i].newpages = NULL; + norm_pools[i].end_offset = ((GC_PAGE_SZ/szc[i]) - 1)*szc[i]; } - htable_new(&finalizer_table, 0); + collect_interval = default_collect_interval; + allocd_bytes = -default_collect_interval; + + arraylist_new(&finalizer_list, 0); arraylist_new(&to_finalize, 0); arraylist_new(&preserved_values, 0); arraylist_new(&weak_refs, 0); +#ifdef GC_VERIFY + for(int i = 0; i < 4; i++) + arraylist_new(&bits_save[i], 0); + arraylist_new(&lostval_parents, 0); + arraylist_new(&lostval_parents_done, 0); +#endif + arraylist_new(&tasks, 0); + arraylist_new(&rem_bindings, 0); + arraylist_new(remset, 0); + arraylist_new(last_remset, 0); #ifdef OBJPROFILE - htable_new(&obj_counts, 0); + for(int g=0; g<3; g++) { + htable_new(&obj_counts[g], 0); + htable_new(&obj_sizes[g], 0); + } #endif #ifdef GC_FINAL_STATS process_t0 = clock_now(); @@ -1139,59 +2386,68 @@ void jl_gc_init(void) // GC summary stats #if defined(MEMPROFILE) -static size_t pool_stats(pool_t *p, size_t *pwaste) +// TODO repair this +static size_t pool_stats(pool_t *p, size_t *pwaste, size_t *np, size_t *pnold) { gcval_t *v; gcpage_t *pg = p->pages; size_t 
osize = p->osize; - size_t nused=0, nfree=0, npgs=0; + size_t nused=0, nfree=0, npgs=0, nold = 0; while (pg != NULL) { npgs++; - v = (gcval_t*)&pg->data[0]; + v = (gcval_t*)pg->data; char *lim = (char*)v + GC_PAGE_SZ - osize; + int i = 0; while ((char*)v <= lim) { - if (!v->marked) { + if (!gc_marked(v)) { nfree++; } else { nused++; + if (gc_bits(v) == GC_MARKED) { + nold++; + } } v = (gcval_t*)((char*)v + osize); + i++; } - gcpage_t *nextpg = pg->next; + gcpage_t *nextpg = NULL; pg = nextpg; } *pwaste = npgs*GC_PAGE_SZ - (nused*p->osize); - JL_PRINTF(JL_STDOUT, - "%4d : %7d/%7d objects, %5d pages, %8d bytes, %8d waste\n", - p->osize, - nused, - nused+nfree, - npgs, - nused*p->osize, - *pwaste); + *np = npgs; + *pnold = nold; + if (npgs != 0) { + JL_PRINTF(JL_STDOUT, + "%4d : %7d/%7d objects (%3d%% old), %5d pages, %5d kB, %5d kB waste\n", + p->osize, + nused, + nused+nfree, + nused ? (nold*100)/nused : 0, + npgs, + (nused*p->osize)/1024, + *pwaste/1024); + } return nused*p->osize; } static void all_pool_stats(void) { int i; - size_t nb=0, w, tw=0, no=0, b; + size_t nb=0, w, tw=0, no=0,tp=0, nold=0,noldbytes=0, b, np, nol; for(i=0; i < N_POOLS; i++) { - b = pool_stats(&norm_pools[i], &w); + b = pool_stats(&norm_pools[i], &w, &np, &nol); nb += b; no += (b/norm_pools[i].osize); tw += w; - - b = pool_stats(&ephe_pools[i], &w); - nb += b; - no += (b/ephe_pools[i].osize); - tw += w; + tp += np; + nold += nol; + noldbytes += nol*norm_pools[i].osize; } JL_PRINTF(JL_STDOUT, - "%d objects, %d total allocated, %d total fragments\n", - no, nb, tw); + "%d objects (%d%% old), %d kB (%d%% old) total allocated, %d total fragments (%d%% overhead), in %d pages\n", + no, (nold*100)/no, nb/1024, (noldbytes*100)/nb, tw, (tw*100)/nb, tp); } static void big_obj_stats(void) @@ -1199,12 +2455,22 @@ static void big_obj_stats(void) bigval_t *v = big_objects; size_t nused=0, nbytes=0; while (v != NULL) { - if (v->marked) { + if (gc_marked(&v->_data)) { nused++; nbytes += v->sz; } v = 
v->next; } + v = big_objects_marked; + size_t nused_old=0, nbytes_old=0; + while (v != NULL) { + if (gc_marked(&v->_data)) { + nused_old++; + nbytes_old += v->sz; + } + v = v->next; + } + mallocarray_t *ma = mallocarrays; while (ma != NULL) { if (gc_marked(ma->a)) { @@ -1214,7 +2480,7 @@ static void big_obj_stats(void) ma = ma->next; } - JL_PRINTF(JL_STDOUT, "%d bytes in %d large objects\n", nbytes, nused); + JL_PRINTF(JL_STDOUT, "%d kB (%d%% old) in %d large objects (%d%% old)\n", (nbytes + nbytes_old)/1024, nbytes + nbytes_old ? (nbytes_old*100)/(nbytes + nbytes_old) : 0, nused + nused_old, nused+nused_old ? (nused_old*100)/(nused + nused_old) : 0); } #endif //MEMPROFILE diff --git a/src/gf.c b/src/gf.c index 2317cd905f58c..df7dfb5c80a3f 100644 --- a/src/gf.c +++ b/src/gf.c @@ -156,7 +156,7 @@ jl_methlist_t *mtcache_hash_lookup(jl_array_t *a, jl_value_t *ty, int tparam) return (jl_methlist_t*)JL_NULL; } -static void mtcache_rehash(jl_array_t **pa) +static void mtcache_rehash(jl_array_t **pa, jl_value_t* parent) { size_t len = (*pa)->nrows; jl_value_t **d = (jl_value_t**)(*pa)->data; @@ -173,11 +173,12 @@ static void mtcache_rehash(jl_array_t **pa) nd[uid & (len*2-1)] = (jl_value_t*)ml; } } + gc_wb(parent, n); *pa = n; } static jl_methlist_t **mtcache_hash_bp(jl_array_t **pa, jl_value_t *ty, - int tparam) + int tparam, jl_value_t* parent) { uptrint_t uid; if (jl_is_datatype(ty) && (uid = ((jl_datatype_t*)ty)->uid)) { @@ -191,7 +192,7 @@ static jl_methlist_t **mtcache_hash_bp(jl_array_t **pa, jl_value_t *ty, if (tparam) t = jl_tparam0(t); if (t == ty) return pml; - mtcache_rehash(pa); + mtcache_rehash(pa, parent); } } return NULL; @@ -309,6 +310,7 @@ jl_function_t *jl_instantiate_method(jl_function_t *f, jl_tuple_t *sp) jl_function_t *nf = jl_new_closure(f->fptr, f->env, NULL); JL_GC_PUSH1(&nf); nf->linfo = jl_add_static_parameters(f->linfo, sp); + gc_wb(nf, nf->linfo); JL_GC_POP(); return nf; } @@ -339,12 +341,13 @@ jl_function_t 
*jl_reinstantiate_method(jl_function_t *f, jl_lambda_info_t *li) static jl_methlist_t *jl_method_list_insert(jl_methlist_t **pml, jl_tuple_t *type, jl_function_t *method, jl_tuple_t *tvars, - int check_amb, int8_t isstaged); + int check_amb, int8_t isstaged, jl_value_t* parent); jl_function_t *jl_method_cache_insert(jl_methtable_t *mt, jl_tuple_t *type, jl_function_t *method) { jl_methlist_t **pml = &mt->cache; + jl_value_t* cache_array = NULL; if (jl_tuple_len(type) > 0) { jl_value_t *t0 = jl_t0(type); uptrint_t uid=0; @@ -356,22 +359,28 @@ jl_function_t *jl_method_cache_insert(jl_methtable_t *mt, jl_tuple_t *type, if (jl_is_datatype(a0)) uid = ((jl_datatype_t*)a0)->uid; if (uid > 0) { - if (mt->cache_targ == JL_NULL) + if (mt->cache_targ == JL_NULL) { mt->cache_targ = jl_alloc_cell_1d(16); - pml = mtcache_hash_bp(&mt->cache_targ, a0, 1); + gc_wb(mt, mt->cache_targ); + } + pml = mtcache_hash_bp(&mt->cache_targ, a0, 1, (jl_value_t*)mt); + cache_array = (jl_value_t*)mt->cache_targ; goto ml_do_insert; } } if (jl_is_datatype(t0)) uid = ((jl_datatype_t*)t0)->uid; if (uid > 0) { - if (mt->cache_arg1 == JL_NULL) + if (mt->cache_arg1 == JL_NULL) { mt->cache_arg1 = jl_alloc_cell_1d(16); - pml = mtcache_hash_bp(&mt->cache_arg1, t0, 0); + gc_wb(mt, mt->cache_arg1); + } + pml = mtcache_hash_bp(&mt->cache_arg1, t0, 0, (jl_value_t*)mt); + cache_array = (jl_value_t*)mt->cache_arg1; } } ml_do_insert: - return jl_method_list_insert(pml, type, method, jl_null, 0, 0)->func; + return jl_method_list_insert(pml, type, method, jl_null, 0, 0, cache_array ? 
cache_array : (jl_value_t*)mt)->func; } extern jl_function_t *jl_typeinf_func; @@ -406,6 +415,7 @@ void jl_type_infer(jl_lambda_info_t *li, jl_tuple_t *argtypes, #ifdef ENABLE_INFERENCE jl_value_t *newast = jl_apply(jl_typeinf_func, fargs, 4); li->ast = jl_tupleref(newast, 0); + gc_wb(li, li->ast); li->inferred = 1; #endif li->inInference = 0; @@ -868,8 +878,10 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tuple_t *type, if (method->linfo->unspecialized == NULL) { method->linfo->unspecialized = jl_instantiate_method(method, jl_null); + gc_wb(method->linfo, method->linfo->unspecialized); } newmeth->linfo->unspecialized = method->linfo->unspecialized; + gc_wb(newmeth->linfo, newmeth->linfo->unspecialized); } if (newmeth->linfo != NULL && newmeth->linfo->ast != NULL) { @@ -883,6 +895,7 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tuple_t *type, jl_cell_1d_push(spe, (jl_value_t*)newmeth->linfo); } method->linfo->specializations = spe; + gc_wb(method->linfo, method->linfo->specializations); jl_type_infer(newmeth->linfo, type, method->linfo); } JL_GC_POP(); @@ -1200,7 +1213,7 @@ static int has_unions(jl_tuple_t *type) static jl_methlist_t *jl_method_list_insert(jl_methlist_t **pml, jl_tuple_t *type, jl_function_t *method, jl_tuple_t *tvars, - int check_amb, int8_t isstaged) + int check_amb, int8_t isstaged, jl_value_t* parent) { jl_methlist_t *l, **pl; @@ -1225,13 +1238,16 @@ jl_methlist_t *jl_method_list_insert(jl_methlist_t **pml, jl_tuple_t *type, } JL_SIGATOMIC_BEGIN(); l->sig = type; + gc_wb(l, l->sig); l->tvars = tvars; + gc_wb(l, l->tvars); l->va = (jl_tuple_len(type) > 0 && jl_is_vararg_type(jl_tupleref(type,jl_tuple_len(type)-1))) ? 
1 : 0; l->isstaged = isstaged; l->invokes = (struct _jl_methtable_t *)JL_NULL; l->func = method; + gc_wb(l, l->func); JL_SIGATOMIC_END(); return l; } @@ -1239,6 +1255,7 @@ jl_methlist_t *jl_method_list_insert(jl_methlist_t **pml, jl_tuple_t *type, } pl = pml; l = *pml; + jl_value_t *pa = parent; while (l != JL_NULL) { if (jl_args_morespecific((jl_value_t*)type, (jl_value_t*)l->sig)) break; @@ -1248,6 +1265,7 @@ jl_methlist_t *jl_method_list_insert(jl_methlist_t **pml, jl_tuple_t *type, anonymous_sym, method->linfo); } pl = &l->next; + pa = (jl_value_t*)l; l = l->next; } jl_methlist_t *newrec = (jl_methlist_t*)allocobj(sizeof(jl_methlist_t)); @@ -1262,34 +1280,47 @@ jl_methlist_t *jl_method_list_insert(jl_methlist_t **pml, jl_tuple_t *type, newrec->invokes = (struct _jl_methtable_t*)JL_NULL; newrec->next = l; JL_SIGATOMIC_BEGIN(); + JL_GC_PUSH1(&newrec); *pl = newrec; + gc_wb(pa, newrec); // if this contains Union types, methods after it might actually be // more specific than it. we need to re-sort them. 
if (has_unions(type)) { + jl_value_t* item_parent = (jl_value_t*)newrec; + jl_value_t* next_parent = 0; jl_methlist_t *item = newrec->next, *next; jl_methlist_t **pitem = &newrec->next, **pnext; while (item != JL_NULL) { pl = pml; l = *pml; + pa = parent; next = item->next; pnext = &item->next; + next_parent = (jl_value_t*)item; while (l != newrec->next) { if (jl_args_morespecific((jl_value_t*)item->sig, (jl_value_t*)l->sig)) { // reinsert item earlier in the list *pitem = next; + gc_wb(item_parent, next); item->next = l; + gc_wb(item, item->next); *pl = item; + gc_wb(pa, item); pnext = pitem; + next_parent = item_parent; break; } pl = &l->next; + pa = (jl_value_t*)l; l = l->next; } item = next; pitem = pnext; + item_parent = next_parent; } } + JL_GC_POP(); JL_SIGATOMIC_END(); return newrec; } @@ -1316,21 +1347,26 @@ jl_methlist_t *jl_method_table_insert(jl_methtable_t *mt, jl_tuple_t *type, if (jl_tuple_len(tvars) == 1) tvars = (jl_tuple_t*)jl_t0(tvars); JL_SIGATOMIC_BEGIN(); - jl_methlist_t *ml = jl_method_list_insert(&mt->defs,type,method,tvars,1,isstaged); + jl_methlist_t *ml = jl_method_list_insert(&mt->defs,type,method,tvars,1,isstaged,(jl_value_t*)mt); // invalidate cached methods that overlap this definition remove_conflicting(&mt->cache, (jl_value_t*)type); + gc_wb(mt, mt->cache); if (mt->cache_arg1 != JL_NULL) { for(int i=0; i < jl_array_len(mt->cache_arg1); i++) { jl_methlist_t **pl = &((jl_methlist_t**)jl_array_data(mt->cache_arg1))[i]; - if (*pl && *pl != JL_NULL) + if (*pl && *pl != JL_NULL) { remove_conflicting(pl, (jl_value_t*)type); + gc_wb(mt->cache_arg1, jl_cellref(mt->cache_arg1,i)); + } } } if (mt->cache_targ != JL_NULL) { for(int i=0; i < jl_array_len(mt->cache_targ); i++) { jl_methlist_t **pl = &((jl_methlist_t**)jl_array_data(mt->cache_targ))[i]; - if (*pl && *pl != JL_NULL) + if (*pl && *pl != JL_NULL) { remove_conflicting(pl, (jl_value_t*)type); + gc_wb(mt->cache_targ, jl_cellref(mt->cache_targ,i)); + } } } // update max_args @@ -1357,7 
+1393,8 @@ void NORETURN jl_no_method_error(jl_function_t *f, jl_value_t **args, size_t na) static jl_tuple_t *arg_type_tuple(jl_value_t **args, size_t nargs) { jl_tuple_t *tt = jl_alloc_tuple(nargs); - JL_GC_PUSH1(&tt); + jl_value_t *a = NULL; + JL_GC_PUSH2(&tt, &a); size_t i; for(i=0; i < nargs; i++) { jl_value_t *ai = args[i]; @@ -1479,6 +1516,7 @@ static void all_p2c(jl_value_t *ast, jl_tuple_t *tvars) if (jl_is_lambda_info(ast)) { jl_lambda_info_t *li = (jl_lambda_info_t*)ast; li->ast = jl_prepare_ast(li, jl_null); + gc_wb(li, li->ast); parameters_to_closureenv(li->ast, tvars); all_p2c(li->ast, tvars); } @@ -1615,6 +1653,7 @@ JL_CALLABLE(jl_apply_generic) jl_lambda_info_t *li = mfunc->linfo; if (li->unspecialized == NULL) { li->unspecialized = jl_instantiate_method(mfunc, li->sparams); + gc_wb(li, li->unspecialized); } mfunc = li->unspecialized; assert(mfunc != jl_bottom_func); @@ -1709,6 +1748,7 @@ jl_value_t *jl_gf_invoke(jl_function_t *gf, jl_tuple_t *types, jl_lambda_info_t *li = mfunc->linfo; if (li->unspecialized == NULL) { li->unspecialized = jl_instantiate_method(mfunc, li->sparams); + gc_wb(li, li->unspecialized); } mfunc = li->unspecialized; } @@ -1721,8 +1761,9 @@ jl_value_t *jl_gf_invoke(jl_function_t *gf, jl_tuple_t *types, if (m->invokes == JL_NULL) { m->invokes = new_method_table(mt->name); + gc_wb(m, m->invokes); // this private method table has just this one definition - jl_method_list_insert(&m->invokes->defs,m->sig,m->func,m->tvars,0,0); + jl_method_list_insert(&m->invokes->defs,m->sig,m->func,m->tvars,0,0,(jl_value_t*)m->invokes); } tt = arg_type_tuple(args, nargs); @@ -1766,6 +1807,7 @@ void jl_initialize_generic_function(jl_function_t *f, jl_sym_t *name) { f->fptr = jl_apply_generic; f->env = (jl_value_t*)new_method_table(name); + gc_wb(f, f->env); } jl_function_t *jl_new_generic_function(jl_sym_t *name) diff --git a/src/interpreter.c b/src/interpreter.c index 7a1f003c90f2c..d2d4e1c725241 100644 --- a/src/interpreter.c +++ 
b/src/interpreter.c @@ -151,6 +151,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) jl_lambda_info_t *li = (jl_lambda_info_t*)e; if (jl_boot_file_loaded && li->ast && jl_is_expr(li->ast)) { li->ast = jl_compress_ast(li, li->ast); + gc_wb(li, li->ast); } return (jl_value_t*)jl_new_closure(NULL, (jl_value_t*)jl_null, li); } @@ -192,6 +193,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) } for(int i=0; i < na; i++) { ar[i*2+1] = eval(args[i+1], locals, nl); + gc_wb(ex->args, ar[i*2+1]); } if (na != nreq) { jl_error("wrong number of arguments"); @@ -217,7 +219,9 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) size_t i; for (i=0; i < nl; i++) { if (locals[i*2] == sym) { - return (locals[i*2+1] = eval(args[1], locals, nl)); + locals[i*2+1] = eval(args[1], locals, nl); + gc_wb(jl_current_module, locals[i*2+1]); // not sure about jl_current_module + return locals[i*2+1]; } } jl_binding_t *b = jl_get_binding_wr(jl_current_module, (jl_sym_t*)sym); @@ -252,6 +256,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) else if (ex->head == method_sym) { jl_sym_t *fname = (jl_sym_t*)args[0]; jl_value_t **bp=NULL; + jl_value_t *bp_owner=NULL; jl_binding_t *b=NULL; jl_value_t *gf=NULL; int kw=0; @@ -276,6 +281,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) if (bp == NULL) { b = jl_get_binding_for_method_def(jl_current_module, fname); bp = &b->value; + bp_owner = (jl_value_t*)jl_current_module; } } jl_value_t *atypes=NULL, *meth=NULL; @@ -285,7 +291,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) jl_check_static_parameter_conflicts((jl_lambda_info_t*)args[2], (jl_tuple_t*)jl_t1(atypes), fname); } meth = eval(args[2], locals, nl); - jl_method_def(fname, bp, b, (jl_tuple_t*)atypes, (jl_function_t*)meth, args[3], NULL, kw); + jl_method_def(fname, bp, bp_owner, b, (jl_tuple_t*)atypes, (jl_function_t*)meth, args[3], NULL, kw); 
JL_GC_POP(); return *bp; } @@ -327,6 +333,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) temp = b->value; check_can_assign_type(b); b->value = (jl_value_t*)dt; + gc_wb_binding(((void**)b)-1, dt); super = eval(args[2], locals, nl); jl_set_datatype_super(dt, super); b->value = temp; @@ -384,11 +391,13 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) // temporarily assign so binding is available for field types check_can_assign_type(b); b->value = (jl_value_t*)dt; + gc_wb_binding(b,dt); JL_TRY { // operations that can fail inside_typedef = 1; dt->types = (jl_tuple_t*)eval(args[4], locals, nl); + gc_wb(dt, dt->types); inside_typedef = 0; jl_check_type_tuple(dt->types, dt->name->name, "type definition"); super = eval(args[3], locals, nl); @@ -426,6 +435,7 @@ static jl_value_t *eval(jl_value_t *e, jl_value_t **locals, size_t nl) f->linfo && f->linfo->ast && jl_is_expr(f->linfo->ast)) { jl_lambda_info_t *li = f->linfo; li->ast = jl_compress_ast(li, li->ast); + gc_wb(li, li->ast); li->name = nm; } jl_set_global(jl_current_module, nm, (jl_value_t*)f); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 6aec85e4ae388..5c7071774136b 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -699,7 +699,7 @@ static Value *emit_pointerset(jl_value_t *e, jl_value_t *x, jl_value_t *i, jl_co else val = emit_unboxed(x,ctx); } - typed_store(thePtr, im1, val, ety, ctx, tbaa_user); + typed_store(thePtr, im1, val, ety, ctx, tbaa_user, NULL); } return mark_julia_type(thePtr, aty); } diff --git a/src/jltypes.c b/src/jltypes.c index dc5fe569730db..b2fa6486e9706 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -245,6 +245,7 @@ jl_tuple_t *jl_compute_type_union(jl_tuple_t *types) size_t n = count_union_components(types); jl_value_t **temp; JL_GC_PUSHARGS(temp, n+1); + memset((char*)temp, 0, sizeof(void*)*(n+1)); size_t idx=0; flatten_type_union(types, temp, &idx); assert(idx == n); @@ -1793,6 +1794,7 @@ static void 
cache_type_(jl_value_t *type) memcpy(nc->data, ((jl_tuple_t*)cache)->data, sizeof(void*)*jl_tuple_len(cache)); cache = (jl_value_t*)nc; ((jl_datatype_t*)type)->name->cache = cache; + gc_wb(((jl_datatype_t*)type)->name, cache); } assert(jl_is_array(cache)); jl_cell_1d_push((jl_array_t*)cache, (jl_value_t*)type); @@ -1804,6 +1806,7 @@ static void cache_type_(jl_value_t *type) memcpy(nc->data, ((jl_tuple_t*)cache)->data, sizeof(void*) * n); jl_tupleset(nc, n, (jl_value_t*)type); ((jl_datatype_t*)type)->name->cache = (jl_value_t*)nc; + gc_wb(((jl_datatype_t*)type)->name, nc); } } @@ -1960,9 +1963,12 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_value_t **env, size_t n, top.prev = stack; stack = &top; ndt->name = tn; + gc_wb(ndt, ndt->name); ndt->super = jl_any_type; ndt->parameters = iparams_tuple; + gc_wb(ndt, ndt->parameters); ndt->names = dt->names; + gc_wb(ndt, ndt->names); ndt->types = jl_null; // to be filled in below ndt->mutabl = dt->mutabl; ndt->abstract = dt->abstract; @@ -1971,11 +1977,14 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_value_t **env, size_t n, ndt->struct_decl = NULL; ndt->size = 0; ndt->alignment = 1; + ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env,n,stack, 1); + gc_wb(ndt, ndt->super); ftypes = dt->types; if (ftypes != NULL) { // recursively instantiate the types of the fields ndt->types = (jl_tuple_t*)inst_type_w_((jl_value_t*)ftypes, env, n, stack, 1); + gc_wb(ndt, ndt->types); if (!isabstract) { if (jl_tuple_len(ftypes) == 0) { ndt->size = dt->size; @@ -2044,9 +2053,11 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) env[i*2+1] = env[i*2]; } t->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, env, n, &top, 1); + gc_wb(t, t->super); if (jl_is_datatype(t)) { jl_datatype_t *st = (jl_datatype_t*)t; st->types = (jl_tuple_t*)inst_type_w_((jl_value_t*)st->types, env, n, &top, 1); + gc_wb(st, st->types); } } diff --git a/src/julia.expmap b/src/julia.expmap index 40edbbba6a1dc..57e8f3e7a5c3a 
100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -33,6 +33,8 @@ uv_*; add_library_mapping; utf8proc_*; + gc_queue_root; + gc_wb_slow; /* freebsd */ environ; diff --git a/src/julia.h b/src/julia.h index c294cf9553e3b..4fea7ca41f661 100644 --- a/src/julia.h +++ b/src/julia.h @@ -92,7 +92,8 @@ typedef struct { size_t length; #endif - unsigned short ndims:11; + unsigned short ndims:10; + unsigned short pooled:1; unsigned short ptrarray:1; // representation is pointer array /* how - allocation style @@ -415,13 +416,52 @@ extern jl_sym_t *fastmath_sym; extern jl_sym_t *simdloop_sym; extern jl_sym_t *meta_sym; extern jl_sym_t *arrow_sym; extern jl_sym_t *ldots_sym; + +// GC write barrier + +DLLEXPORT void gc_queue_root(void *root); +void gc_queue_binding(void *bnd); +void gc_setmark_buf(void *buf, int); +DLLEXPORT void gc_wb_slow(void* parent, void* ptr); + +static inline void gc_wb_binding(void *bnd, void *val) +{ + if (__unlikely((*(uintptr_t*)bnd & 1) == 1 && (*(uintptr_t*)val & 1) == 0)) + gc_queue_binding(bnd); +} + +static inline void gc_wb(void *parent, void *ptr) +{ + if (__unlikely((*((uintptr_t*)parent) & 1) == 1 && + (*((uintptr_t*)ptr) & 1) == 0)) + gc_queue_root(parent); +} + +static inline void gc_wb_buf(void *parent, void *bufptr) +{ + // if parent is marked and buf is not + if (__unlikely((*((uintptr_t*)parent) & 1) == 1)) + // (*((uintptr_t*)bufptr) & 3) != 1)) + gc_setmark_buf(bufptr, *(uintptr_t*)parent & 3); +} + +static inline void gc_wb_back(void *ptr) +{ + // if ptr is marked + if(__unlikely((*((uintptr_t*)ptr) & 1) == 1)) { + gc_queue_root(ptr); + } +} + + // object accessors ----------------------------------------------------------- #ifdef OVERLAP_TUPLE_LEN #define jl_typeof(v) ((jl_value_t*)((uptrint_t)((jl_value_t*)(v))->type & 0x000ffffffffffffeULL)) #else -#define jl_typeof(v) (((jl_value_t*)(v))->type) +#define jl_typeof(v) ((jl_value_t*)((uptrint_t)((jl_value_t*)(v))->type & ((uintptr_t)~3))) #endif + #define jl_typeis(v,t) 
(jl_typeof(v)==(jl_value_t*)(t)) #define jl_tuple_len(t) (((jl_tuple_t*)(t))->length) @@ -451,6 +491,7 @@ STATIC_INLINE jl_value_t *jl_tupleset(void *t, size_t i, void *x) { assert(i < jl_tuple_len(t)); jl_tuple_data(t)[i] = (jl_value_t*)x; + if(x) gc_wb(t, x); return (jl_value_t*)x; } STATIC_INLINE jl_value_t *jl_cellref(void *a, size_t i) @@ -462,6 +503,7 @@ STATIC_INLINE jl_value_t *jl_cellset(void *a, size_t i, void *x) { assert(i < jl_array_len(a)); ((jl_value_t**)(jl_array_data(a)))[i] = (jl_value_t*)x; + if(x) gc_wb(a, x); return (jl_value_t*)x; } @@ -469,6 +511,7 @@ STATIC_INLINE jl_value_t *jl_cellset(void *a, size_t i, void *x) # define jl_t1(t) jl_tupleref(t,1) #define jl_exprarg(e,n) (((jl_value_t**)jl_array_data(((jl_expr_t*)(e))->args))[n]) +#define jl_exprargset(e, n, v) jl_cellset(((jl_expr_t*)(e))->args, n, v) #define jl_fieldref(s,i) jl_get_nth_field(((jl_value_t*)s),i) @@ -484,6 +527,7 @@ STATIC_INLINE jl_value_t *jl_cellset(void *a, size_t i, void *x) #define jl_tparam0(t) jl_tupleref(((jl_datatype_t*)(t))->parameters, 0) #define jl_tparam1(t) jl_tupleref(((jl_datatype_t*)(t))->parameters, 1) + #define jl_cell_data(a) ((jl_value_t**)((jl_array_t*)a)->data) #define jl_string_data(s) ((char*)((jl_array_t*)((jl_value_t**)(s))[1])->data) #define jl_iostr_data(s) ((char*)((jl_array_t*)((jl_value_t**)(s))[1])->data) @@ -500,7 +544,6 @@ STATIC_INLINE jl_value_t *jl_cellset(void *a, size_t i, void *x) #define jl_datatype_size(t) (((jl_datatype_t*)t)->size) // basic predicates ----------------------------------------------------------- - #define jl_is_null(v) (((jl_value_t*)(v)) == ((jl_value_t*)jl_null)) #define jl_is_nothing(v) (((jl_value_t*)(v)) == ((jl_value_t*)jl_nothing)) #define jl_is_tuple(v) jl_typeis(v,jl_tuple_type) @@ -693,7 +736,7 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n); jl_function_t *jl_new_generic_function(jl_sym_t *name); void jl_add_method(jl_function_t *gf, jl_tuple_t *types, jl_function_t *meth, jl_tuple_t *tvars, int8_t 
isstaged); -DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_binding_t *bnd, +DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_value_t *bp_owner, jl_binding_t *bnd, jl_tuple_t *argtypes, jl_function_t *f, jl_value_t *isstaged, jl_value_t *call_func, int iskw); DLLEXPORT jl_value_t *jl_box_bool(int8_t x); @@ -1082,27 +1125,31 @@ DLLEXPORT int64_t jl_gc_total_bytes(void); DLLEXPORT uint64_t jl_gc_total_hrtime(void); int64_t diff_gc_total_bytes(void); void sync_gc_total_bytes(void); -void jl_gc_ephemeral_on(void); -void jl_gc_ephemeral_off(void); -DLLEXPORT void jl_gc_collect(void); + +DLLEXPORT void jl_gc_collect(int); DLLEXPORT void jl_gc_preserve(jl_value_t *v); DLLEXPORT void jl_gc_unpreserve(void); DLLEXPORT int jl_gc_n_preserved_values(void); + DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f); DLLEXPORT void jl_finalize(jl_value_t *o); DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value); void *jl_gc_managed_malloc(size_t sz); -void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned); +void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t* owner); void jl_gc_free_array(jl_array_t *a); void jl_gc_track_malloced_array(jl_array_t *a); +void jl_gc_count_allocd(size_t sz); void jl_gc_run_all_finalizers(void); DLLEXPORT void *alloc_2w(void); DLLEXPORT void *alloc_3w(void); DLLEXPORT void *alloc_4w(void); void *allocb(size_t sz); +void *reallocb(void*, size_t); DLLEXPORT void *allocobj(size_t sz); DLLEXPORT void jl_clear_malloc_data(void); +DLLEXPORT int64_t jl_gc_num_pause(void); +DLLEXPORT int64_t jl_gc_num_full_sweep(void); #else @@ -1250,7 +1297,6 @@ void jl_longjmp(jmp_buf _Buf,int _Value); for (i__ca=1, jl_eh_restore_state(&__eh); i__ca; i__ca=0) #endif - // I/O system ----------------------------------------------------------------- #define JL_STREAM uv_stream_t diff --git a/src/julia_internal.h b/src/julia_internal.h index 
1873bc81d057a..d914d04ca708a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -10,7 +10,17 @@ extern "C" { STATIC_INLINE jl_value_t *newobj(jl_value_t *type, size_t nfields) { - jl_value_t *jv = (jl_value_t*)allocobj((1+nfields) * sizeof(void*)); + jl_value_t *jv = NULL; + switch (nfields) { + case 1: + jv = (jl_value_t*)alloc_2w(); break; + case 2: + jv = (jl_value_t*)alloc_3w(); break; + case 3: + jv = (jl_value_t*)alloc_4w(); break; + default: + jv = (jl_value_t*)allocobj((1+nfields) * sizeof(void*)); + } jv->type = type; return jv; } diff --git a/src/module.c b/src/module.c index 76698b73191d3..8d7070f074ba7 100644 --- a/src/module.c +++ b/src/module.c @@ -17,8 +17,8 @@ jl_module_t *jl_current_module=NULL; jl_module_t *jl_new_module(jl_sym_t *name) { jl_module_t *m = (jl_module_t*)allocobj(sizeof(jl_module_t)); - JL_GC_PUSH1(&m); m->type = (jl_value_t*)jl_module_type; + JL_GC_PUSH1(&m); assert(jl_is_symbol(name)); m->name = name; m->parent = NULL; @@ -40,6 +40,7 @@ DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name) { jl_module_t *m = jl_new_module(name); m->parent = jl_main_module; + gc_wb(m, m->parent); jl_add_standard_imports(m); return (jl_value_t*)m; } @@ -82,6 +83,7 @@ jl_binding_t *jl_get_binding_wr(jl_module_t *m, jl_sym_t *var) b = new_binding(var); b->owner = m; *bp = b; + gc_wb_buf(m, b); return *bp; } @@ -109,6 +111,7 @@ jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var) b = new_binding(var); b->owner = m; *bp = b; + gc_wb_buf(m, b); return *bp; } @@ -225,6 +228,7 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, nb->owner = b->owner; nb->imported = (explici!=0); *bp = nb; + gc_wb_buf(to, nb); } } } @@ -293,6 +297,7 @@ void jl_module_export(jl_module_t *from, jl_sym_t *s) // don't yet know who the owner is b->owner = NULL; *bp = b; + gc_wb_buf(from, b); } assert(*bp != HT_NOTFOUND); (*bp)->exportp = 1; @@ -330,6 +335,7 @@ void jl_set_global(jl_module_t *m, jl_sym_t *var, 
jl_value_t *val) jl_binding_t *bp = jl_get_binding_wr(m, var); if (!bp->constp) { bp->value = val; + gc_wb(m, val); } } @@ -339,6 +345,7 @@ void jl_set_const(jl_module_t *m, jl_sym_t *var, jl_value_t *val) if (!bp->constp) { bp->value = val; bp->constp = 1; + gc_wb(m, val); } } @@ -361,6 +368,7 @@ DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs) } } b->value = rhs; + gc_wb_binding(((void**)b)-1, rhs); } DLLEXPORT void jl_declare_constant(jl_binding_t *b) diff --git a/src/options.h b/src/options.h index eba5ea1e4f23b..cf98971d18468 100644 --- a/src/options.h +++ b/src/options.h @@ -33,6 +33,10 @@ // catch invalid accesses. //#define MEMDEBUG +// GC_VERIFY forces a full verification gc along with every quick gc to ensure no +// reachable memory is freed +//#define GC_VERIFY + // profiling options // GC_FINAL_STATS prints total GC stats at exit @@ -42,7 +46,7 @@ //#define MEMPROFILE -// GCTIME prints time taken by each phase of GC -//#define GCTIME +// GC_TIME prints time taken by each phase of GC +//#define GC_TIME // OBJPROFILE counts objects by type //#define OBJPROFILE diff --git a/src/table.c b/src/table.c index 310b2d90f3334..ff8e86199b226 100644 --- a/src/table.c +++ b/src/table.c @@ -14,9 +14,16 @@ void jl_idtable_rehash(jl_array_t **pa, size_t newsz) size_t i; void **ol = (void**)(*pa)->data; *pa = jl_alloc_cell_1d(newsz); + // we do not check the write barrier here + // because pa always points to a C stack location + // (see eqtable_put) + // it should be changed if this assumption no longer holds for(i=0; i < sz; i+=2) { if (ol[i+1] != NULL) { (*jl_table_lookup_bp(pa, ol[i])) = ol[i+1]; + gc_wb(*pa, ol[i+1]); + // it is however necessary here because allocation + // can (and will) occur in a recursive call inside table_lookup_bp } } } @@ -40,6 +47,7 @@ static void **jl_table_lookup_bp(jl_array_t **pa, void *key) do { if (tab[index+1] == NULL) { tab[index] = key; + gc_wb(a, key); return &tab[index+1]; } @@ -108,6 +116,7 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val) 
{ void **bp = jl_table_lookup_bp(&h, key); *bp = val; + gc_wb(h, val); return h; } diff --git a/src/task.c b/src/task.c index e46ebc9148756..703e99121d954 100644 --- a/src/task.c +++ b/src/task.c @@ -7,7 +7,6 @@ #include #include #include -//#include #include #include #include "julia.h" @@ -39,16 +38,16 @@ static int _stack_grows_up; static size_t _frame_offset; struct _probe_data { - intptr_t low_bound; /* below probe on stack */ - intptr_t probe_local; /* local to probe on stack */ - intptr_t high_bound; /* above probe on stack */ - intptr_t prior_local; /* value of probe_local from earlier call */ + intptr_t low_bound; /* below probe on stack */ + intptr_t probe_local; /* local to probe on stack */ + intptr_t high_bound; /* above probe on stack */ + intptr_t prior_local; /* value of probe_local from earlier call */ - jl_jmp_buf probe_env; /* saved environment of probe */ - jl_jmp_buf probe_sameAR; /* second environment saved by same call */ - jl_jmp_buf probe_samePC; /* environment saved on previous call */ + jl_jmp_buf probe_env; /* saved environment of probe */ + jl_jmp_buf probe_sameAR; /* second environment saved by same call */ + jl_jmp_buf probe_samePC; /* environment saved on previous call */ - jl_jmp_buf * ref_probe; /* switches between probes */ + jl_jmp_buf * ref_probe; /* switches between probes */ }; static void boundhigh(struct _probe_data *p) @@ -179,6 +178,10 @@ static void NOINLINE save_stack(jl_task_t *t) } t->ssize = nb; memcpy(buf, (char*)&_x, nb); + // this task's stack could have been modified after + // it was marked by an incremental collection + // move the barrier back instead of walking it again here + gc_wb_back(t); } void NOINLINE restore_stack(jl_task_t *t, jl_jmp_buf *where, char *p) @@ -313,6 +316,7 @@ static void ctx_switch(jl_task_t *t, jl_jmp_buf *where) } t->last = jl_current_task; + gc_wb(t, t->last); jl_current_task = t; #ifdef COPY_STACKS @@ -794,6 +798,7 @@ DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, size_t 
ssize) char *stk = allocb(ssize+pagesz+(pagesz-1)); t->stkbuf = stk; + gc_wb_buf(t, t->stkbuf); stk = (char*)LLT_ALIGN((uptrint_t)stk, pagesz); // add a guard page to detect stack overflow // the GC might read this area, which is ok, just prevent writes diff --git a/src/toplevel.c b/src/toplevel.c index bb15ff437e884..2afa2c092e71b 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -111,6 +111,9 @@ jl_value_t *jl_eval_module_expr(jl_expr_t *ex) jl_module_t *newm = jl_new_module(name); newm->parent = parent_module; b->value = (jl_value_t*)newm; + + gc_wb(parent_module, newm); + if (parent_module == jl_main_module && name == jl_symbol("Base")) { // pick up Base module during bootstrap jl_old_base_module = jl_base_module; @@ -618,6 +621,7 @@ void jl_set_datatype_super(jl_datatype_t *tt, jl_value_t *super) jl_errorf("invalid subtyping in definition of %s",tt->name->name->name); } tt->super = (jl_datatype_t*)super; + gc_wb(tt, tt->super); if (jl_tuple_len(tt->parameters) > 0) { tt->name->cache = (jl_value_t*)jl_null; jl_reinstantiate_inner_types(tt); @@ -648,7 +652,8 @@ static int type_contains(jl_value_t *ty, jl_value_t *x) void print_func_loc(JL_STREAM *s, jl_lambda_info_t *li); -DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_binding_t *bnd, +DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_value_t *bp_owner, + jl_binding_t *bnd, jl_tuple_t *argtypes, jl_function_t *f, jl_value_t *isstaged, jl_value_t *call_func, int iskw) { @@ -690,12 +695,14 @@ DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_binding_ gf = call_func; name = call_sym; // edit args, insert type first - if (!jl_is_expr(f->linfo->ast)) + if (!jl_is_expr(f->linfo->ast)) { f->linfo->ast = jl_uncompress_ast(f->linfo, f->linfo->ast); + gc_wb(f->linfo, f->linfo->ast); + } jl_array_t *al = jl_lam_args((jl_expr_t*)f->linfo->ast); if (jl_array_len(al) == 0) { al = jl_alloc_cell_1d(1); - jl_exprarg(f->linfo->ast, 0) = (jl_value_t*)al; + 
jl_exprargset(f->linfo->ast, 0, (jl_value_t*)al); } else { jl_array_grow_beg(al, 1); @@ -714,6 +721,7 @@ DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_binding_ } if (iskw) { bp = (jl_value_t**)&((jl_methtable_t*)((jl_function_t*)gf)->env)->kwsorter; + bp_owner = (jl_value_t*)((jl_function_t*)gf)->env; gf = *bp; } } @@ -747,6 +755,7 @@ DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_binding_ if (*bp == NULL) { gf = (jl_value_t*)jl_new_generic_function(name); *bp = gf; + if (bp_owner) gc_wb(bp_owner, gf); } assert(jl_is_function(f)); assert(jl_is_tuple(argtypes)); @@ -757,6 +766,7 @@ DLLEXPORT jl_value_t *jl_method_def(jl_sym_t *name, jl_value_t **bp, jl_binding_ f->linfo && f->linfo->ast && jl_is_expr(f->linfo->ast)) { jl_lambda_info_t *li = f->linfo; li->ast = jl_compress_ast(li, li->ast); + gc_wb(li, li->ast); } JL_GC_POP(); return gf; diff --git a/test/perf/kernel/perf.jl b/test/perf/kernel/perf.jl index 07d60374aa4ad..7267d18f46650 100644 --- a/test/perf/kernel/perf.jl +++ b/test/perf/kernel/perf.jl @@ -217,3 +217,4 @@ logical_y = map(iseven, 1:length(x)) @timeit (for n=1:100 add1!(x,logical_y) end) "add1_logical" "Increment x_i if y_i is true" @timeit (for n=1:100 devec_add1_logical!(x,logical_y) end) "devec_add1_logical" "Devectorized increment x_i if y_i is true" +maxrss("kernel") diff --git a/test/perf/micro/perf.jl b/test/perf/micro/perf.jl index 6c21ebc6628c8..245e4f30668d4 100644 --- a/test/perf/micro/perf.jl +++ b/test/perf/micro/perf.jl @@ -148,3 +148,5 @@ end printfd(1) @timeit printfd(100000) "printfd" "Printing to a file descriptor" end + +maxrss("micro") diff --git a/test/perf/perfcomp.jl b/test/perf/perfcomp.jl index 3a3d1b8c865ce..c66459910a224 100644 --- a/test/perf/perfcomp.jl +++ b/test/perf/perfcomp.jl @@ -11,7 +11,8 @@ end function main() baseline = readperf(open(ARGS[1])) torun = length(ARGS) > 1 ? ARGS[2] : "all" - io,p = readsfrom(`make -s $torun`) + e = haskey(ENV,"J") ? 
"JULIA_EXECUTABLE=$(ENV["J"])" : "" + io,p = open(`make $e -s $torun`, "r") newp = readperf(io) names = sort(intersect(keys(baseline),keys(newp))) diff --git a/test/perf/perfutil.jl b/test/perf/perfutil.jl index 0cfc8fbf6b1da..3712b38d44aaa 100644 --- a/test/perf/perfutil.jl +++ b/test/perf/perfutil.jl @@ -1,4 +1,5 @@ -const ntrials = 5 +const mintrials = 5 +const mintime = 2000.0 print_output = isempty(ARGS) codespeed = length(ARGS) > 0 && ARGS[1] == "codespeed" @@ -62,13 +63,17 @@ end macro timeit(ex,name,desc,group...) quote - t = zeros(ntrials) - for i=0:ntrials + t = Float64[] + tot = 0.0 + i = 0 + while i < mintrials || tot < mintime e = 1000*(@elapsed $(esc(ex))) + tot += e if i > 0 # warm up on first iteration - t[i] = e + push!(t, e) end + i += 1 end @output_timings t $name $desc $group end @@ -89,6 +94,18 @@ macro timeit_init(ex,init,name,desc,group...) end end +function maxrss(name) + @linux_only begin + rus = Array(Int64, div(144,8)) + fill!(rus, 0x0) + res = ccall(:getrusage, Int32, (Int32, Ptr{Void}), 0, rus) + if res == 0 + mx = rus[5]/1024 + @printf "julia,%s.mem,%f,%f,%f,%f\n" name mx mx mx 0 + end + end +end + # seed rng for more consistent timings srand(1776) diff --git a/test/perf/shootout/k_nucleotide.jl b/test/perf/shootout/k_nucleotide.jl index e4c75121e869e..0037ecdc78f2c 100644 --- a/test/perf/shootout/k_nucleotide.jl +++ b/test/perf/shootout/k_nucleotide.jl @@ -81,7 +81,7 @@ function k_nucleotide(infile="knucleotide-input.txt") arr1 = sorted_array(count(str, 1)) arr2 = sorted_array(count(str, 2)) - + close(input) # print_knucs(arr1) # print_knucs(arr2) # for s in ["GGT", "GGTA", "GGTATT", "GGTATTTTAATT", "GGTATTTTAATTTATAGT"] diff --git a/test/perf/shootout/perf.jl b/test/perf/shootout/perf.jl index 94a9612042a33..c2aabbe728687 100644 --- a/test/perf/shootout/perf.jl +++ b/test/perf/shootout/perf.jl @@ -41,3 +41,5 @@ include("revcomp.jl") include("spectralnorm.jl") @timeit spectralnorm() "spectralnorm" "Eigenvalue using the power method" + 
+maxrss("shootout") diff --git a/test/perf/shootout/revcomp.jl b/test/perf/shootout/revcomp.jl index 17e1d5ea70713..713750c660711 100644 --- a/test/perf/shootout/revcomp.jl +++ b/test/perf/shootout/revcomp.jl @@ -54,5 +54,6 @@ function revcomp(infile="revcomp-input.txt") append!(buff, [uint8(revcompdata[char(line[i])]) for i=1:l]) end end + close(input) end