From 7090b236fd39136f53b87e38575d636c0659597a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 12 Dec 2019 15:03:13 -0500 Subject: [PATCH] simplify interpreter-stacktraces code (#34019) The unwinder and debuggers thought that our assembly code here was not quite correct. Rather than attempt to fix that, let the compiler generate it so we don't need to maintain it anymore. Previously, it was also not particularly optimal for the interpreter to need a couple of extra function calls (through an indirect pointer, too) to set up the call frame, so now we avoid that. This simplifies the design by adding a new flag bit to the existing pgcstack frames. In the future, we may end up generalizing this support to handle stack allocation of arbitrary objects, but for now we implement just enough support for our current needs. It's unclear why dbghelp StackWalk glitches here a couple of times (it is reporting the stack pointer instead of the instruction pointer as the return address), but this design is robust against that now (even though I've manually verified that that particular glitch still happens with this patch). 
fix #33877 --- src/Makefile | 2 +- src/builtins.c | 4 +- src/gc.c | 6 +- src/interpreter-stacktrace.c | 442 ------------------------------ src/interpreter.c | 206 ++++++++------ src/julia.h | 43 +-- src/julia_internal.h | 11 +- src/llvm-final-gc-lowering.cpp | 2 +- src/signal-handling.c | 2 +- src/signals-mach.c | 8 +- src/signals-unix.c | 6 +- src/signals-win.c | 6 +- src/stackwalk.c | 88 +++--- test/llvmpasses/final-lower-gc.ll | 2 +- 14 files changed, 218 insertions(+), 610 deletions(-) delete mode 100644 src/interpreter-stacktrace.c diff --git a/src/Makefile b/src/Makefile index 7a9dd723351ac..9e5c42318c620 100644 --- a/src/Makefile +++ b/src/Makefile @@ -222,7 +222,7 @@ $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h -$(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/interpreter-stacktrace.c $(SRCDIR)/builtin_proto.h +$(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h diff --git a/src/builtins.c b/src/builtins.c index 2deab966aca08..328be6aefd543 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -629,7 +629,9 @@ static jl_value_t *do_apply(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl } if (arg_heap) { // optimization: keep only the first root, free the others - ((void**)roots)[-2] = (void*)(((size_t)1) << 1); +#ifndef __clang_analyzer__ + ((void**)roots)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(1); +#endif } jl_value_t *result = jl_apply(newargs, n); JL_GC_POP(); diff --git a/src/gc.c b/src/gc.c index 8abc328dbf0c5..e101a5fa8d76d 100644 --- 
a/src/gc.c +++ b/src/gc.c @@ -341,7 +341,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o, static void jl_gc_push_arraylist(jl_ptls_t ptls, arraylist_t *list) { void **items = list->items; - items[0] = (void*)(((uintptr_t)list->len - 2) << 1); + items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); items[1] = ptls->pgcstack; ptls->pgcstack = (jl_gcframe_t*)items; } @@ -2045,7 +2045,7 @@ stack: { uintptr_t offset = stack->offset; uintptr_t lb = stack->lb; uintptr_t ub = stack->ub; - uint32_t nr = nroots >> 1; + uint32_t nr = nroots >> 2; uintptr_t nptr = 0; while (1) { jl_value_t ***rts = (jl_value_t***)(((void**)s) + 2); @@ -2087,7 +2087,7 @@ stack: { uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); assert(new_nroots <= UINT32_MAX); nroots = stack->nroots = (uint32_t)new_nroots; - nr = nroots >> 1; + nr = nroots >> 2; continue; } goto pop; diff --git a/src/interpreter-stacktrace.c b/src/interpreter-stacktrace.c deleted file mode 100644 index dacccb5e12e77..0000000000000 --- a/src/interpreter-stacktrace.c +++ /dev/null @@ -1,442 +0,0 @@ -// This file is a part of Julia. 
License is MIT: https://julialang.org/license - -// #include'd from interpreter.c - -// Backtrace support -#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) || defined(_OS_WINDOWS_) -extern uintptr_t __start_jl_interpreter_frame_val; -uintptr_t __start_jl_interpreter_frame = (uintptr_t)&__start_jl_interpreter_frame_val; -extern uintptr_t __stop_jl_interpreter_frame_val; -uintptr_t __stop_jl_interpreter_frame = (uintptr_t)&__stop_jl_interpreter_frame_val; - -#define SECT_INTERP JL_SECTION("jl_interpreter_frame_val") -#if defined(_CPU_X86_) && defined(_OS_WINDOWS_) -#define MANGLE(x) "@" x "@8" -#else -#define MANGLE(x) x -#endif - -#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) -#if defined(_CPU_ARM_) -# define ASM_FUNCTION_TYPE "%function" -#else -# define ASM_FUNCTION_TYPE "@function" -#endif -#define ASM_ENTRY \ - ".text\n" \ - ".p2align 4,0x90\n" \ - ".global enter_interpreter_frame\n" \ - ".type enter_interpreter_frame," ASM_FUNCTION_TYPE "\n" -#if defined(_OS_LINUX_) -#define ASM_END ".previous\n" -#else -#define ASM_END -#endif -#else -#define ASM_ENTRY \ - ".text\n" \ - ".globl enter_interpreter_frame\n" -#define ASM_END -#endif - -#elif defined(_OS_DARWIN_) -extern uintptr_t __start_jl_interpreter_frame_val __asm("section$start$__TEXT$__jif"); -uintptr_t __start_jl_interpreter_frame = (uintptr_t)&__start_jl_interpreter_frame_val; -extern uintptr_t __stop_jl_interpreter_frame_val __asm("section$end$__TEXT$__jif"); -uintptr_t __stop_jl_interpreter_frame = (uintptr_t)&__stop_jl_interpreter_frame_val; - -#define SECT_INTERP JL_SECTION("__TEXT,__jif") - -#define MANGLE(x) "_" x -#define ASM_ENTRY \ - ".section __TEXT,__text,regular,pure_instructions\n" \ - ".globl _enter_interpreter_frame\n" -#define ASM_END ".previous" - -#else -#define SECT_INTERP -#define NO_INTERP_BT -#warning "Interpreter backtraces not implemented for this platform" -#endif - -#define STR(x) #x -#define XSTR(x) STR(x) - -// This function is special. 
The unwinder looks for this function to find interpreter -// stack frames. -#ifdef _CPU_X86_64_ - -// Instructions: Make sure that MAX_INTERP_STATE_SIZE is a multiple of -// alignof(struct interpreter_state) and larger than -// sizeof(struct interpreter_state). Additionally, make sure that -// MAX_INTERP_STATE_SIZE+STACK_PADDING+8 is a multiple of 16 to -// ensure the proper stack alignment. -#define MAX_INTERP_STATE_SIZE 72 -#define STACK_PADDING 0 - -static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, "Stack layout invariants violated."); -static_assert(MAX_INTERP_STATE_SIZE % alignof(interpreter_state) == 0, "Stack layout invariants violated"); -static_assert(((MAX_INTERP_STATE_SIZE + STACK_PADDING + 8) % 16) == 0, "Stack layout invariants violated"); - -#ifdef _OS_WINDOWS_ -size_t TOTAL_STACK_PADDING = STACK_PADDING + 32; -#else -size_t TOTAL_STACK_PADDING = STACK_PADDING; -#endif - -asm( - ASM_ENTRY - MANGLE("enter_interpreter_frame") ":\n" - ".cfi_startproc\n" - "\tsubq $" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING)", %rsp\n" - ".cfi_def_cfa_offset " XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING)" + 8\n" -#ifdef _OS_WINDOWS_ -#define ARG1_REG "rcx" -#else -#define ARG1_REG "rdi" -#endif - "\tmovq %" ARG1_REG ", %rax\n" - "\tleaq " XSTR(STACK_PADDING) "(%rsp), %" ARG1_REG "\n" - // Zero out the src and mi fields - "\tmovq $0, 0(%" ARG1_REG ")\n" - "\tmovq $0, 8(%" ARG1_REG ")\n" -#ifdef _OS_WINDOWS_ - // Make space for the register parameter area - "\tsubq $32, %rsp\n" -#endif - // The L here conviences the OS X linker not to terminate the unwind info early - "Lenter_interpreter_frame_start_val:\n" - "\tcallq *%rax\n" - "Lenter_interpreter_frame_end_val:\n" -#ifdef _OS_WINDOWS_ - "\taddq $32, %rsp\n" -#endif - "\taddq $" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING)", %rsp\n" -#ifndef _OS_DARWIN_ - // Somehow this throws off compact unwind info on OS X - ".cfi_def_cfa_offset 8\n" -#endif - "\tretq\n" - ".cfi_endproc\n" - 
ASM_END - ); - -#define CALLBACK_ABI - -#elif defined(_CPU_X86_) - -#define MAX_INTERP_STATE_SIZE 36 -#ifdef _OS_WINDOWS_ -#define STACK_PADDING 4 -#else -#define STACK_PADDING 8 -#endif - -size_t TOTAL_STACK_PADDING = STACK_PADDING; - -static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, "Stack layout invariants violated"); -static_assert(MAX_INTERP_STATE_SIZE % alignof(interpreter_state) == 0, "Stack layout invariants violated"); -#ifndef _OS_WINDOWS_ -static_assert((MAX_INTERP_STATE_SIZE + STACK_PADDING + 4) % 16 == 0, "Stack layout invariants violated"); -#endif - -asm( - ASM_ENTRY - MANGLE("enter_interpreter_frame") ":\n" - ".cfi_startproc\n" -#ifdef _OS_WINDOWS_ -/* - * On win32, we set -mincoming-stack-boundary=2. This causes GCC to emit stack - * realignment gadgets into the prologue of every function. Unfortunately for - * us there are two different kinds of such gadgets and since we don't know - * which one the target function is going to use, we can't use the same trick - * as everywhere else. From https://gcc.gnu.org/ml/gcc/2007-12/msg00503.html, - * the two prologues are: - * - * pushl %ebp - * movl %esp, %ebp - * andl $-16, %esp - * - * and - * - * pushl %edi // Save callee save reg edi - * leal 8(%esp), %edi // Save address of parameter frame - * andl $-16, %esp // Align local stack - * pushl $4(%edi) // save return address - * pushl %ebp // save old ebp - * movl %esp, %ebp // point ebp to pseudo frame - * - * From the perspective of the unwinder, the first case looks like a regular - * (without the realignment gadget) stack frame. However, for the second one, - * the compiler deliberately constructs a "fake stack frame" that has an - * incorrect stack address for the previous frame. 
To work around all of this, - * use ebp based addressing on win32 - */ -#define FP_CAPTURE_OFFSET MAX_INTERP_STATE_SIZE -#define ENTRY_OFFSET 8 - "\tpushl %ebp\n" - ".cfi_def_cfa_offset " XSTR(ENTRY_OFFSET)"\n" - "\tmovl %esp, %ebp\n" -#else -#define ENTRY_OFFSET 4 -#endif - "\tsubl $" XSTR(MAX_INTERP_STATE_SIZE) ", %esp\n" - ".cfi_def_cfa_offset " XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(ENTRY_OFFSET) "\n" - "\tmovl %ecx, %eax\n" - "\tmovl %esp, %ecx\n" - // Zero out the src and mi fields - "\tmovl $0, (%esp)\n" - "\tmovl $0, 4(%esp)\n" - // Restore 16 byte stack alignment - // Technically not necessary on windows, because we don't assume this - // alignment, but let's be nice if we ever start doing that. - "\tsubl $" XSTR(STACK_PADDING) ", %esp\n" - ".cfi_def_cfa_offset " XSTR(STACK_PADDING) " + " XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(ENTRY_OFFSET) "\n" - "Lenter_interpreter_frame_start_val:\n" - "\tcalll *%eax\n" - "Lenter_interpreter_frame_end_val:\n" - "\taddl $" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING) ", %esp\n" -#ifdef _OS_WINDOWS_ - ".cfi_def_cfa_offset 8\n" - "\tpopl %ebp\n" -#endif - ".cfi_def_cfa_offset 4\n" - "\tret\n" - ".cfi_endproc\n" - ASM_END - ); - -#define CALLBACK_ABI __attribute__((fastcall)) -static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, "Update assembly code above"); - -#elif defined(_CPU_AARCH64_) - -#define MAX_INTERP_STATE_SIZE 64 -#define STACK_PADDING 16 - -// Check that the interpreter state can fit -static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, - "Stack layout invariants violated."); -// Check that the alignment of the type is satisfied -// (16 is stack alignment at function boundary) -static_assert(alignof(interpreter_state) <= 16, "Stack layout invariants violated"); -static_assert(STACK_PADDING % alignof(interpreter_state) == 0, - "Stack layout invariants violated"); -// Check that ABI stack alignment requirement is maintained. 
-static_assert(((MAX_INTERP_STATE_SIZE + STACK_PADDING) % 16) == 0, - "Stack layout invariants violated"); -// Check that the padding is large enough for lr. -static_assert(STACK_PADDING >= sizeof(void*), "Stack layout invariants violated"); - -size_t TOTAL_STACK_PADDING = STACK_PADDING; - -asm( - ASM_ENTRY - MANGLE("enter_interpreter_frame") ":\n" - ".cfi_startproc\n" - // Save lr - "\tstr x30, [sp, #-(" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING) ")]!\n" - "\t.cfi_def_cfa_offset (" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING) ")\n" - "\t.cfi_offset 30, -(" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING) ")\n" - "\tmov x2, x0\n" - // Zero out the src and mi fields - "\tstp xzr, xzr, [sp, " XSTR(STACK_PADDING) "]\n" - "\tadd x0, sp, " XSTR(STACK_PADDING) "\n" - "Lenter_interpreter_frame_start_val:\n" - "\tblr x2\n" - "Lenter_interpreter_frame_end_val:\n" - "\tldr x30, [sp], (" XSTR(MAX_INTERP_STATE_SIZE) " + " XSTR(STACK_PADDING) ")\n" - "\t.cfi_restore 30\n" - "\t.cfi_def_cfa_offset 0\n" - "\tret\n" - ".cfi_endproc\n" - ASM_END - ); - -#define CALLBACK_ABI - -#elif defined(_CPU_ARM_) - -#define MAX_INTERP_STATE_SIZE 48 - -// Check that the interpreter state can fit -static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, - "Stack layout invariants violated."); -// Check that the alignment of the type is satisfied -// (16 is what we realign the stack to) -static_assert(alignof(interpreter_state) <= 16, "Stack layout invariants violated"); - -size_t TOTAL_STACK_PADDING = 0; - -asm( - ASM_ENTRY - MANGLE("enter_interpreter_frame") ":\n" - ".fnstart\n" - "\tpush {fp, lr}\n" - "\t.save {fp, lr}\n" - "\t.setfp fp, sp, #4\n" - "\tadd fp, sp, #4\n" - "\tmov r2, r0\n" - // Reserve enough space and realign stack to 16bytes - // The realignment is for consistency with every other architectures. - // It isn't strictly necessary since we currently do not rely on it. 
- "\tsub sp, sp, #" XSTR(MAX_INTERP_STATE_SIZE) "\n" - "\tbic sp, sp, #15\n" - // Zero out the src and mi field - "\tmov ip, #0\n" - "\tstr ip, [sp]\n" - "\tstr ip, [sp, #4]\n" - "\tmov r0, sp\n" - "Lenter_interpreter_frame_start_val:\n" - "\tblx r2\n" - "Lenter_interpreter_frame_end_val:\n" - "\tsub sp, fp, #4\n" - "\tpop {fp, pc}\n" - "\t.fnend\n" - ASM_END - ); - -#define CALLBACK_ABI - -#elif defined(_CPU_PPC64_) -/** - * Implementation notes: - * - * This needs to follow the PPC ELFv2 ABI. Which means that there is a localentry - * and a global entry. The local entry expects r2/TOC to be set correctly, while - * the global entry expects r12 to be set to the function address, and from there - * restores r2/TOC. The function pointer we are getting passed point to the global - * entry and thus we need to set r12 correctly. - * - * - LR is stored in the caller - * - r1/SP is a back-chain that needs to be atomically updated - */ - -#define MAX_INTERP_STATE_SIZE 64 -#define MIN_STACK 32 -#define STACK_PADDING 0 -#define STACK_SIZE (MIN_STACK + MAX_INTERP_STATE_SIZE + STACK_PADDING) - -size_t TOTAL_STACK_PADDING = MIN_STACK; - -// Check that the interpreter state can fit -static_assert(sizeof(interpreter_state) <= MAX_INTERP_STATE_SIZE, - "Stack layout invariants violated."); -// Check that the alignment of the type is satisfied -static_assert(alignof(interpreter_state) <= 16, "Stack layout invariants violated"); -// Check that ABI stack alignment requirement is maintained. 
-static_assert(STACK_SIZE % 16 == 0, "Stack layout invariants violated"); -static_assert(MIN_STACK % 16 == 0, "Stack layout invariants violated"); - -asm( - ASM_ENTRY - MANGLE("enter_interpreter_frame") ":\n" - "\taddis 2, 12, .TOC.-enter_interpreter_frame@ha\n" - "\taddi 2, 2, .TOC.-enter_interpreter_frame@l\n" - "\t.localentry enter_interpreter_frame, .-enter_interpreter_frame\n" - ".cfi_startproc\n" - // store LR - "\tmflr 0\n" - "\tstd 0, 16(1)\n" - ".cfi_offset lr, 16\n" - // set up stack frame - "\tstdu 1, -" XSTR(STACK_SIZE) "(1)\n" - ".cfi_adjust_cfa_offset " XSTR(STACK_SIZE) "\n" - "\tmtctr 3\n" // move arg1 (func pointer) to ctr - "\tmr 12, 3\n" // move func pointer to r12 if we jump to global entry point - "\tcal 3, " XSTR(MIN_STACK) "(1)\n" // move pointer to INTERP_STATE to arg1 - // zero out src and mi field - "\tli 6, 0\n" - "\tstd 6, 0(3)\n" - "\tstd 6, 8(3)\n" - // store TOC - "\tstd 2, 24(1)\n" - "Lenter_interpreter_frame_start_val:\n" - "\tbctrl\n" - "Lenter_interpreter_frame_end_val:\n" - // restore TOC - "\tld 2, 24(1)\n" - // restore stack frame - "\tld 1, 0(1)\n" - // restore LR - "\tld 0, 16(1)\n" - "\tmtlr 0\n" - ".cfi_same_value lr\n" - "\tblr\n" - ".cfi_endproc\n" - ASM_END - ); - -#define CALLBACK_ABI - -#else -#warning "Interpreter backtraces not implemented for this platform" -#define NO_INTERP_BT -#endif - -#ifndef NO_INTERP_BT -extern uintptr_t enter_interpreter_frame_start_val asm("Lenter_interpreter_frame_start_val"); -extern uintptr_t enter_interpreter_frame_end_val asm("Lenter_interpreter_frame_end_val"); -uintptr_t enter_interpreter_frame_start = (uintptr_t)&enter_interpreter_frame_start_val; -uintptr_t enter_interpreter_frame_end = (uintptr_t)&enter_interpreter_frame_end_val; - -JL_DLLEXPORT int jl_is_interpreter_frame(uintptr_t ip) -{ - return __start_jl_interpreter_frame <= ip && ip <= __stop_jl_interpreter_frame; -} - -JL_DLLEXPORT int jl_is_enter_interpreter_frame(uintptr_t ip) -{ - return enter_interpreter_frame_start <= 
ip && ip <= enter_interpreter_frame_end; -} - -JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_entry, uintptr_t sp, - uintptr_t fp, size_t space_remaining) -{ -#ifdef FP_CAPTURE_OFFSET - interpreter_state *s = (interpreter_state *)(fp-FP_CAPTURE_OFFSET); -#else - interpreter_state *s = (interpreter_state *)(sp+TOTAL_STACK_PADDING); -#endif - int need_module = !s->mi; - int required_space = need_module ? 4 : 3; - if (space_remaining < required_space) - return 0; // Should not happen - size_t njlvalues = need_module ? 2 : 1; - uintptr_t entry_tags = jl_bt_entry_descriptor(njlvalues, 0, JL_BT_INTERP_FRAME_TAG, s->ip); - bt_entry[0].uintptr = JL_BT_NON_PTR_ENTRY; - bt_entry[1].uintptr = entry_tags; - bt_entry[2].jlvalue = s->mi ? (jl_value_t*)s->mi : - s->src ? (jl_value_t*)s->src : (jl_value_t*)jl_nothing; - if (need_module) { - // If we only have a CodeInfo (s->src), we are in a top level thunk and - // need to record the module separately. - bt_entry[3].jlvalue = (jl_value_t*)s->module; - } - return required_space; -} - -extern void * CALLBACK_ABI enter_interpreter_frame(void * CALLBACK_ABI (*callback)(interpreter_state *, void *), void *arg); -#else -JL_DLLEXPORT int jl_is_interpreter_frame(uintptr_t ip) -{ - return 0; -} - -JL_DLLEXPORT int jl_is_enter_interpreter_frame(uintptr_t ip) -{ - return 0; -} - -JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_entry, uintptr_t sp, - uintptr_t fp, size_t space_remaining) -{ - // Leave bt_entry[0] as the native instruction ptr - return 0; -} -#define CALLBACK_ABI -void *NOINLINE enter_interpreter_frame(void *(*callback)(interpreter_state *, void *), void *arg) { - interpreter_state state = {}; - return callback(&state, arg); -} -#endif diff --git a/src/interpreter.c b/src/interpreter.c index 396b590beed2b..177a2a1d17067 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -25,7 +25,48 @@ typedef struct { int continue_at; // statement index to jump to after leaving exception handler (0 if 
none) } interpreter_state; -#include "interpreter-stacktrace.c" + +// general alloca rules are incompatible on C and C++, so define a macro that deals with the difference +#ifdef __cplusplus +#define JL_CPPALLOCA(var,n) \ + var = (decltype(var))alloca((n)) +#else +#define JL_CPPALLOCA(var,n) \ + JL_GCC_IGNORE_START("-Wc++-compat") \ + var = alloca((n)); \ + JL_GCC_IGNORE_STOP +#endif + +#ifdef __clang_analyzer__ + +extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT; + +// This is necessary, because otherwise the analyzer considers this undefined +// behavior and terminates the exploration +#define JL_GC_PUSHFRAME(frame,n) \ + JL_CPPALLOCA(frame, sizeof(*frame)+((n) * sizeof(jl_value_t*))); \ + memset(&frame[1], 0, sizeof(void*) * n); \ + _JL_GC_PUSHARGS((jl_value_t**)&frame[1], n); + +#else + +#define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<2)|2) + +#define JL_GC_PUSHFRAME(frame,n) \ + JL_CPPALLOCA(frame, sizeof(*frame)+(((n)+3)*sizeof(jl_value_t*))); \ + ((void**)&frame[1])[0] = NULL; \ + ((void**)&frame[1])[1] = (void*)JL_GC_ENCODE_PUSHFRAME(n); \ + ((void**)&frame[1])[2] = jl_pgcstack; \ + memset(&((void**)&frame[1])[3], 0, (n)*sizeof(jl_value_t*)); \ + jl_pgcstack = (jl_gcframe_t*)&(((void**)&frame[1])[1]) + +// we define this separately so that we can populate the frame before we add it to the backtrace +// it's recommended to mark the containing function with NOINLINE, though not essential +#define JL_GC_ENABLEFRAME(frame) \ + ((void**)&frame[1])[0] = __builtin_frame_address(0); + +#endif + static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s); static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel); @@ -37,7 +78,7 @@ int jl_is_toplevel_only_expr(jl_value_t *e); extern int inside_typedef; // this is a heuristic for allowing "redefining" a type to something identical -SECT_INTERP static int equiv_type(jl_datatype_t *dta, jl_datatype_t *dtb) +static int equiv_type(jl_datatype_t *dta, 
jl_datatype_t *dtb) { if (!(jl_typeof(dta) == jl_typeof(dtb) && dta->name->name == dtb->name->name && @@ -99,7 +140,7 @@ SECT_INTERP static int equiv_type(jl_datatype_t *dta, jl_datatype_t *dtb) return 0; } -SECT_INTERP static void check_can_assign_type(jl_binding_t *b, jl_value_t *rhs) +static void check_can_assign_type(jl_binding_t *b, jl_value_t *rhs) { if (b->constp && b->value != NULL && jl_typeof(b->value) != jl_typeof(rhs)) jl_errorf("invalid redefinition of constant %s", @@ -109,7 +150,7 @@ SECT_INTERP static void check_can_assign_type(jl_binding_t *b, jl_value_t *rhs) void jl_reinstantiate_inner_types(jl_datatype_t *t); void jl_reset_instantiate_inner_types(jl_datatype_t *t); -SECT_INTERP void jl_set_datatype_super(jl_datatype_t *tt, jl_value_t *super) +void jl_set_datatype_super(jl_datatype_t *tt, jl_value_t *super) { if (!jl_is_datatype(super) || !jl_is_abstracttype(super) || tt->name == ((jl_datatype_t*)super)->name || @@ -317,7 +358,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s) // expression evaluator -SECT_INTERP static jl_value_t *do_call(jl_value_t **args, size_t nargs, interpreter_state *s) +static jl_value_t *do_call(jl_value_t **args, size_t nargs, interpreter_state *s) { jl_value_t **argv; assert(nargs >= 1); @@ -330,7 +371,7 @@ SECT_INTERP static jl_value_t *do_call(jl_value_t **args, size_t nargs, interpre return result; } -SECT_INTERP static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state *s) +static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state *s) { jl_value_t **argv; assert(nargs >= 2); @@ -345,7 +386,7 @@ SECT_INTERP static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interp return result; } -SECT_INTERP jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e) +jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e) { jl_value_t *v = jl_get_global(m, e); if (v == NULL) @@ -353,23 +394,23 @@ SECT_INTERP jl_value_t 
*jl_eval_global_var(jl_module_t *m, jl_sym_t *e) return v; } -SECT_INTERP static int jl_source_nslots(jl_code_info_t *src) JL_NOTSAFEPOINT +static int jl_source_nslots(jl_code_info_t *src) JL_NOTSAFEPOINT { return jl_array_len(src->slotflags); } -SECT_INTERP static int jl_source_nssavalues(jl_code_info_t *src) JL_NOTSAFEPOINT +static int jl_source_nssavalues(jl_code_info_t *src) JL_NOTSAFEPOINT { return jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); } -SECT_INTERP static void eval_stmt_value(jl_value_t *stmt, interpreter_state *s) +static void eval_stmt_value(jl_value_t *stmt, interpreter_state *s) { jl_value_t *res = eval_value(stmt, s); s->locals[jl_source_nslots(s->src) + s->ip] = res; } -SECT_INTERP static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) +static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) { jl_code_info_t *src = s->src; if (jl_is_ssavalue(e)) { @@ -521,7 +562,7 @@ SECT_INTERP static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) } // phi nodes don't behave like proper instructions, so we require a special interpreter to handle them -SECT_INTERP static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_t to) +static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_t to) { size_t from = s->ip; size_t ip = to; @@ -602,7 +643,7 @@ SECT_INTERP static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size return ip; } -SECT_INTERP static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel) +static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel) { jl_handler_t __eh; size_t ns = jl_array_len(stmts); @@ -813,124 +854,111 @@ jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi) // interpreter entry points -struct jl_interpret_call_args { - jl_method_instance_t *mi; - jl_value_t *f; - jl_value_t **args; - uint32_t nargs; 
-}; - -SECT_INTERP CALLBACK_ABI void *jl_interpret_call_callback(interpreter_state *s, void *vargs) +jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *codeinst) { - struct jl_interpret_call_args *args = - (struct jl_interpret_call_args *)vargs; - JL_GC_PROMISE_ROOTED(args); - jl_code_info_t *src = jl_code_for_interpreter(args->mi); - + interpreter_state *s; + jl_method_instance_t *mi = codeinst->def; + jl_code_info_t *src = jl_code_for_interpreter(mi); jl_array_t *stmts = src->code; assert(jl_typeis(stmts, jl_array_any_type)); - jl_value_t **locals; - JL_GC_PUSHARGS(locals, jl_source_nslots(src) + jl_source_nssavalues(src) + 2); + unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2; + JL_GC_PUSHFRAME(s, nroots); + jl_value_t **locals = (jl_value_t**)&s[1] + 3; locals[0] = (jl_value_t*)src; locals[1] = (jl_value_t*)stmts; s->locals = locals + 2; s->src = src; - if (jl_is_module(args->mi->def.value)) { - s->module = args->mi->def.module; + if (jl_is_module(mi->def.value)) { + s->module = mi->def.module; } else { - s->module = args->mi->def.method->module; - size_t nargs = args->mi->def.method->nargs; - int isva = args->mi->def.method->isva ? 1 : 0; + s->module = mi->def.method->module; + size_t defargs = mi->def.method->nargs; + int isva = mi->def.method->isva ? 1 : 0; size_t i; - s->locals[0] = args->f; - for (i = 1; i < nargs - isva; i++) - s->locals[i] = args->args[i - 1]; + s->locals[0] = f; + assert(isva ? 
nargs + 2 >= defargs : nargs + 1 == defargs); + for (i = 1; i < defargs - isva; i++) + s->locals[i] = args[i - 1]; if (isva) { - assert(nargs >= 2); - s->locals[nargs - 1] = jl_f_tuple(NULL, &args->args[nargs - 2], args->nargs + 2 - nargs); + assert(defargs >= 2); + s->locals[defargs - 1] = jl_f_tuple(NULL, &args[defargs - 2], nargs + 2 - defargs); } } - s->sparam_vals = args->mi->sparam_vals; + s->sparam_vals = mi->sparam_vals; s->preevaluation = 0; s->continue_at = 0; - s->mi = args->mi; + s->mi = mi; + JL_GC_ENABLEFRAME(s); jl_value_t *r = eval_body(stmts, s, 0, 0); JL_GC_POP(); - return (void*)r; + return r; } -SECT_INTERP jl_value_t *jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *codeinst) +jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t *src) { - struct jl_interpret_call_args callback_args = { codeinst->def, f, args, nargs }; - return (jl_value_t*)enter_interpreter_frame(jl_interpret_call_callback, (void *)&callback_args); -} - -struct jl_interpret_toplevel_thunk_args { - jl_module_t *m; - jl_code_info_t *src; -}; -SECT_INTERP CALLBACK_ABI void *jl_interpret_toplevel_thunk_callback(interpreter_state *s, void *vargs) { - struct jl_interpret_toplevel_thunk_args *args = - (struct jl_interpret_toplevel_thunk_args*)vargs; - JL_GC_PROMISE_ROOTED(args); - jl_array_t *stmts = args->src->code; + interpreter_state *s; + unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src); + JL_GC_PUSHFRAME(s, nroots); + jl_array_t *stmts = src->code; assert(jl_typeis(stmts, jl_array_any_type)); - jl_value_t **locals; - JL_GC_PUSHARGS(locals, jl_source_nslots(args->src) + jl_source_nssavalues(args->src)); - s->src = args->src; - s->locals = locals; - s->module = args->m; + s->src = src; + s->locals = (jl_value_t**)&s[1] + 3; + s->module = m; s->sparam_vals = jl_emptysvec; s->continue_at = 0; s->mi = NULL; + JL_GC_ENABLEFRAME(s); size_t last_age = jl_get_ptls_states()->world_age; jl_value_t *r = 
eval_body(stmts, s, 0, 1); jl_get_ptls_states()->world_age = last_age; JL_GC_POP(); - return (void*)r; -} - -SECT_INTERP jl_value_t *jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t *src) -{ - struct jl_interpret_toplevel_thunk_args args = { m, src }; - return (jl_value_t *)enter_interpreter_frame(jl_interpret_toplevel_thunk_callback, (void*)&args); + return r; } // deprecated: do not use this method in new code // it uses special scoping / evaluation / error rules // which should instead be handled in lowering -struct interpret_toplevel_expr_in_args { - jl_module_t *m; - jl_value_t *e; - jl_code_info_t *src; - jl_svec_t *sparam_vals; -}; - -SECT_INTERP CALLBACK_ABI void *jl_interpret_toplevel_expr_in_callback(interpreter_state *s, void *vargs) +jl_value_t *NOINLINE jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e, jl_code_info_t *src, jl_svec_t *sparam_vals) { - struct interpret_toplevel_expr_in_args *args = - (struct interpret_toplevel_expr_in_args*)vargs; - JL_GC_PROMISE_ROOTED(args); - s->src = args->src; - s->module = args->m; - s->sparam_vals = args->sparam_vals; - s->preevaluation = (s->sparam_vals != NULL); + interpreter_state *s; + JL_GC_PUSHFRAME(s, 0); + s->src = src; + s->module = m; + s->sparam_vals = sparam_vals; + s->preevaluation = (sparam_vals != NULL); s->continue_at = 0; s->mi = NULL; - jl_value_t *v = eval_value(args->e, s); + JL_GC_ENABLEFRAME(s); + jl_value_t *v = eval_value(e, s); assert(v); - return (void*)v; + JL_GC_POP(); + return v; } -SECT_INTERP jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e, jl_code_info_t *src, jl_svec_t *sparam_vals) +JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_entry, + void *stateend, size_t space_remaining) { - struct interpret_toplevel_expr_in_args args = { - m, e, src, sparam_vals - }; - return (jl_value_t *)enter_interpreter_frame(jl_interpret_toplevel_expr_in_callback, (void*)&args); + interpreter_state *s = &((interpreter_state*)stateend)[-1]; + 
int need_module = !s->mi; + int required_space = need_module ? 4 : 3; + if (space_remaining < required_space) + return 0; // Should not happen + size_t njlvalues = need_module ? 2 : 1; + uintptr_t entry_tags = jl_bt_entry_descriptor(njlvalues, 0, JL_BT_INTERP_FRAME_TAG, s->ip); + bt_entry[0].uintptr = JL_BT_NON_PTR_ENTRY; + bt_entry[1].uintptr = entry_tags; + bt_entry[2].jlvalue = s->mi ? (jl_value_t*)s->mi : + s->src ? (jl_value_t*)s->src : (jl_value_t*)jl_nothing; + if (need_module) { + // If we only have a CodeInfo (s->src), we are in a top level thunk and + // need to record the module separately. + bt_entry[3].jlvalue = (jl_value_t*)s->module; + } + return required_space; } + #ifdef __cplusplus } #endif diff --git a/src/julia.h b/src/julia.h index c4e05766afa40..9f88983b3caf6 100644 --- a/src/julia.h +++ b/src/julia.h @@ -709,43 +709,46 @@ extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT; // This is necessary, because otherwise the analyzer considers this undefined // behavior and terminates the exploration #define JL_GC_PUSHARGS(rts_var, n) \ - rts_var = (jl_value_t **)alloca(sizeof(void*) * n); \ - memset(rts_var,0,sizeof(void*) * n); \ - _JL_GC_PUSHARGS(rts_var, n); + rts_var = (jl_value_t **)alloca(sizeof(void*) * (n)); \ + memset(rts_var, 0, sizeof(void*) * (n)); \ + _JL_GC_PUSHARGS(rts_var, (n)); extern void JL_GC_POP() JL_NOTSAFEPOINT; #else -#define JL_GC_PUSH1(arg1) \ - void *__gc_stkf[] = {(void*)3, jl_pgcstack, arg1}; \ +#define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<2) +#define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<2)|1) + +#define JL_GC_PUSH1(arg1) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(1), jl_pgcstack, arg1}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; -#define JL_GC_PUSH2(arg1, arg2) \ - void *__gc_stkf[] = {(void*)5, jl_pgcstack, arg1, arg2}; \ +#define JL_GC_PUSH2(arg1, arg2) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(2), jl_pgcstack, arg1, arg2}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; -#define 
JL_GC_PUSH3(arg1, arg2, arg3) \ - void *__gc_stkf[] = {(void*)7, jl_pgcstack, arg1, arg2, arg3}; \ +#define JL_GC_PUSH3(arg1, arg2, arg3) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(3), jl_pgcstack, arg1, arg2, arg3}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; -#define JL_GC_PUSH4(arg1, arg2, arg3, arg4) \ - void *__gc_stkf[] = {(void*)9, jl_pgcstack, arg1, arg2, arg3, arg4}; \ +#define JL_GC_PUSH4(arg1, arg2, arg3, arg4) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(4), jl_pgcstack, arg1, arg2, arg3, arg4}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; -#define JL_GC_PUSH5(arg1, arg2, arg3, arg4, arg5) \ - void *__gc_stkf[] = {(void*)11, jl_pgcstack, arg1, arg2, arg3, arg4, arg5}; \ +#define JL_GC_PUSH5(arg1, arg2, arg3, arg4, arg5) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(5), jl_pgcstack, arg1, arg2, arg3, arg4, arg5}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; -#define JL_GC_PUSH6(arg1, arg2, arg3, arg4, arg5, arg6) \ - void *__gc_stkf[] = {(void*)13, jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6}; \ +#define JL_GC_PUSH6(arg1, arg2, arg3, arg4, arg5, arg6) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(6), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; -#define JL_GC_PUSHARGS(rts_var,n) \ - rts_var = ((jl_value_t**)alloca(((n)+2)*sizeof(jl_value_t*)))+2; \ - ((void**)rts_var)[-2] = (void*)(((size_t)(n))<<1); \ - ((void**)rts_var)[-1] = jl_pgcstack; \ - memset((void*)rts_var, 0, (n)*sizeof(jl_value_t*)); \ +#define JL_GC_PUSHARGS(rts_var,n) \ + rts_var = ((jl_value_t**)alloca(((n)+2)*sizeof(jl_value_t*)))+2; \ + ((void**)rts_var)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(n); \ + ((void**)rts_var)[-1] = jl_pgcstack; \ + memset((void*)rts_var, 0, (n)*sizeof(jl_value_t*)); \ jl_pgcstack = (jl_gcframe_t*)&(((void**)rts_var)[-2]) #define JL_GC_POP() (jl_pgcstack = jl_pgcstack->prev) diff --git a/src/julia_internal.h b/src/julia_internal.h index c97c70af90825..3274749859c98 100644 --- a/src/julia_internal.h +++ 
b/src/julia_internal.h @@ -742,9 +742,9 @@ size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTS // Record backtrace from a signal handler. `ctx` is the context of the code // which was asynchronously interrupted. size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, - int add_interp_frames) JL_NOTSAFEPOINT; + jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT; #ifdef LIBOSXUNWIND -size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, int add_interp_frames) JL_NOTSAFEPOINT; +size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT jl_value_t *jl_get_backtrace(void); void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, size_t *bt_size); @@ -766,10 +766,9 @@ STATIC_INLINE char *jl_copy_str(char **to, const char *from) memcpy(*to, from, len); return *to; } -JL_DLLEXPORT int jl_is_interpreter_frame(uintptr_t ip) JL_NOTSAFEPOINT; -JL_DLLEXPORT int jl_is_enter_interpreter_frame(uintptr_t ip) JL_NOTSAFEPOINT; -JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_data, uintptr_t sp, - uintptr_t fp, size_t space_remaining) JL_NOTSAFEPOINT; + +JL_DLLEXPORT size_t jl_capture_interp_frame(jl_bt_element_t *bt_data, + void *frameend, size_t space_remaining) JL_NOTSAFEPOINT; // Exception stack: a stack of pairs of (exception,raw_backtrace). // The stack may be traversed and accessed with the functions below. 
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 5baf62c54bca0..8ffa46b83df41 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -119,7 +119,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) builder.SetInsertPoint(&*(++BasicBlock::iterator(target))); Instruction *inst = builder.CreateStore( - ConstantInt::get(T_size, nRoots << 1), + ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)), builder.CreateBitCast( builder.CreateConstGEP1_32(gcframe, 0), T_size->getPointerTo())); diff --git a/src/signal-handling.c b/src/signal-handling.c index 43d133f6e394c..4d0bdbcbc566b 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -233,7 +233,7 @@ void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, if (context) { // Must avoid extended backtrace frames here unless we're sure bt_data // is properly rooted. - *bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, 0); + *bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL); } for (i = 0; i < n; i += jl_bt_entry_size(bt_data + i)) { jl_print_bt_entry_codeloc(bt_data + i); diff --git a/src/signals-mach.c b/src/signals-mach.c index 24b02ead82970..33f8f3545299f 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -142,7 +142,7 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio if (!ptls2->safe_restore) { assert(exception); ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, - (bt_context_t*)&state, 1); + (bt_context_t*)&state, ptls2->pgcstack); ptls2->sig_exception = exception; } jl_call_in_state(ptls2, &state, &jl_sig_throw); @@ -448,10 +448,10 @@ void *mach_profile_listener(void *arg) if (forceDwarf == 0) { // Save the backtrace - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, 0); + bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + 
bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); } else if (forceDwarf == 1) { - bt_size_cur += rec_backtrace_ctx_dwarf((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, 0); + bt_size_cur += rec_backtrace_ctx_dwarf((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); } else if (forceDwarf == -1) { jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); @@ -459,7 +459,7 @@ void *mach_profile_listener(void *arg) forceDwarf = -2; #else - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, 0); + bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); #endif // Mark the end of this block with 0 diff --git a/src/signals-unix.c b/src/signals-unix.c index a397136b7679b..cd885f7669853 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -176,7 +176,7 @@ static void jl_throw_in_ctx(jl_ptls_t ptls, jl_value_t *e, int sig, void *sigctx { if (!ptls->safe_restore) ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, - jl_to_bt_context(sigctx), 1); + jl_to_bt_context(sigctx), ptls->pgcstack); ptls->sig_exception = e; jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx); } @@ -670,7 +670,7 @@ static void *signal_listener(void *arg) if (critical) { bt_size += rec_backtrace_ctx(bt_data + bt_size, JL_MAX_BT_SIZE / jl_n_threads - 1, - signal_context, 0); + signal_context, NULL); bt_data[bt_size++].uintptr = 0; } @@ -689,7 +689,7 @@ static void *signal_listener(void *arg) } else { // Get backtrace data bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, signal_context, 0); + bt_size_max - bt_size_cur - 1, signal_context, NULL); } ptls->safe_restore = old_buf; diff --git a/src/signals-win.c b/src/signals-win.c index fb913293d45c8..ce995308290c2 100644 --- a/src/signals-win.c +++ 
b/src/signals-win.c @@ -104,7 +104,7 @@ static void JL_NORETURN start_backtrace_fiber(void) { jl_ptls_t ptls = jl_get_ptls_states(); // collect the backtrace - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, error_ctx, 1); + ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, error_ctx, ptls->pgcstack); // switch back to the execution fiber jl_setcontext(&error_return_fiber); abort(); @@ -130,7 +130,7 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread) assert(excpt != NULL); ptls->bt_size = 0; if (excpt != jl_stackovf_exception) { - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, 1); + ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, ptls->pgcstack); } else if (have_backtrace_fiber) { error_ctx = ctxThread; @@ -345,7 +345,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) } // Get backtrace data bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, &ctxThread, 0); + bt_size_max - bt_size_cur - 1, &ctxThread, NULL); // Mark the end of this block with 0 bt_data_prof[bt_size_cur].uintptr = 0; bt_size_cur++; diff --git a/src/stackwalk.c b/src/stackwalk.c index f088f1b7b69a2..9bb81a220d7c1 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -25,7 +25,27 @@ extern "C" { #endif static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *context) JL_NOTSAFEPOINT; -static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintptr_t *fp) JL_NOTSAFEPOINT; +static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp) JL_NOTSAFEPOINT; + +static jl_gcframe_t *is_enter_interpreter_frame(jl_gcframe_t **ppgcstack, uintptr_t sp) JL_NOTSAFEPOINT +{ + jl_gcframe_t *pgcstack = *ppgcstack; + while (pgcstack != NULL) { + jl_gcframe_t *prev = pgcstack->prev; + if (pgcstack->nroots & 2) { // tagged frame + uintptr_t frame_fp = ((uintptr_t*)pgcstack)[-1]; + if (frame_fp != 0) { // frame is fully
initialized (otherwise fall through and skip it, instead of spinning on it) + if (frame_fp >= sp) + break; // stack grows down, so frame pointer is monotonically increasing + *ppgcstack = prev; + return pgcstack; + } + } + *ppgcstack = pgcstack = prev; + } + return NULL; +} + // Record backtrace entries into bt_data by stepping cursor with jl_unw_step // until the outermost frame is encountered or the buffer bt_data is (close to) @@ -37,7 +57,7 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintpt // entries. If `sp != NULL`, the stack pointer corresponding `bt_data[i]` is // stored in `sp[i]`. // -// Flag `add_interp_frames==1` should be set to record an extended backtrace +// `*ppgcstack` should be given if you want to record extended backtrace // entries in `bt_data` for each julia interpreter frame. // // Flag `from_signal_handler==1` should be set if the cursor was obtained by @@ -46,14 +66,13 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintpt // jl_unw_stepn will return 1 if there are more frames to come. The number of // elements written to bt_data (and sp if non-NULL) are returned in bt_size.
int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *bt_size, - uintptr_t *sp, size_t maxsize, int skip, int add_interp_frames, + uintptr_t *sp, size_t maxsize, int skip, jl_gcframe_t **ppgcstack, int from_signal_handler) JL_NOTSAFEPOINT { volatile size_t n = 0; volatile int need_more_space = 0; uintptr_t return_ip = 0; uintptr_t thesp = 0; - uintptr_t thefp = 0; #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) assert(!jl_in_stackwalk); jl_in_stackwalk = 1; @@ -72,12 +91,12 @@ int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *bt_size, #endif int have_more_frames = 1; while (have_more_frames) { - if (n + JL_BT_MAX_ENTRY_SIZE > maxsize) { + if (n + JL_BT_MAX_ENTRY_SIZE + 1 > maxsize) { // Postpone advancing the cursor: may need more space need_more_space = 1; break; } - have_more_frames = jl_unw_step(cursor, &return_ip, &thesp, &thefp); + have_more_frames = jl_unw_step(cursor, &return_ip, &thesp); if (skip > 0) { skip--; continue; @@ -122,15 +141,25 @@ int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *bt_size, call_ip = 0; } jl_bt_element_t *bt_entry = bt_data + n; - size_t entry_sz = 0; - if (add_interp_frames && jl_is_enter_interpreter_frame(call_ip) && - (entry_sz = jl_capture_interp_frame(bt_entry, thesp, thefp, maxsize-n)) != 0) { - n += entry_sz; - } else { - bt_entry->uintptr = call_ip; - n++; + jl_gcframe_t *pgcstack; + if ((pgcstack = is_enter_interpreter_frame(ppgcstack, thesp))) { + size_t add = jl_capture_interp_frame(bt_entry, (void*)((char*)pgcstack - sizeof(void*)), maxsize - n); + n += add; + bt_entry += add; + while ((pgcstack = is_enter_interpreter_frame(ppgcstack, thesp))) { + // If the compiler got inlining-happy, or the user tried to + // push multiple frames (or the unwinder got very + // confused), we could end up here. That doesn't happen + // now, so just ignore this possibility. If we want this, + // we can work on adding support for it later. 
+ } } + bt_entry->uintptr = call_ip; + n++; } + // NOTE: if we have some pgcstack entries remaining (because the + // unwinder failed and returned !have_more_frames early), we could + // consider still appending those frames here #if !defined(_OS_WINDOWS_) } else { @@ -150,13 +179,13 @@ int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *bt_size, } NOINLINE size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, - bt_context_t *context, int add_interp_frames) JL_NOTSAFEPOINT + bt_context_t *context, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT { bt_cursor_t cursor; if (!jl_unw_init(&cursor, context)) return 0; size_t bt_size = 0; - jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, 0, add_interp_frames, 1); + jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, 0, &pgcstack, 1); return bt_size; } @@ -170,11 +199,12 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip bt_context_t context; memset(&context, 0, sizeof(context)); jl_unw_get(&context); + jl_gcframe_t *pgcstack = jl_pgcstack; bt_cursor_t cursor; if (!jl_unw_init(&cursor, &context)) return 0; size_t bt_size = 0; - jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, skip + 1, 1, 0); + jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, skip + 1, &pgcstack, 0); return bt_size; } @@ -204,6 +234,7 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip) bt_cursor_t cursor; memset(&context, 0, sizeof(context)); jl_unw_get(&context); + jl_gcframe_t *pgcstack = jl_pgcstack; if (jl_unw_init(&cursor, &context)) { // Skip frame for jl_backtrace_from_here itself skip += 1; @@ -218,7 +249,7 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip) } size_t size_incr = 0; have_more_frames = jl_unw_stepn(&cursor, (jl_bt_element_t*)jl_array_data(ip) + offset, - &size_incr, sp_ptr, maxincr, skip, 1, 0); + &size_incr, sp_ptr, maxincr, skip, &pgcstack, 0); skip = 0; offset += size_incr; } @@ -436,14 +467,12 @@ static 
int readable_pointer(LPCVOID pointer) return 1; } -static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintptr_t *fp) +static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp) { // Might be called from unmanaged thread. #ifndef _CPU_X86_64_ *ip = (uintptr_t)cursor->stackframe.AddrPC.Offset; *sp = (uintptr_t)cursor->stackframe.AddrStack.Offset; - if (fp) - *fp = (uintptr_t)cursor->stackframe.AddrFrame.Offset; if (*ip == 0) { if (!readable_pointer((LPCVOID)*sp)) return 0; @@ -458,8 +487,6 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintpt #else *ip = (uintptr_t)cursor->Rip; *sp = (uintptr_t)cursor->Rsp; - if (fp) - *fp = (uintptr_t)cursor->Rbp; if (*ip == 0) { if (!readable_pointer((LPCVOID)*sp)) return 0; @@ -508,7 +535,7 @@ static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *context) return unw_init_local(cursor, context) == 0; } -static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintptr_t *fp) +static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp) { unw_word_t reg; if (unw_get_reg(cursor, UNW_REG_IP, &reg) < 0) @@ -517,27 +544,18 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintpt if (unw_get_reg(cursor, UNW_REG_SP, &reg) < 0) return 0; *sp = reg; -#ifdef UNW_REG_FP - if (unw_get_reg(cursor, UNW_REG_FP, &reg) < 0) - return 0; - if (fp) - *fp = reg; -#else - if (fp) - *fp = 0; -#endif return unw_step(cursor) > 0; } #ifdef LIBOSXUNWIND NOINLINE size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, - bt_context_t *context, int add_interp_frames) + bt_context_t *context, jl_gcframe_t *pgcstack) { size_t bt_size = 0; bt_cursor_t cursor; if (unw_init_local_dwarf(&cursor, context) != UNW_ESUCCESS) return 0; - jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, 0, add_interp_frames, 1); + jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, 0, &pgcstack, 1); return bt_size; } #endif @@ -549,7 +567,7 @@
static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *context) return 0; } -static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintptr_t *fp) +static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp) { return 0; } diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index f4d126471ebe0..fd57092b53dc1 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -26,7 +26,7 @@ top: %ptls = call %jl_value_t*** @julia.ptls_states() ; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 0 ; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast %jl_value_t addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* -; CHECK-DAG: store i64 4, i64* [[GCFRAME_SIZE_PTR2]], !tbaa !0 +; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], !tbaa !0 ; CHECK-DAG: [[GCFRAME_SLOT:%.*]] = getelementptr %jl_value_t**, %jl_value_t*** %ptls, i32 0 ; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 1 ; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast %jl_value_t addrspace(10)** [[PREV_GCFRAME_PTR]] to %jl_value_t***