This patch will add stack unwinding support for x86_64 CPU arch. It follows the tech path of ppc64_get_stack_frame() & ppc64_get_cpu_reg() in ppc64.c, to get and consume the cpu regs. One different case is, we need to handle inactive_task_frame structure specifically in x86_64. If inactive_task_frame structure enabled, for inactive tasks, 7 regs can be get from inactive_task_frame in stack, and sp need to rewind back to skip inactive_task_frame structure (See code comments in x86_64.c:x86_64_get_stack_frame()); for active tasks, we get regs by the original way. If inactive_task_frame structure is not enabled, for inactive tasks, the stack frame is organized as linked list, and sp/bp can be get from stack; for active tasks, we get regs by the original way. Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> --- defs.h | 19 +++- x86_64.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 276 insertions(+), 27 deletions(-) diff --git a/defs.h b/defs.h index 7026e5c..221dff0 100644 --- a/defs.h +++ b/defs.h @@ -2241,6 +2241,20 @@ struct offset_table { /* stash of commonly-used offsets */ long mnt_namespace_nr_mounts; long mount_mnt_node; long log_caller_id; + long inactive_task_frame_r15; + long inactive_task_frame_r14; + long inactive_task_frame_r13; + long inactive_task_frame_r12; + long inactive_task_frame_flags; + long inactive_task_frame_si; + long inactive_task_frame_di; + long inactive_task_frame_bx; + long thread_struct_es; + long thread_struct_ds; + long thread_struct_fsbase; + long thread_struct_gsbase; + long thread_struct_fs; + long thread_struct_gs; }; struct size_table { /* stash of commonly-used sizes */ @@ -8007,7 +8021,7 @@ extern int have_full_symbols(void); /* * Register numbers must be in sync with gdb/features/i386/64bit-core.c - * to make crash_target->fetch_registers() ---> machdep->get_cpu_reg() + * to make crash_target->fetch_registers() ---> machdep->get_current_task_reg() * working properly. */ enum x86_64_regnum { @@ -8051,6 +8065,9 @@ enum x86_64_regnum { FOSEG_REGNUM, FOOFF_REGNUM, FOP_REGNUM, + FS_BASE_REGNUM = 152, + GS_BASE_REGNUM, + ORIG_RAX_REGNUM, LAST_REGNUM }; diff --git a/x86_64.c b/x86_64.c index 502817d..6d42fe4 100644 --- a/x86_64.c +++ b/x86_64.c @@ -126,7 +126,7 @@ static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *); static void x86_64_framesize_debug(struct bt_info *); static void x86_64_get_active_set(void); static int x86_64_get_kvaddr_ranges(struct vaddr_range *); -static int x86_64_get_cpu_reg(int, int, const char *, int, void *); +static int x86_64_get_current_task_reg(int, const char *, int, void *); static int x86_64_verify_paddr(uint64_t); static void GART_init(void); static void x86_64_exception_stacks_init(void); @@ -143,6 +143,29 @@ struct machine_specific x86_64_machine_specific = { 0 }; static const char *exception_functions_orig[]; static const char *exception_functions_5_8[]; +/* Use this hardwired version -- sometimes the + * debuginfo doesn't pick this up even though + * it exists in the kernel; it shouldn't change. + */ +struct x86_64_user_regs_struct { + unsigned long r15, r14, r13, r12, bp, bx; + unsigned long r11, r10, r9, r8, ax, cx, dx; + unsigned long si, di, orig_ax, ip, cs; + unsigned long flags, sp, ss, fs_base; + unsigned long gs_base, ds, es, fs, gs; +}; + +struct user_regs_bitmap_struct { + struct { + unsigned long r15, r14, r13, r12, bp, bx; + unsigned long r11, r10, r9, r8, ax, cx, dx; + unsigned long si, di, orig_ax, ip, cs; + unsigned long flags, sp, ss, fs_base; + unsigned long gs_base, ds, es, fs, gs; + }; + ulong bitmap; +}; + /* * Do all necessary machine-specific setup here. This is called several * times during initialization. @@ -195,7 +218,7 @@ x86_64_init(int when) machdep->machspec->irq_eframe_link = UNINITIALIZED; machdep->machspec->irq_stack_gap = UNINITIALIZED; machdep->get_kvaddr_ranges = x86_64_get_kvaddr_ranges; - machdep->get_cpu_reg = x86_64_get_cpu_reg; + machdep->get_current_task_reg = x86_64_get_current_task_reg; if (machdep->cmdline_args[0]) parse_cmdline_args(); if ((string = pc->read_vmcoreinfo("relocate"))) { @@ -500,6 +523,12 @@ x86_64_init(int when) MEMBER_OFFSET_INIT(thread_struct_rsp, "thread_struct", "sp"); if (INVALID_MEMBER(thread_struct_rsp0)) MEMBER_OFFSET_INIT(thread_struct_rsp0, "thread_struct", "sp0"); + MEMBER_OFFSET_INIT(thread_struct_es, "thread_struct", "es"); + MEMBER_OFFSET_INIT(thread_struct_ds, "thread_struct", "ds"); + MEMBER_OFFSET_INIT(thread_struct_fsbase, "thread_struct", "fsbase"); + MEMBER_OFFSET_INIT(thread_struct_gsbase, "thread_struct", "gsbase"); + MEMBER_OFFSET_INIT(thread_struct_fs, "thread_struct", "fs"); + MEMBER_OFFSET_INIT(thread_struct_gs, "thread_struct", "gs"); STRUCT_SIZE_INIT(tss_struct, "tss_struct"); MEMBER_OFFSET_INIT(tss_struct_ist, "tss_struct", "ist"); if (INVALID_MEMBER(tss_struct_ist)) { @@ -583,17 +612,6 @@ x86_64_init(int when) "user_regs_struct", "r15"); STRUCT_SIZE_INIT(user_regs_struct, "user_regs_struct"); if (!VALID_STRUCT(user_regs_struct)) { - /* Use this hardwired version -- sometimes the - * debuginfo doesn't pick this up even though - * it exists in the kernel; it shouldn't change. - */ - struct x86_64_user_regs_struct { - unsigned long r15, r14, r13, r12, bp, bx; - unsigned long r11, r10, r9, r8, ax, cx, dx; - unsigned long si, di, orig_ax, ip, cs; - unsigned long flags, sp, ss, fs_base; - unsigned long gs_base, ds, es, fs, gs; - }; ASSIGN_SIZE(user_regs_struct) = sizeof(struct x86_64_user_regs_struct); ASSIGN_OFFSET(user_regs_struct_rip) = @@ -891,7 +909,7 @@ x86_64_dump_machdep_table(ulong arg) fprintf(fp, " is_page_ptr: x86_64_is_page_ptr()\n"); fprintf(fp, " verify_paddr: x86_64_verify_paddr()\n"); fprintf(fp, " get_kvaddr_ranges: x86_64_get_kvaddr_ranges()\n"); - fprintf(fp, " get_cpu_reg: x86_64_get_cpu_reg()\n"); + fprintf(fp, "get_current_task_reg: x86_64_get_current_task_reg()\n"); fprintf(fp, " init_kernel_pgd: x86_64_init_kernel_pgd()\n"); fprintf(fp, "clear_machdep_cache: x86_64_clear_machdep_cache()\n"); fprintf(fp, " xendump_p2m_create: %s\n", PVOPS_XEN() ? @@ -4970,22 +4988,117 @@ x86_64_eframe_verify(struct bt_info *bt, long kvaddr, long cs, long ss, return FALSE; } +#define GET_REG_FROM_INACTIVE_TASK_FRAME(reg) \ +({ \ + ulong offset, reg_value = 0, rsp; \ + if (VALID_MEMBER(inactive_task_frame_bp)) { \ + offset = OFFSET(task_struct_thread) + OFFSET(thread_struct_rsp); \ + readmem(bt->task + offset, KVADDR, &rsp, \ + sizeof(ulong), "thread_struct.rsp", FAULT_ON_ERROR); \ + readmem(rsp + OFFSET(inactive_task_frame_##reg), KVADDR, ®_value, \ + sizeof(ulong), "inactive_task_frame.##reg", FAULT_ON_ERROR); \ + } \ + reg_value; \ +}) + /* * Get a stack frame combination of pc and ra from the most relevent spot. */ static void x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp) { - if (bt->flags & BT_DUMPFILE_SEARCH) - return x86_64_get_dumpfile_stack_frame(bt, pcp, spp); + struct user_regs_bitmap_struct *ur_bitmap; if (bt->flags & BT_SKIP_IDLE) bt->flags &= ~BT_SKIP_IDLE; + if (pcp) + *pcp = x86_64_get_pc(bt); + if (spp) + *spp = x86_64_get_sp(bt); + ur_bitmap = (struct user_regs_bitmap_struct *)GETBUF(sizeof(*ur_bitmap)); + memset(ur_bitmap, 0, sizeof(struct user_regs_bitmap_struct)); + + if (VALID_MEMBER(inactive_task_frame_bp)) { + if (!is_task_active(bt->task)) { + /* + * For inactive tasks in live and dumpfile, regs can be + * get from inactive_task_frame struct. + */ + ur_bitmap->r15 = GET_REG_FROM_INACTIVE_TASK_FRAME(r15); + ur_bitmap->r14 = GET_REG_FROM_INACTIVE_TASK_FRAME(r14); + ur_bitmap->r13 = GET_REG_FROM_INACTIVE_TASK_FRAME(r13); + ur_bitmap->r12 = GET_REG_FROM_INACTIVE_TASK_FRAME(r12); + ur_bitmap->bx = GET_REG_FROM_INACTIVE_TASK_FRAME(bx); + ur_bitmap->bp = GET_REG_FROM_INACTIVE_TASK_FRAME(bp); + /* + For inactive tasks: + crash> task -x 1|grep sp + sp = 0xffffc90000013d00 + crash> rd ffffc90000013d00 32 + ffffc90000013d00: ffff888104dad4a8 0000000000000000 r15,r14 + ffffc90000013d10: ffff888100280000 ffff888100216500 r13,r12 + ffffc90000013d20: ffff888100217018 ffff88817fd2c800 rbx,rbp + ffffc90000013d30: ffffffff81a6a1b3 ffffc90000013de0 saved_rip,... + ffffc90000013d40: ffff888100000004 99ccbf53ea493000 + ffffc90000013d50: ffff888100216500 ffff888100216500 + + crash> dis __schedule + ... + 0xffffffff81a6a1ab <__schedule+507>: mov %r13,%rsi + 0xffffffff81a6a1ae <__schedule+510>: call 0xffffffff81003490 <__switch_to_asm> + 0xffffffff81a6a1b3 <__schedule+515>: mov %rax,%rdi <<=== saved_rip + ... + crash> dis __switch_to_asm + 0xffffffff81003490 <__switch_to_asm>: push %rbp + 0xffffffff81003491 <__switch_to_asm+1>: push %rbx + 0xffffffff81003492 <__switch_to_asm+2>: push %r12 + 0xffffffff81003494 <__switch_to_asm+4>: push %r13 + 0xffffffff81003496 <__switch_to_asm+6>: push %r14 + 0xffffffff81003498 <__switch_to_asm+8>: push %r15 + 0xffffffff8100349a <__switch_to_asm+10>: mov %rsp,0x14d8(%rdi) + ... + Now saved_rip = ffffffff81a6a1b3, and we are starting + the stack unwind at saved_rip, which is function __schedule() + instead of function __switch_to_asm(), so the stack pointer should + be rewind from ffffc90000013d00 back to ffffc90000013d38, + aka *spp += 7 * reg_len. Otherwise we are unwinding function + __schedule() but with __switch_to_asm()'s stack frame, which + will fail. + */ + *spp += 7 * sizeof(unsigned long); + ur_bitmap->bitmap += 0x3f; + } else { + /* + * For active tasks in dumpfile, we get regs through the + * original way. For active tasks in live, we only get + * ip and sp in the end of the function. + */ + if (bt->flags & BT_DUMPFILE_SEARCH) { + FREEBUF(ur_bitmap); + bt->need_free = FALSE; + return x86_64_get_dumpfile_stack_frame(bt, pcp, spp); + } + } + } else { + if (!is_task_active(bt->task)) { + readmem(*spp, KVADDR, &(ur_bitmap->bp), + sizeof(ulong), "ur_bitmap->bp", FAULT_ON_ERROR); + ur_bitmap->bitmap += 0x10; + } else { + if (bt->flags & BT_DUMPFILE_SEARCH) { + FREEBUF(ur_bitmap); + bt->need_free = FALSE; + return x86_64_get_dumpfile_stack_frame(bt, pcp, spp); + } + } + } - if (pcp) - *pcp = x86_64_get_pc(bt); - if (spp) - *spp = x86_64_get_sp(bt); + ur_bitmap->ip = *pcp; + ur_bitmap->sp = *spp; + ur_bitmap->bitmap += 0x50000; + + bt->machdep = ur_bitmap; + bt->need_free = TRUE; } /* @@ -6457,6 +6570,14 @@ x86_64_ORC_init(void) MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp"); MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr"); + MEMBER_OFFSET_INIT(inactive_task_frame_r15, "inactive_task_frame", "r15"); + MEMBER_OFFSET_INIT(inactive_task_frame_r14, "inactive_task_frame", "r14"); + MEMBER_OFFSET_INIT(inactive_task_frame_r13, "inactive_task_frame", "r13"); + MEMBER_OFFSET_INIT(inactive_task_frame_r12, "inactive_task_frame", "r12"); + MEMBER_OFFSET_INIT(inactive_task_frame_flags, "inactive_task_frame", "flags"); + MEMBER_OFFSET_INIT(inactive_task_frame_si, "inactive_task_frame", "si"); + MEMBER_OFFSET_INIT(inactive_task_frame_di, "inactive_task_frame", "di"); + MEMBER_OFFSET_INIT(inactive_task_frame_bx, "inactive_task_frame", "bx"); orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added at 6.3 */ orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed at 6.4 */ @@ -9069,17 +9190,128 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp) return cnt; } +#define REG_CASE(R, r) \ + case R##_REGNUM: \ + if (size != sizeof(ur_bitmap->r)) { \ + ret = FALSE; break; \ + } else { \ + memcpy(value, &ur_bitmap->r, size); \ + ret = TRUE; break; \ + } + static int -x86_64_get_cpu_reg(int cpu, int regno, const char *name, +x86_64_get_current_task_reg(int regno, const char *name, int size, void *value) { - if (regno >= LAST_REGNUM) - return FALSE; + struct bt_info bt_info, bt_setup; + struct task_context *tc; + struct user_regs_bitmap_struct *ur_bitmap; + ulong ip, sp; + bool ret = FALSE; - if (VMSS_DUMPFILE()) - return vmware_vmss_get_cpu_reg(cpu, regno, name, size, value); + switch (regno) { + case RAX_REGNUM ... GS_REGNUM: + case FS_BASE_REGNUM ... ORIG_RAX_REGNUM: + break; + default: + return FALSE; + } - return FALSE; + tc = CURRENT_CONTEXT(); + if (!tc) + return FALSE; + + if (VMSS_DUMPFILE()) + return vmware_vmss_get_cpu_reg(tc->processor, regno, name, size, value); + + BZERO(&bt_setup, sizeof(struct bt_info)); + clone_bt_info(&bt_setup, &bt_info, tc); + fill_stackbuf(&bt_info); + + // reusing the get_dumpfile_regs function to get pt regs structure + get_dumpfile_regs(&bt_info, &sp, &ip); + if (bt_info.stackbuf) + FREEBUF(bt_info.stackbuf); + ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep; + if (!ur_bitmap) + return FALSE; + + /* Get all registers from elf notes*/ + if (!bt_info.need_free) { + goto get_all; + } + + /* Get subset registers from stack frame*/ + switch (ur_bitmap->bitmap) { + case 0x5003f: + switch (regno) { + case R12_REGNUM ... R15_REGNUM: + case RBP_REGNUM: + case RBX_REGNUM: + case RIP_REGNUM: + case RSP_REGNUM: + break; + default: + return FALSE; + } + break; + case 0x50010: + switch (regno) { + case RBP_REGNUM: + case RIP_REGNUM: + case RSP_REGNUM: + break; + default: + return FALSE; + } + break; + case 0x50000: + switch (regno) { + case RIP_REGNUM: + case RSP_REGNUM: + break; + default: + return FALSE; + } + break; + } + +get_all: + switch (regno) { + REG_CASE(RAX, ax); + REG_CASE(RBX, bx); + REG_CASE(RCX, cx); + REG_CASE(RDX, dx); + REG_CASE(RSI, si); + REG_CASE(RDI, di); + REG_CASE(RBP, bp); + REG_CASE(RSP, sp); + REG_CASE(R8, r8); + REG_CASE(R9, r9); + REG_CASE(R10, r10); + REG_CASE(R11, r11); + REG_CASE(R12, r12); + REG_CASE(R13, r13); + REG_CASE(R14, r14); + REG_CASE(R15, r15); + REG_CASE(RIP, ip); + REG_CASE(EFLAGS, flags); + REG_CASE(CS, cs); + REG_CASE(SS, ss); + REG_CASE(DS, ds); + REG_CASE(ES, es); + REG_CASE(FS, fs); + REG_CASE(GS, gs); + REG_CASE(FS_BASE, fs_base); + REG_CASE(GS_BASE, gs_base); + REG_CASE(ORIG_RAX, orig_ax); + } + + if (bt_info.need_free) { + FREEBUF(ur_bitmap); + bt_info.need_free = FALSE; + } + return ret; } /* -- 2.40.1 -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki