With kernel patch [1], x86_64 adds extra padding at the top of the kernel
stack; as a result, pt_regs is shifted down by the size of the padding.
Without this patch, the register values read from pt_regs will be incorrect.

Although TOP_OF_KERNEL_STACK_PADDING is currently configured by Kconfig,
according to the kernel code comment [2] the value may be made dynamic
later. In addition, there might be systems for which the kernel config is
not available. So in this patch, we calculate the value of
TOP_OF_KERNEL_STACK_PADDING.

The calculation is as follows:

1) in startup_64(), there is a lea instruction:
   leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp

2) in rewind_stack_and_make_dead(), there is a lea instruction:
   leaq -PTREGS_SIZE(%rax), %rsp

The two disassembled instructions look like:

1) 0xffffffff93a0007d <startup_64+3>: lea 0x1e03ec4(%rip),%rsp # 0xffffffff95803f48
                                                               ^^^^^^^^^^^^^^^^^^^^
2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>: lea -0xa8(%rax),%rsp
                                                            ^^^^

0xffffffff95803f48 is the value of
(__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is
the value of PTREGS_SIZE; __end_init_task can be obtained by reading the
symbol. TOP_OF_KERNEL_STACK_PADDING is therefore
__end_init_task - 0xffffffff95803f48 - 0xa8.

[1]: https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot2@tip-bot2/
[2]: https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/thread_info.h#L34

Signed-off-by: Tao Liu <ltao@xxxxxxxxxx>
---
 x86_64.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 102 insertions(+), 2 deletions(-)

diff --git a/x86_64.c b/x86_64.c
index 0c21eb8..43a31c2 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
 static orc_entry *orc_module_find(ulong);
 static ulong ip_table_to_vaddr(ulong);
 static void orc_dump(ulong);
+static long top_of_kernel_stack_padding(void);
 
 struct machine_specific x86_64_machine_specific = { 0 };
 
@@ -4089,7 +4090,8 @@ in_exception_stack:
 		user_mode_eframe = bt->stacktop - SIZE(pt_regs);
 		if (last_process_stack_eframe < user_mode_eframe)
 			x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
-				(bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+				(bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+				top_of_kernel_stack_padding(),
 				bt, ofp);
 	}
 
@@ -4410,7 +4412,8 @@ in_exception_stack:
 		user_mode_eframe = bt->stacktop - SIZE(pt_regs);
 		if (last_process_stack_eframe < user_mode_eframe)
 			x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
-				(bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+				(bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+				top_of_kernel_stack_padding(),
 				bt, ofp);
 	}
 
@@ -9541,4 +9544,101 @@ x86_64_swp_offset(ulong entry)
 	return SWP_OFFSET(entry);
 }
 
+/*
+ * Return the size in bytes of TOP_OF_KERNEL_STACK_PADDING, i.e. the gap
+ * the kernel leaves between the top of the kernel stack and pt_regs.
+ * It is computed from disassembly as:
+ *   __end_init_task - <target of the startup_64 leaq> - PTREGS_SIZE
+ * The result is cached after the first successful return; 0 is returned
+ * when any step of the calculation cannot be carried out.
+ */
+static long
+top_of_kernel_stack_padding(void)
+{
+	char buf1[BUFSIZE];
+	char *cursor, *paren;
+	long final_value, ptregs_size_value = 0;
+	long padding = 0;	/* "no padding" unless every step succeeds */
+	char *arglist[MAXARGS];
+	struct syment *sp;
+	bool found = FALSE;
+
+	static long kernel_stack_padding = -1;
+
+	if (kernel_stack_padding >= 0)
+		return kernel_stack_padding;
+
+	/*
+	 * startup_64:
+	 * ...
+	 * mov %rsi,%r15
+	 * leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
+	 */
+	sprintf(buf1, "disass /r startup_64");
+	open_tmpfile2();
+	if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR))
+		goto out;
+
+	rewind(pc->tmpfile2);
+	/*
+	 * fgets() runs before the !found test, so once the mov is matched
+	 * the next fgets() loads the following line (the leaq) into buf1
+	 * and the loop then exits.
+	 */
+	while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
+		// machine code of "mov %rsi,%r15"
+		if (strstr(buf1, "49 89 f7"))
+			found = TRUE;
+	}
+	if (!found || !(cursor = strstr(buf1, "# 0x")))
+		goto out;
+
+	/* Expect at least "#" followed by the resolved address. */
+	if (parse_line(cursor, arglist) < 2)
+		goto out;
+	final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
+
+	/*
+	 * rewind_stack_and_make_dead:
+	 * ...
+	 * leaq -PTREGS_SIZE(%rax), %rsp
+	 */
+	found = FALSE;
+	/*
+	 * Use a fresh tmpfile: rewind() alone does not truncate, so
+	 * leftover startup_64 output could otherwise be scanned below.
+	 */
+	close_tmpfile2();
+	open_tmpfile2();
+	sprintf(buf1, "disass rewind_stack_and_make_dead");
+	if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR))
+		goto out;
+
+	rewind(pc->tmpfile2);
+	while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
+		// find leaq -PTREGS_SIZE(%rax), %rsp
+		if (!strstr(buf1, "lea") || !(cursor = strstr(buf1, "-0x")))
+			continue;
+		if (parse_line(cursor, arglist) < 1)
+			continue;
+		/* The operand must look like "-0x<size>(<reg>)". */
+		if (!(paren = strchr(arglist[0], '(')))
+			continue;
+		*paren = '\0';
+		/* Skip the leading '-' to read PTREGS_SIZE as a positive value. */
+		ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL);
+		found = TRUE;
+		break;
+	}
+	if (!found)
+		goto out;
+
+	if (!(sp = symbol_search("__end_init_task")))
+		goto out;
+
+	padding = sp->value - final_value - ptregs_size_value;
+	/* A negative result cannot be a padding size; treat it as none. */
+	if (padding < 0)
+		padding = 0;
+out:
+	close_tmpfile2();
+	kernel_stack_padding = padding;
+	return kernel_stack_padding;
+}
+
 #endif /* X86_64 */
-- 
2.40.1
--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki