Hi Lianbo, On Mon, May 27, 2024 at 11:30 AM Lianbo Jiang <lijiang@xxxxxxxxxx> wrote: > > Hi, Tao > > Thank you for the fix. > > On 5/23/24 12:06 PM, devel-request@xxxxxxxxxxxxxxxxxxxxxxxxxxx wrote: > > Date: Thu, 23 May 2024 12:06:03 +0800 > > From: Tao Liu<ltao@xxxxxxxxxx> > > Subject: [PATCH] x86_64: Add > > top_of_kernel_stack_padding for kernel stack > > To:devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx > > Cc: Tao Liu<ltao@xxxxxxxxxx> > > Message-ID:<20240523040603.10304-1-ltao@xxxxxxxxxx> > > Content-Type: text/plain; charset="US-ASCII"; x-default=true > > > > With kernel patch [1], x86_64 will add extra padding for kernel stack, > > as a result, the pt_regs will be shift down by the offset of padding. > > Without the patch, the values of registers read from pt_regs will be > > incorrect. > > > > Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by > > Kconfig, according to kernel code comment [2], the value may be made > > dynamicly later. In addition there might be systems compiled without > > Kconfig avaliable. So in this patch, we will calculate the value of > > TOP_OF_KERNEL_STACK_PADDING. > > > > The calculation is as follows: > > > > 1) in startup_64(), there is a lea instruction as: > > leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp > > > > 2) in rewind_stack_and_make_dead(), there is a lea instruction as: > > leaq -PTREGS_SIZE(%rax), %rsp > > > > The disassembled 2 instructions will be like: > > > > 1) 0xffffffff93a0007d <startup_64+3>: lea 0x1e03ec4(%rip),%rsp # 0xffffffff95803f48 > > ^^^^^^^^^^^^^^^^^^^^ > > 2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>: lea -0xa8(%rax),%rsp > > ^^^^ > > 0xffffffff95803f48 is the value of (__end_init_task - > > TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of > > PTREGS_SIZE, __end_init_task can be get by symbol reading. > > Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good, but it heavily relies on compiler. > Normally we would use this way unless there is no other choice. > > How about the following changes? Although it doesn't handle the case that the value is dynamic, let's see > how to change in the kernel in future, and then consider how to reflect it in crash-utility. > Sure, looks good to me, so let's go with this, and update it later when kernel changes. Thanks, Tao Liu > > diff --git a/defs.h b/defs.h > index 01f316e67dde..42d875965256 100644 > --- a/defs.h > +++ b/defs.h > @@ -2414,6 +2414,7 @@ struct size_table { /* stash of commonly-used sizes */ > long maple_tree; > long maple_node; > long module_memory; > + long fred_frame; > }; > > struct array_table { > diff --git a/kernel.c b/kernel.c > index 1728b70c1b5c..cd3d6044cc9a 100644 > --- a/kernel.c > +++ b/kernel.c > @@ -668,6 +668,7 @@ kernel_init() > STRUCT_SIZE_INIT(softirq_state, "softirq_state"); > STRUCT_SIZE_INIT(softirq_action, "softirq_action"); > STRUCT_SIZE_INIT(desc_struct, "desc_struct"); > + STRUCT_SIZE_INIT(fred_frame, "fred_frame"); > > STRUCT_SIZE_INIT(char_device_struct, "char_device_struct"); > if (VALID_STRUCT(char_device_struct)) { > diff --git a/x86_64.c b/x86_64.c > index 0c21eb827e4a..6777c93e6b47 100644 > --- a/x86_64.c > +++ b/x86_64.c > @@ -4086,10 +4086,11 @@ in_exception_stack: > > if (!irq_eframe && !is_kernel_thread(bt->tc->task) && > (GET_STACKBASE(bt->tc->task) == bt->stackbase)) { > + long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0; > user_mode_eframe = bt->stacktop - SIZE(pt_regs); > if (last_process_stack_eframe < user_mode_eframe) > x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf + > - (bt->stacktop - bt->stackbase) - SIZE(pt_regs), > + (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs), > bt, ofp); > } > > @@ -4407,10 +4408,11 @@ in_exception_stack: > > if (!irq_eframe && !is_kernel_thread(bt->tc->task) && > (GET_STACKBASE(bt->tc->task) == bt->stackbase)) { > + long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0; > user_mode_eframe = bt->stacktop - SIZE(pt_regs); > if (last_process_stack_eframe < user_mode_eframe) > x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf + > - (bt->stacktop - bt->stackbase) - SIZE(pt_regs), > + (bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs), > bt, ofp); > } > > Thanks > Lianbo > > > [1]:https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot2@tip-bot2/ > > [2]:https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/thread_info.h#L34 > > > > Signed-off-by: Tao Liu<ltao@xxxxxxxxxx> > > --- > > x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 82 insertions(+), 2 deletions(-) > > > > diff --git a/x86_64.c b/x86_64.c > > index 0c21eb8..43a31c2 100644 > > --- a/x86_64.c > > +++ b/x86_64.c > > @@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong); > > static orc_entry *orc_module_find(ulong); > > static ulong ip_table_to_vaddr(ulong); > > static void orc_dump(ulong); > > +static long top_of_kernel_stack_padding(void); > > > > struct machine_specific x86_64_machine_specific = { 0 }; > > > > @@ -4089,7 +4090,8 @@ in_exception_stack: > > user_mode_eframe = bt->stacktop - SIZE(pt_regs); > > if (last_process_stack_eframe < user_mode_eframe) > > x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf + > > - (bt->stacktop - bt->stackbase) - SIZE(pt_regs), > > + (bt->stacktop - bt->stackbase) - SIZE(pt_regs) - > > + top_of_kernel_stack_padding(), > > bt, ofp); > > } > > > > @@ -4410,7 +4412,8 @@ in_exception_stack: > > user_mode_eframe = bt->stacktop - SIZE(pt_regs); > > if (last_process_stack_eframe < user_mode_eframe) > > x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf + > > - (bt->stacktop - bt->stackbase) - SIZE(pt_regs), > > + (bt->stacktop - bt->stackbase) - SIZE(pt_regs) - > > + top_of_kernel_stack_padding(), > > bt, ofp); > > } > > > > @@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry) > > return SWP_OFFSET(entry); > > } > > > > +static long > > +top_of_kernel_stack_padding(void) > > +{ > > + char buf1[BUFSIZE]; > > + char *cursor; > > + long final_value, ptregs_size_value; > > + char *arglist[MAXARGS]; > > + bool found = FALSE; > > + > > + static long kernel_stack_padding = -1; > > + > > + if (kernel_stack_padding >= 0) > > + return kernel_stack_padding; > > + > > + /* > > + * startup_64: > > + * ... > > + * mov %rsi,%r15 > > + * leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp > > + */ > > + sprintf(buf1, "disass /r startup_64"); > > + open_tmpfile2(); > > + if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) { > > + kernel_stack_padding = 0; > > + goto out; > > + } > > + > > + rewind(pc->tmpfile2); > > + while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) { > > + // machine code of "mov %rsi,%r15" > > + if (strstr(buf1, "49 89 f7")) > > + found = TRUE; > > + } > > + if (!found || !(cursor = strstr(buf1, "# 0x"))) { > > + kernel_stack_padding = 0; > > + goto out; > > + } > > + > > + parse_line(cursor, arglist); > > + final_value = stol(arglist[1], FAULT_ON_ERROR, NULL); > > + > > + /* > > + * rewind_stack_and_make_dead: > > + * ... > > + * leaq -PTREGS_SIZE(%rax), %rsp > > + */ > > + found = FALSE; > > + rewind(pc->tmpfile2); > > + sprintf(buf1, "disass rewind_stack_and_make_dead"); > > + if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) { > > + kernel_stack_padding = 0; > > + goto out; > > + } > > + rewind(pc->tmpfile2); > > + while (fgets(buf1, BUFSIZE, pc->tmpfile2)) { > > + // find leaq -PTREGS_SIZE(%rax), %rsp > > + if (strstr(buf1, "lea") && (cursor = strstr(buf1, "-0x"))) { > > + parse_line(cursor, arglist); > > + char *p = strchr(arglist[0], '('); > > + *p = '\0'; > > + ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL); > > + found = TRUE; > > + break; > > + } > > + } > > + if (!found) { > > + kernel_stack_padding = 0; > > + goto out; > > + } > > + > > + struct syment *s = symbol_search("__end_init_task"); > > + kernel_stack_padding = s->value - final_value - ptregs_size_value; > > +out: > > + close_tmpfile2(); > > + return kernel_stack_padding; > > +} > > + > > #endif /* X86_64 */ > > -- 2.40.1 > -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki