[Crash-utility] Re: [PATCH] x86_64: Add top_of_kernel_stack_padding for kernel stack

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, Tao

Thank you for the fix.

On 5/23/24 12:06 PM, devel-request@xxxxxxxxxxxxxxxxxxxxxxxxxxx wrote:
Date: Thu, 23 May 2024 12:06:03 +0800
From: Tao Liu<ltao@xxxxxxxxxx>
Subject:  [PATCH] x86_64: Add
	top_of_kernel_stack_padding for kernel stack
To:devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
Cc: Tao Liu<ltao@xxxxxxxxxx>
Message-ID:<20240523040603.10304-1-ltao@xxxxxxxxxx>
Content-Type: text/plain; charset="US-ASCII"; x-default=true

With kernel patch [1], x86_64 will add extra padding for kernel stack,
as a result, the pt_regs will be shift down by the offset of padding.
Without the patch, the values of registers read from pt_regs will be
incorrect.

Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by
Kconfig, according to kernel code comment [2], the value may be made
dynamicly later. In addition there might be systems compiled without
Kconfig avaliable. So in this patch, we will calculate the value of
TOP_OF_KERNEL_STACK_PADDING.

The calculation is as follows:

1) in startup_64(), there is a lea instruction as:
    leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp

2) in rewind_stack_and_make_dead(), there is a lea instruction as:
    leaq	-PTREGS_SIZE(%rax), %rsp

The disassembled 2 instructions will be like:

1) 0xffffffff93a0007d <startup_64+3>:      lea    0x1e03ec4(%rip),%rsp        # 0xffffffff95803f48
                                                                               ^^^^^^^^^^^^^^^^^^^^
2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>:     lea    -0xa8(%rax),%rsp
                                                                    ^^^^
0xffffffff95803f48 is the value of (__end_init_task -
TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
PTREGS_SIZE, __end_init_task can be get by symbol reading.

Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good, but it heavily relies on compiler.
Normally we would use this way unless there is no other choice.

How about the following changes? Although it doesn't handle the case that the value is dynamic, let's see
how to change in the kernel in future, and then consider how to reflect it in crash-utility.


diff --git a/defs.h b/defs.h
index 01f316e67dde..42d875965256 100644
--- a/defs.h
+++ b/defs.h
@@ -2414,6 +2414,7 @@ struct size_table {         /* stash of commonly-used sizes */
 	long maple_tree;
 	long maple_node;
 	long module_memory;
+	long fred_frame;
 };
struct array_table {
diff --git a/kernel.c b/kernel.c
index 1728b70c1b5c..cd3d6044cc9a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -668,6 +668,7 @@ kernel_init()
 	STRUCT_SIZE_INIT(softirq_state, "softirq_state");
 	STRUCT_SIZE_INIT(softirq_action, "softirq_action");
 	STRUCT_SIZE_INIT(desc_struct, "desc_struct");
+	STRUCT_SIZE_INIT(fred_frame, "fred_frame");
STRUCT_SIZE_INIT(char_device_struct, "char_device_struct");
 	if (VALID_STRUCT(char_device_struct)) {
diff --git a/x86_64.c b/x86_64.c
index 0c21eb827e4a..6777c93e6b47 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -4086,10 +4086,11 @@ in_exception_stack:
if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
             (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
+		long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
 		user_mode_eframe = bt->stacktop - SIZE(pt_regs);
 		if (last_process_stack_eframe < user_mode_eframe)
                 	x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
-                        	(bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+				(bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
                         	bt, ofp);
 	}
@@ -4407,10 +4408,11 @@ in_exception_stack: if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
             (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
+		long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) : 0;
 		user_mode_eframe = bt->stacktop - SIZE(pt_regs);
 		if (last_process_stack_eframe < user_mode_eframe)
                 	x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
-                        	(bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+				(bt->stacktop - stack_padding_size - bt->stackbase) - SIZE(pt_regs),
                         	bt, ofp);
 	}

Thanks
Lianbo

[1]:https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot2@tip-bot2/
[2]:https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/thread_info.h#L34

Signed-off-by: Tao Liu<ltao@xxxxxxxxxx>
---
  x86_64.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
  1 file changed, 82 insertions(+), 2 deletions(-)

diff --git a/x86_64.c b/x86_64.c
index 0c21eb8..43a31c2 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
  static orc_entry *orc_module_find(ulong);
  static ulong ip_table_to_vaddr(ulong);
  static void orc_dump(ulong);
+static long top_of_kernel_stack_padding(void);
struct machine_specific x86_64_machine_specific = { 0 }; @@ -4089,7 +4090,8 @@ in_exception_stack:
  		user_mode_eframe = bt->stacktop - SIZE(pt_regs);
  		if (last_process_stack_eframe < user_mode_eframe)
                  	x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
-                        	(bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+				(bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+				top_of_kernel_stack_padding(),
                          	bt, ofp);
  	}
@@ -4410,7 +4412,8 @@ in_exception_stack:
  		user_mode_eframe = bt->stacktop - SIZE(pt_regs);
  		if (last_process_stack_eframe < user_mode_eframe)
                  	x86_64_exception_frame(EFRAME_PRINT, 0, bt->stackbuf +
-                        	(bt->stacktop - bt->stackbase) - SIZE(pt_regs),
+				(bt->stacktop - bt->stackbase) - SIZE(pt_regs) -
+				top_of_kernel_stack_padding(),
                          	bt, ofp);
  	}
@@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
  	return SWP_OFFSET(entry);
  }
+static long
+top_of_kernel_stack_padding(void)
+{
+	char buf1[BUFSIZE];
+	char *cursor;
+	long final_value, ptregs_size_value;
+	char *arglist[MAXARGS];
+	bool found = FALSE;
+
+	static long kernel_stack_padding = -1;
+
+	if (kernel_stack_padding >= 0)
+		return kernel_stack_padding;
+
+	/*
+	* startup_64:
+	* ...
+	* mov %rsi,%r15
+	* leaq	(__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
+	*/
+	sprintf(buf1, "disass /r startup_64");
+	open_tmpfile2();
+	if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
+		kernel_stack_padding = 0;
+		goto out;
+	}
+
+	rewind(pc->tmpfile2);
+	while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
+		// machine code of "mov %rsi,%r15"
+		if (strstr(buf1, "49 89 f7"))
+			found = TRUE;
+	}
+	if (!found || !(cursor = strstr(buf1, "# 0x"))) {
+		kernel_stack_padding = 0;
+		goto out;
+	}
+	
+	parse_line(cursor, arglist);
+	final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
+
+	/*
+	* rewind_stack_and_make_dead:
+	* ...
+	* leaq	-PTREGS_SIZE(%rax), %rsp
+	*/
+	found = FALSE;
+	rewind(pc->tmpfile2);
+	sprintf(buf1, "disass rewind_stack_and_make_dead");
+	if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
+		kernel_stack_padding = 0;
+		goto out;
+	}
+	rewind(pc->tmpfile2);
+	while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
+		// find leaq -PTREGS_SIZE(%rax), %rsp
+		if (strstr(buf1, "lea") && (cursor = strstr(buf1, "-0x"))) {
+			parse_line(cursor, arglist);
+			char *p = strchr(arglist[0], '(');
+			*p = '\0';
+			ptregs_size_value = stol(arglist[0] + 1, FAULT_ON_ERROR, NULL);
+			found = TRUE;
+			break;
+		}
+	}
+	if (!found) {
+		kernel_stack_padding = 0;
+		goto out;
+	}
+
+	struct syment *s = symbol_search("__end_init_task");
+	kernel_stack_padding = s->value - final_value - ptregs_size_value;
+out:
+	close_tmpfile2();
+	return kernel_stack_padding;
+}
+
  #endif  /* X86_64 */
-- 2.40.1
--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux