Re: [PATCH] x86_64: Fix "bt" command printing stale entries on Linux 6.4 and later

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Lianbo,

I thought of an idea that makes this patch better, in particular by also
supporting transitional versions, so please hold on.

Thanks,
Kazu

On 2023/05/16 16:25, HAGIO KAZUHITO(萩尾 一仁) wrote:
> From: Kazuhito Hagio <k-hagio-ab@xxxxxxx>
> 
> Kernel commit fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in
> two"), which is contained in Linux 6.4 and later kernels, changed
> ORC_TYPE_CALL macro from 0 to 2.  As a result, the "bt" command cannot
> use ORC entries and displays stale entries in a call trace.
> 
>    crash> bt 1
>    PID: 1        TASK: ffff93cd06294180  CPU: 51   COMMAND: "systemd"
>     #0 [ffffb72bc00cbc98] __schedule at ffffffff86e52aae
>     #1 [ffffb72bc00cbd00] schedule at ffffffff86e52f6a
>     #2 [ffffb72bc00cbd18] schedule_hrtimeout_range_clock at ffffffff86e58ef5
>     #3 [ffffb72bc00cbd88] ep_poll at ffffffff8669624d
>     #4 [ffffb72bc00cbe28] do_epoll_wait at ffffffff86696371
>     #5 [ffffb72bc00cbe30] do_timerfd_settime at ffffffff8669902b        <<
>     #6 [ffffb72bc00cbe60] __x64_sys_epoll_wait at ffffffff86696bf0
>     #7 [ffffb72bc00cbeb0] do_syscall_64 at ffffffff86e3feb9
>     #8 [ffffb72bc00cbee0] __task_pid_nr_ns at ffffffff863330d7          <<
>     #9 [ffffb72bc00cbf08] syscall_exit_to_user_mode at ffffffff86e466b2 << stale entries
>    #10 [ffffb72bc00cbf18] do_syscall_64 at ffffffff86e3fec9             <<
>    #11 [ffffb72bc00cbf50] entry_SYSCALL_64_after_hwframe at ffffffff870000aa
> 
> Also, struct orc_entry in kernel has changed, and debugging information
> for ORC unwinder can be displayed incorrectly.
> 
> To fix these,
> (1) introduce a "kernel_orc_entry_6_4" structure corresponding to 6.4 and
>      an abstraction layer "orc_entry" structure in crash,
> (2) switch ORC_TYPE_CALL to 2 or 0 according to the kernel's orc_entry structure.
> 
> Signed-off-by: Kazuhito Hagio <k-hagio-ab@xxxxxxx>
> ---
>   defs.h   |  24 +++++++++++--
>   x86_64.c | 104 ++++++++++++++++++++++++++++++++++++++++++-------------
>   2 files changed, 102 insertions(+), 26 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index 211fc9d55d33..0cdfa295f964 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -6356,6 +6356,25 @@ typedef struct __attribute__((__packed__)) {
>           unsigned int end:1;
>   } kernel_orc_entry;
>   
> +typedef struct __attribute__((__packed__)) {
> +        signed short sp_offset;
> +        signed short bp_offset;
> +        unsigned int sp_reg:4;
> +        unsigned int bp_reg:4;
> +        unsigned int type:3;
> +        unsigned int signal:1;
> +} kernel_orc_entry_6_4;
> +
> +typedef struct orc_entry {
> +        signed short sp_offset;
> +        signed short bp_offset;
> +        unsigned int sp_reg;
> +        unsigned int bp_reg;
> +        unsigned int type;
> +        unsigned int end;
> +        unsigned int signal;
> +} orc_entry;
> +
>   struct ORC_data {
>   	int module_ORC;
>   	uint lookup_num_blocks;
> @@ -6366,10 +6385,10 @@ struct ORC_data {
>   	ulong orc_lookup;
>   	ulong ip_entry;
>   	ulong orc_entry;
> -	kernel_orc_entry kernel_orc_entry;
> +	orc_entry orc_entry_data;
>   };
>   
> -#define ORC_TYPE_CALL                   0
> +#define ORC_TYPE_CALL                   ((machdep->flags & ORC_6_4) ? 2 : 0)
>   #define ORC_TYPE_REGS                   1
>   #define ORC_TYPE_REGS_IRET              2
>   #define UNWIND_HINT_TYPE_SAVE           3
> @@ -6446,6 +6465,7 @@ struct machine_specific {
>   #define ORC         (0x4000)
>   #define KPTI        (0x8000)
>   #define L1TF       (0x10000)
> +#define ORC_6_4    (0x20000)
>   
>   #define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4|VM_5LEVEL)
>   
> diff --git a/x86_64.c b/x86_64.c
> index 5019c69e452e..7d863848e178 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -132,9 +132,9 @@ static void GART_init(void);
>   static void x86_64_exception_stacks_init(void);
>   static int in_START_KERNEL_map(ulong);
>   static ulong orc_ip(ulong);
> -static kernel_orc_entry *__orc_find(ulong, ulong, uint, ulong);
> -static kernel_orc_entry *orc_find(ulong);
> -static kernel_orc_entry *orc_module_find(ulong);
> +static orc_entry *__orc_find(ulong, ulong, uint, ulong);
> +static orc_entry *orc_find(ulong);
> +static orc_entry *orc_module_find(ulong);
>   static ulong ip_table_to_vaddr(ulong);
>   static void orc_dump(ulong);
>   
> @@ -806,6 +806,8 @@ x86_64_dump_machdep_table(ulong arg)
>   		fprintf(fp, "%sFRAMESIZE_DEBUG", others++ ? "|" : "");
>   	if (machdep->flags & ORC)
>   		fprintf(fp, "%sORC", others++ ? "|" : "");
> +	if (machdep->flags & ORC_6_4)
> +		fprintf(fp, "%sORC_6_4", others++ ? "|" : "");
>   	if (machdep->flags & FRAMEPOINTER)
>   		fprintf(fp, "%sFRAMEPOINTER", others++ ? "|" : "");
>   	if (machdep->flags & GART_REGION)
> @@ -988,16 +990,20 @@ x86_64_dump_machdep_table(ulong arg)
>   		fprintf(fp, "                    orc_lookup: %lx\n", ms->orc.orc_lookup);
>   		fprintf(fp, "                      ip_entry: %lx\n", ms->orc.ip_entry);
>   		fprintf(fp, "                     orc_entry: %lx\n", ms->orc.orc_entry);
> -		fprintf(fp, "              kernel_orc_entry:\n");
> -		fprintf(fp, "                       sp_offset: %d\n", ms->orc.kernel_orc_entry.sp_offset);
> -		fprintf(fp, "                       bp_offset: %d\n", ms->orc.kernel_orc_entry.bp_offset);
> -		fprintf(fp, "                          sp_reg: %d\n", ms->orc.kernel_orc_entry.sp_reg);
> -		fprintf(fp, "                          bp_reg: %d\n", ms->orc.kernel_orc_entry.bp_reg);
> -		fprintf(fp, "                            type: %d\n", ms->orc.kernel_orc_entry.type);
> +		fprintf(fp, "                orc_entry_data:\n");
> +		fprintf(fp, "                       sp_offset: %d\n", ms->orc.orc_entry_data.sp_offset);
> +		fprintf(fp, "                       bp_offset: %d\n", ms->orc.orc_entry_data.bp_offset);
> +		fprintf(fp, "                          sp_reg: %d\n", ms->orc.orc_entry_data.sp_reg);
> +		fprintf(fp, "                          bp_reg: %d\n", ms->orc.orc_entry_data.bp_reg);
> +		fprintf(fp, "                            type: %d\n", ms->orc.orc_entry_data.type);
>   		if (MEMBER_EXISTS("orc_entry", "end"))
> -			fprintf(fp, "                             end: %d\n", ms->orc.kernel_orc_entry.end);
> +			fprintf(fp, "                             end: %d\n", ms->orc.orc_entry_data.end);
>   		else
>   			fprintf(fp, "                             end: (n/a)\n");
> +		if (MEMBER_EXISTS("orc_entry", "signal"))
> +			fprintf(fp, "                          signal: %d\n", ms->orc.orc_entry_data.signal);
> +		else
> +			fprintf(fp, "                          signal: (n/a)\n");
>   	}
>   	fprintf(fp, "                      pto: %s",
>   		machdep->flags & PT_REGS_INIT ? "\n" : "(uninitialized)\n");
> @@ -6391,7 +6397,8 @@ x86_64_ORC_init(void)
>   	    !MEMBER_EXISTS("orc_entry", "sp_reg") ||
>   	    !MEMBER_EXISTS("orc_entry", "bp_reg") ||
>   	    !MEMBER_EXISTS("orc_entry", "type") ||
> -	    SIZE(orc_entry) != sizeof(kernel_orc_entry)) {
> +	    (SIZE(orc_entry) != sizeof(kernel_orc_entry) &&
> +	     SIZE(orc_entry) != sizeof(kernel_orc_entry_6_4))) {
>   		error(WARNING, "ORC unwinder: orc_entry structure has changed\n");
>   		return;
>   	}
> @@ -6439,6 +6446,10 @@ x86_64_ORC_init(void)
>   	MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
>   	MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");
>   
> +	if (MEMBER_EXISTS("orc_entry", "signal") && /* added at 6.3 */
> +	    !MEMBER_EXISTS("orc_entry", "end"))	    /* removed at 6.4 with type change */
> +		machdep->flags |= ORC_6_4;
> +
>   	machdep->flags |= ORC;
>   }
>   
> @@ -8521,7 +8532,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
>   	int reterror;
>   	int arg_exists;
>   	int exception;
> -	kernel_orc_entry *korc;
> +	orc_entry *korc;
>   
>   	if (!(bt->flags & BT_FRAMESIZE_DEBUG)) {
>   		if ((bt->flags & BT_FRAMESIZE_IGNORE_MASK) ||
> @@ -8613,6 +8624,8 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
>   				korc->sp_reg, korc->bp_reg, korc->type);
>   			if (MEMBER_EXISTS("orc_entry", "end"))
>   				fprintf(fp, " end: %d", korc->end);
> +			if (MEMBER_EXISTS("orc_entry", "signal"))
> +				fprintf(fp, " signal: %d", korc->signal);
>   			fprintf(fp, "\n");
>   		}
>   
> @@ -9117,7 +9130,43 @@ orc_ip(ulong ip)
>   	return (ip + ip_entry);
>   }
>   
> -static kernel_orc_entry *
> +static orc_entry *
> +orc_get_entry(struct ORC_data *orc)
> +{
> +	struct orc_entry *entry = &orc->orc_entry_data;
> +
> +	if (machdep->flags & ORC_6_4) {
> +		kernel_orc_entry_6_4 korc;
> +
> +		if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry_6_4),
> +				"kernel orc_entry", RETURN_ON_ERROR|QUIET))
> +			return NULL;
> +
> +		entry->sp_offset = korc.sp_offset;
> +		entry->bp_offset = korc.bp_offset;
> +		entry->sp_reg = korc.sp_reg;
> +		entry->bp_reg = korc.bp_reg;
> +		entry->type = korc.type;
> +		entry->signal = korc.signal;
> +	} else {
> +		kernel_orc_entry korc;
> +
> +		if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry),
> +				"kernel orc_entry", RETURN_ON_ERROR|QUIET))
> +			return NULL;
> +
> +		entry->sp_offset = korc.sp_offset;
> +		entry->bp_offset = korc.bp_offset;
> +		entry->sp_reg = korc.sp_reg;
> +		entry->bp_reg = korc.bp_reg;
> +		entry->type = korc.type;
> +		entry->end = korc.end;
> +	}
> +
> +	return entry;
> +}
> +
> +static orc_entry *
>   __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
>   {
>   	int index;
> @@ -9127,7 +9176,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
>   	int *ip_table = (int *)ip_table_ptr;
>   	struct ORC_data *orc = &machdep->machspec->orc;
>   	ulong vaddr;
> -	kernel_orc_entry *korc;
> +	orc_entry *korc;
>   
>   	if (CRASHDEBUG(2)) {
>   		int i, ip_entry;
> @@ -9171,11 +9220,11 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
>   
>   	orc->ip_entry = (ulong)found;
>   	orc->orc_entry = u_table_ptr + (index * SIZE(orc_entry));
> -	if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry,
> -	    sizeof(kernel_orc_entry), "kernel orc_entry", RETURN_ON_ERROR|QUIET))
> +
> +	if (!orc_get_entry(orc))
>   		return NULL;
>   
> -	korc = &orc->kernel_orc_entry;
> +	korc = &orc->orc_entry_data;
>   
>   	if (CRASHDEBUG(2)) {
>   		fprintf(fp, "  found: %lx  index: %d\n", (ulong)found, index);
> @@ -9184,6 +9233,8 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
>   			orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
>   		if (MEMBER_EXISTS("orc_entry", "end"))
>   			fprintf(fp, " end: %d", korc->end);
> +		if (MEMBER_EXISTS("orc_entry", "signal"))
> +			fprintf(fp, " signal: %d", korc->signal);
>   		fprintf(fp, "\n");
>   	}
>   
> @@ -9195,7 +9246,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
>   #define LOOKUP_START_IP         (unsigned long)kt->stext
>   #define LOOKUP_STOP_IP          (unsigned long)kt->etext
>   
> -static kernel_orc_entry *
> +static orc_entry *
>   orc_find(ulong ip)
>   {
>   	unsigned int idx, start, stop;
> @@ -9265,7 +9316,7 @@ orc_find(ulong ip)
>   		orc->__start_orc_unwind + (start * SIZE(orc_entry)), stop - start, ip);
>   }
>   
> -static kernel_orc_entry *
> +static orc_entry *
>   orc_module_find(ulong ip)
>   {
>   	struct load_module *lm;
> @@ -9312,7 +9363,7 @@ static void
>   orc_dump(ulong ip)
>   {
>   	struct ORC_data *orc = &machdep->machspec->orc;
> -	kernel_orc_entry *korc;
> +	orc_entry *korc;
>   	ulong vaddr, offset;
>   	struct syment *sp, *orig;
>   
> @@ -9335,18 +9386,23 @@ next_in_func:
>   		fprintf(fp, "%s+%ld -> ", sp->name, offset);
>   	else
>   		fprintf(fp, "(unresolved) -> ");
> -	if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry, sizeof(kernel_orc_entry),
> -	    "kernel orc_entry", RETURN_ON_ERROR))
> +
> +	if (!orc_get_entry(orc))
>   		error(FATAL, "cannot read orc_entry\n");
> -	korc = &orc->kernel_orc_entry;
> +	korc = &orc->orc_entry_data;
>   	fprintf(fp, "orc: %lx  spo: %d bpo: %d spr: %d bpr: %d type: %d",
>   			orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
>   	if (MEMBER_EXISTS("orc_entry", "end"))
>   		fprintf(fp, " end: %d", korc->end);
> +	if (MEMBER_EXISTS("orc_entry", "signal"))
> +		fprintf(fp, " signal: %d", korc->signal);
>   	fprintf(fp, "\n");
>   
>   	orc->ip_entry += sizeof(int);
> -	orc->orc_entry += sizeof(kernel_orc_entry);
> +	if (machdep->flags & ORC_6_4)
> +		orc->orc_entry += sizeof(kernel_orc_entry_6_4);
> +	else
> +		orc->orc_entry += sizeof(kernel_orc_entry);
>   	vaddr = ip_table_to_vaddr(orc->ip_entry);
>   	if ((sp = value_search(vaddr, &offset)))
>   		if (sp == orig)
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux