[Crash-utility] Re: [PATCH v1 1/3] ppc64: Fix bt printing error stack trace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Tao,

On 24/09/22 10:25PM, Aditya Gupta wrote:
> Hi Tao,
> 
> 
> On 18/09/24 05:12, Tao Liu wrote:
> > A error stack trace of bt cmd observed:
> > 
> > crash> bt 1
> > PID: 1        TASK: c000000003714b80  CPU: 2    COMMAND: "systemd"
> >   #0 [c0000000037735c0] _end at c0000000037154b0  (unreliable)
> >   #1 [c000000003773770] __switch_to at c00000000001fa9c
> >   #2 [c0000000037737d0] __schedule at c00000000112e4ec
> >   #3 [c0000000037738b0] schedule at c00000000112ea80
> >   ...
> > 
> > The #0 stack trace is incorrect, the function address shouldn't exceed _end.
> > The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from
> > STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For
> > CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will read a
> > wrong value from stack when ABI_V2 enabled.
> > 
> > To determine if ABI_V2 enabled is tricky. This patch do it by check the
> > following:
> > 
> > In arch/powerpc/include/asm/ppc_asm.h:
> >    #ifdef CONFIG_PPC64_ELF_ABI_V2
> >    #define STK_GOT		24
> >    #else
> >    #define STK_GOT		40
> > 
> > In arch/powerpc/kernel/tm.S:
> >    _GLOBAL(tm_reclaim)
> > 	mfcr	r5
> > 	mflr	r0
> > 	stw	r5, 8(r1)
> > 	std	r0, 16(r1)
> > 	std	r2, STK_GOT(r1)
> > 	...
> > 
> > So a disassemble on tm_reclaim, and extract the STK_GOT value from std
> > instruction is used as the approach.
> > 
> > After the patch:
> > crash> bt 1
> > PID: 1        TASK: c000000003714b80  CPU: 2    COMMAND: "systemd"
> >   #0 [c0000000037737d0] __schedule at c00000000112e4ec
> >   #1 [c0000000037738b0] schedule at c00000000112ea80
> >   ...
> > 
> > Signed-off-by: Tao Liu <ltao@xxxxxxxxxx>
> > ---
> >   defs.h  |  1 +
> >   ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> >   2 files changed, 58 insertions(+), 3 deletions(-)
> > 
> > diff --git a/defs.h b/defs.h
> > index 2231cb6..d5cb8cc 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t {
> >   #define MSR_PR_LG	14	/* Problem State / Privilege Level */
> >   				/* Used to find the user or kernel-mode frame*/
> > +#define STACK_SWITCH_FRAME_REGS         48
> >   #define STACK_FRAME_OVERHEAD            112
> >   #define EXCP_FRAME_MARKER               0x7265677368657265
> > diff --git a/ppc64.c b/ppc64.c
> > index e8930a1..6e5f155 100644
> > --- a/ppc64.c
> > +++ b/ppc64.c
> > @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud);
> >   static ulong pmd_page_vaddr_l4(ulong pmd);
> >   static int is_opal_context(ulong sp, ulong nip);
> >   void opalmsg(void);
> > +static bool is_ppc64_elf_abi_v2(void);
> >   static int is_opal_context(ulong sp, ulong nip)
> >   {
> > @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task)
> >   	return sp;
> >   }
> > +static bool
> > +is_ppc64_elf_abi_v2(void)
> > +{
> 
> 
> Thanks for fixing these issues on ppc64 !
> 
> 
> It is good. I am also checking if there is a way to implement
> 'is_ppc64_elf_abi_v2' by getting ABI version from the ELF notes, as 'file'
> command somehow does know:
> 
> $ file vmlinux-little-endian vmlinux-little-endian: ELF 64-bit LSB
> executable, 64-bit PowerPC or cisco 7500, OpenPOWER ELF V2 ABI, version 1
> (SYSV), statically linked,
> BuildID[sha1]=f6449970e6af6cddeed1e260393e2377ccc0f369, not stripped
> 
> That might be more dependable (if we can do that).

How about this approach ?

I feel this might be more dependable, as the file command also uses
this, as well as fadump (PowerPC firmware-assisted dump) in the kernel.

> As a sidenote, there is still some issue either in patch #1 or #3
> causing all secondary CPUs (where swapper task is running) to show all
> threads as '_end'

What do you think ?

Either way what you chose, I do realise the idea to utilise the
'std r2, STK_GOT(r1)' in such a way, was interesting and smart.

    static bool
    is_ppc64_elf_abi_v2(void)
    {
    	const char *kernel_file = pc->namelist;
    	int fd, swap;
    	char eheader[BUFSIZE];
    	Elf64_Ehdr *elf64;
    
    	if ((fd = open(kernel_file, O_RDONLY)) < 0) {
    		error(INFO, "%s: %s\n", kernel_file, strerror(errno));
    		return FALSE;
    	}
    	if (read(fd, eheader, BUFSIZE) != BUFSIZE) {
            error(INFO, "%s: %s\n", kernel_file, strerror(errno));
    		close(fd);
    		return FALSE;
    	}  
    	close(fd);
    
    	if (!STRNEQ(eheader, ELFMAG) || eheader[EI_VERSION] != EV_CURRENT)
    		return FALSE;
    
    	elf64 = (Elf64_Ehdr *)&eheader[0];
    
    	swap = (((eheader[EI_DATA] == ELFDATA2LSB) && 
    	     (__BYTE_ORDER == __BIG_ENDIAN)) ||
    	    ((eheader[EI_DATA] == ELFDATA2MSB) && 
    	     (__BYTE_ORDER == __LITTLE_ENDIAN)));
    
    	/*
    	 * The ABI version is stored in E_FLAGS in the ELF header, atleast for
    	 * ppc64 binaries.
    	 *
    	 * These logic is also used by the 'file' command, which states this in
    	 * its magic file:
    	 *
    	 *     >18	leshort		20		PowerPC or cisco 4500,
    	 *     >18	leshort		21		64-bit PowerPC or cisco 7500,
    	 *     >>48	lelong		0		Unspecified or Power ELF V1 ABI,
    	 *     >>48	lelong		1		Power ELF V1 ABI,
    	 *     >>48	lelong		2		OpenPOWER ELF V2 ABI,
    	 *
    	 * The '>18' above means, data at 18 (0x12), which is basically
    	 * 'elf64->e_machine', which will be 20 for PPC, and 21 for PPC64
    	 *
    	 * Similarly, '>>48' is offset 48 (0x30) in the file, which is
    	 * basically 'elf64->e_flags', which has the ELF ABI version
    	 */
    	if (elf64->e_flags == 2) {
    		/* ABI v2 */
    		return TRUE;
    	} else if (elf64->e_flags == 1) {
    		/* ABI v1 */
    		return FALSE;
    	}
    
    	error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
    	return FALSE;
    }

Also, we can those values in the ELF header:

    $ hexdump vmlinux-little-endian
    0000000 457f 464c 0102 0001 0000 0000 0000 0000
    0000010 0002 0015 (= 21 = E_MACHINE)  0001 0000 0000 0000 0000 c000
    0000020 0040 0000 0000 0000 8bf8 0040 0000 0000
    0000030 0002 (= E_FLAGS = ABI)   0000 0040 0038 0002 0040 0027 0026
    0000040 0001 0000 0007 0000 0000 0001 0000 0000

Also, I see 'pc->namelist' is basically the kernel filename, is that
correct ? Can that be renamed to kernel_filename or something ?

Thanks,
Aditya Gupta

> 
> Thanks,
> 
> Aditya Gupta
> 
> > +	char buf1[BUFSIZE];
> > +	char *pos1, *pos2;
> > +	int errflag = 0;
> > +	ulong stk_got = 0;
> > +	static bool ret = false;
> > +	static bool checked = false;
> > +
> > +	if (checked == true || !symbol_exists("tm_reclaim"))
> > +		return ret;
> > +
> > +	sprintf(buf1, "x/16i tm_reclaim");
> > +	open_tmpfile();
> > +	if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR))
> > +		goto out;
> > +	checked = true;
> > +	rewind(pc->tmpfile);
> > +	while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
> > +		// "std	r2, STK_GOT(r1)" is expected
> > +		if (strstr(buf1, "std") &&
> > +		    strstr(buf1, "(r1)") &&
> > +		    (pos1 = strstr(buf1, "r2,"))) {
> > +			pos1 += strlen("r2,");
> > +			for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++);
> > +			*pos2 = '\0';
> > +			stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (!errflag) {
> > +		switch (stk_got) {
> > +		case 24:
> > +			ret = true;
> > +		case 40:
> > +			goto out;
> > +		}
> > +	}
> > +	error(WARNING, "Unstable elf_abi v1/v2 detection.\n");
> > +out:
> > +	close_tmpfile();
> > +	return ret;
> > +}
> >   /*
> >    *  get the SP and PC values for idle tasks.
> > @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp)
> >   	sp = ppc64_get_sp(task);
> >   	if (!INSTACK(sp, bt))
> >   		goto out;
> > -	readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> > -		sizeof(struct ppc64_pt_regs),
> > -		"PPC64 pt_regs", FAULT_ON_ERROR);
> > +
> > +	if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) {
> > +		readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, &regs,
> > +			sizeof(struct ppc64_pt_regs),
> > +			"PPC64 pt_regs", FAULT_ON_ERROR);
> > +	} else {
> > +		readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, &regs,
> > +			sizeof(struct ppc64_pt_regs),
> > +			"PPC64 pt_regs", FAULT_ON_ERROR);
> > +	}
> > +
> >   	ip = regs.nip;
> >   	closest = closest_symbol(ip);
> >   	if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) {
--
Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux