Hi Tao, On 24/09/22 10:25PM, Aditya Gupta wrote: > Hi Tao, > > > On 18/09/24 05:12, Tao Liu wrote: > > A error stack trace of bt cmd observed: > > > > crash> bt 1 > > PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > > #1 [c000000003773770] __switch_to at c00000000001fa9c > > #2 [c0000000037737d0] __schedule at c00000000112e4ec > > #3 [c0000000037738b0] schedule at c00000000112ea80 > > ... > > > > The #0 stack trace is incorrect, the function address shouldn't exceed _end. > > The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from > > STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > > CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will read a > > wrong value from stack when ABI_V2 enabled. > > > > To determine if ABI_V2 enabled is tricky. This patch do it by check the > > following: > > > > In arch/powerpc/include/asm/ppc_asm.h: > > #ifdef CONFIG_PPC64_ELF_ABI_V2 > > #define STK_GOT 24 > > #else > > #define STK_GOT 40 > > > > In arch/powerpc/kernel/tm.S: > > _GLOBAL(tm_reclaim) > > mfcr r5 > > mflr r0 > > stw r5, 8(r1) > > std r0, 16(r1) > > std r2, STK_GOT(r1) > > ... > > > > So a disassemble on tm_reclaim, and extract the STK_GOT value from std > > instruction is used as the approach. > > > > After the patch: > > crash> bt 1 > > PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > #0 [c0000000037737d0] __schedule at c00000000112e4ec > > #1 [c0000000037738b0] schedule at c00000000112ea80 > > ... > > > > Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> > > --- > > defs.h | 1 + > > ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > > 2 files changed, 58 insertions(+), 3 deletions(-) > > > > diff --git a/defs.h b/defs.h > > index 2231cb6..d5cb8cc 100644 > > --- a/defs.h > > +++ b/defs.h > > @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > > #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > > /* Used to find the user or kernel-mode frame*/ > > +#define STACK_SWITCH_FRAME_REGS 48 > > #define STACK_FRAME_OVERHEAD 112 > > #define EXCP_FRAME_MARKER 0x7265677368657265 > > diff --git a/ppc64.c b/ppc64.c > > index e8930a1..6e5f155 100644 > > --- a/ppc64.c > > +++ b/ppc64.c > > @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > > static ulong pmd_page_vaddr_l4(ulong pmd); > > static int is_opal_context(ulong sp, ulong nip); > > void opalmsg(void); > > +static bool is_ppc64_elf_abi_v2(void); > > static int is_opal_context(ulong sp, ulong nip) > > { > > @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > > return sp; > > } > > +static bool > > +is_ppc64_elf_abi_v2(void) > > +{ > > > Thanks for fixing these issues on ppc64 ! > > > It is good. I am also checking if there is a way to implement > 'is_ppc64_elf_abi_v2' by getting ABI version from the ELF notes, as 'file' > command somehow does know: > > $ file vmlinux-little-endian vmlinux-little-endian: ELF 64-bit LSB > executable, 64-bit PowerPC or cisco 7500, OpenPOWER ELF V2 ABI, version 1 > (SYSV), statically linked, > BuildID[sha1]=f6449970e6af6cddeed1e260393e2377ccc0f369, not stripped > > That might be more dependable (if we can do that). How about this approach ? I feel this might be more dependable, as the file command also uses this, as well as fadump (PowerPC firmware-assisted dump) in the kernel. > As a sidenote, there is still some issue either in patch #1 or #3 > causing all secondary CPUs (where swapper task is running) to show all > threads as '_end' What do you think ? Either way what you chose, I do realise the idea to utilise the 'std r2, STK_GOT(r1)' in such a way, was interesting and smart. static bool is_ppc64_elf_abi_v2(void) { const char *kernel_file = pc->namelist; int fd, swap; char eheader[BUFSIZE]; Elf64_Ehdr *elf64; if ((fd = open(kernel_file, O_RDONLY)) < 0) { error(INFO, "%s: %s\n", kernel_file, strerror(errno)); return FALSE; } if (read(fd, eheader, BUFSIZE) != BUFSIZE) { error(INFO, "%s: %s\n", kernel_file, strerror(errno)); close(fd); return FALSE; } close(fd); if (!STRNEQ(eheader, ELFMAG) || eheader[EI_VERSION] != EV_CURRENT) return FALSE; elf64 = (Elf64_Ehdr *)&eheader[0]; swap = (((eheader[EI_DATA] == ELFDATA2LSB) && (__BYTE_ORDER == __BIG_ENDIAN)) || ((eheader[EI_DATA] == ELFDATA2MSB) && (__BYTE_ORDER == __LITTLE_ENDIAN))); /* * The ABI version is stored in E_FLAGS in the ELF header, atleast for * ppc64 binaries. * * These logic is also used by the 'file' command, which states this in * its magic file: * * >18 leshort 20 PowerPC or cisco 4500, * >18 leshort 21 64-bit PowerPC or cisco 7500, * >>48 lelong 0 Unspecified or Power ELF V1 ABI, * >>48 lelong 1 Power ELF V1 ABI, * >>48 lelong 2 OpenPOWER ELF V2 ABI, * * The '>18' above means, data at 18 (0x12), which is basically * 'elf64->e_machine', which will be 20 for PPC, and 21 for PPC64 * * Similarly, '>>48' is offset 48 (0x30) in the file, which is * basically 'elf64->e_flags', which has the ELF ABI version */ if (elf64->e_flags == 2) { /* ABI v2 */ return TRUE; } else if (elf64->e_flags == 1) { /* ABI v1 */ return FALSE; } error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); return FALSE; } Also, we can those values in the ELF header: $ hexdump vmlinux-little-endian 0000000 457f 464c 0102 0001 0000 0000 0000 0000 0000010 0002 0015 (= 21 = E_MACHINE) 0001 0000 0000 0000 0000 c000 0000020 0040 0000 0000 0000 8bf8 0040 0000 0000 0000030 0002 (= E_FLAGS = ABI) 0000 0040 0038 0002 0040 0027 0026 0000040 0001 0000 0007 0000 0000 0001 0000 0000 Also, I see 'pc->namelist' is basically the kernel filename, is that correct ? Can that be renamed to kernel_filename or something ? Thanks, Aditya Gupta > > Thanks, > > Aditya Gupta > > > + char buf1[BUFSIZE]; > > + char *pos1, *pos2; > > + int errflag = 0; > > + ulong stk_got = 0; > > + static bool ret = false; > > + static bool checked = false; > > + > > + if (checked == true || !symbol_exists("tm_reclaim")) > > + return ret; > > + > > + sprintf(buf1, "x/16i tm_reclaim"); > > + open_tmpfile(); > > + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > > + goto out; > > + checked = true; > > + rewind(pc->tmpfile); > > + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > > + // "std r2, STK_GOT(r1)" is expected > > + if (strstr(buf1, "std") && > > + strstr(buf1, "(r1)") && > > + (pos1 = strstr(buf1, "r2,"))) { > > + pos1 += strlen("r2,"); > > + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++); > > + *pos2 = '\0'; > > + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); > > + break; > > + } > > + } > > + > > + if (!errflag) { > > + switch (stk_got) { > > + case 24: > > + ret = true; > > + case 40: > > + goto out; > > + } > > + } > > + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > > +out: > > + close_tmpfile(); > > + return ret; > > +} > > /* > > * get the SP and PC values for idle tasks. > > @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp) > > sp = ppc64_get_sp(task); > > if (!INSTACK(sp, bt)) > > goto out; > > - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > - sizeof(struct ppc64_pt_regs), > > - "PPC64 pt_regs", FAULT_ON_ERROR); > > + > > + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) { > > + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > > + sizeof(struct ppc64_pt_regs), > > + "PPC64 pt_regs", FAULT_ON_ERROR); > > + } else { > > + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > + sizeof(struct ppc64_pt_regs), > > + "PPC64 pt_regs", FAULT_ON_ERROR); > > + } > > + > > ip = regs.nip; > > closest = closest_symbol(ip); > > if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) { -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki