Hi Aditya & lianbo, On Tue, Sep 24, 2024 at 2:28 AM Aditya Gupta <adityag@xxxxxxxxxxxxx> wrote: > > Hi Tao, > > On 24/09/22 10:25PM, Aditya Gupta wrote: > > Hi Tao, > > > > > > On 18/09/24 05:12, Tao Liu wrote: > > > A error stack trace of bt cmd observed: > > > > > > crash> bt 1 > > > PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > > #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > > > #1 [c000000003773770] __switch_to at c00000000001fa9c > > > #2 [c0000000037737d0] __schedule at c00000000112e4ec > > > #3 [c0000000037738b0] schedule at c00000000112ea80 > > > ... > > > > > > The #0 stack trace is incorrect, the function address shouldn't exceed _end. > > > The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from > > > STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > > > CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will read a > > > wrong value from stack when ABI_V2 enabled. > > > > > > To determine if ABI_V2 enabled is tricky. This patch do it by check the > > > following: > > > > > > In arch/powerpc/include/asm/ppc_asm.h: > > > #ifdef CONFIG_PPC64_ELF_ABI_V2 > > > #define STK_GOT 24 > > > #else > > > #define STK_GOT 40 > > > > > > In arch/powerpc/kernel/tm.S: > > > _GLOBAL(tm_reclaim) > > > mfcr r5 > > > mflr r0 > > > stw r5, 8(r1) > > > std r0, 16(r1) > > > std r2, STK_GOT(r1) > > > ... > > > > > > So a disassemble on tm_reclaim, and extract the STK_GOT value from std > > > instruction is used as the approach. > > > > > > After the patch: > > > crash> bt 1 > > > PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > > #0 [c0000000037737d0] __schedule at c00000000112e4ec > > > #1 [c0000000037738b0] schedule at c00000000112ea80 > > > ... > > > > > > Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> > > > --- > > > defs.h | 1 + > > > ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > > > 2 files changed, 58 insertions(+), 3 deletions(-) > > > > > > diff --git a/defs.h b/defs.h > > > index 2231cb6..d5cb8cc 100644 > > > --- a/defs.h > > > +++ b/defs.h > > > @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > > > #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > > > /* Used to find the user or kernel-mode frame*/ > > > +#define STACK_SWITCH_FRAME_REGS 48 > > > #define STACK_FRAME_OVERHEAD 112 > > > #define EXCP_FRAME_MARKER 0x7265677368657265 > > > diff --git a/ppc64.c b/ppc64.c > > > index e8930a1..6e5f155 100644 > > > --- a/ppc64.c > > > +++ b/ppc64.c > > > @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > > > static ulong pmd_page_vaddr_l4(ulong pmd); > > > static int is_opal_context(ulong sp, ulong nip); > > > void opalmsg(void); > > > +static bool is_ppc64_elf_abi_v2(void); > > > static int is_opal_context(ulong sp, ulong nip) > > > { > > > @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > > > return sp; > > > } > > > +static bool > > > +is_ppc64_elf_abi_v2(void) > > > +{ > > > > > > Thanks for fixing these issues on ppc64 ! > > > > > > It is good. I am also checking if there is a way to implement > > 'is_ppc64_elf_abi_v2' by getting ABI version from the ELF notes, as 'file' > > command somehow does know: > > > > $ file vmlinux-little-endian vmlinux-little-endian: ELF 64-bit LSB > > executable, 64-bit PowerPC or cisco 7500, OpenPOWER ELF V2 ABI, version 1 > > (SYSV), statically linked, > > BuildID[sha1]=f6449970e6af6cddeed1e260393e2377ccc0f369, not stripped > > > > That might be more dependable (if we can do that). > > How about this approach ? > > I feel this might be more dependable, as the file command also uses > this, as well as fadump (PowerPC firmware-assisted dump) in the kernel. > > > As a sidenote, there is still some issue either in patch #1 or #3 > > causing all secondary CPUs (where swapper task is running) to show all > > threads as '_end' > > What do you think ? I think your approach will work, thanks for your code! Please see my comments inlined in the code: > > Either way what you chose, I do realise the idea to utilise the > 'std r2, STK_GOT(r1)' in such a way, was interesting and smart. > > static bool > is_ppc64_elf_abi_v2(void) > { > const char *kernel_file = pc->namelist; > int fd, swap; > char eheader[BUFSIZE]; > Elf64_Ehdr *elf64; > > if ((fd = open(kernel_file, O_RDONLY)) < 0) { > error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > return FALSE; > } > if (read(fd, eheader, BUFSIZE) != BUFSIZE) { > error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > close(fd); > return FALSE; > } > close(fd); > > if (!STRNEQ(eheader, ELFMAG) || eheader[EI_VERSION] != EV_CURRENT) > return FALSE; > > elf64 = (Elf64_Ehdr *)&eheader[0]; > > swap = (((eheader[EI_DATA] == ELFDATA2LSB) && > (__BYTE_ORDER == __BIG_ENDIAN)) || > ((eheader[EI_DATA] == ELFDATA2MSB) && > (__BYTE_ORDER == __LITTLE_ENDIAN))); The above code seems duplicated with the one in is_kernel(), in fact, is_kernel() will be invoked at the very beginning to determine if one file is vmlinux, so I would suggest to add our abi_v2 check here, like: in ppc64.c: int ppc64_elf_abi = 0; static bool is_ppc64_elf_abi_v2(void) { if (ppc64_elf_abi == 1) ... else if (ppc64_elf_abi == 2) ... else ... } in symbols.c:is_kernel(): ... } else if ((elf64->e_ident[EI_CLASS] == ELFCLASS64) && ((swap16(elf64->e_type, swap) == ET_EXEC) || ... case EM_PPC64: if (machine_type_mismatch(file, "PPC64", NULL, 0)) goto bailout; + else + ppc64_elf_abi = swap32(elf64->e_flags, swap); break; So we can initial the abi version at crash start only once, and get rid of the code duplication. > > /* > * The ABI version is stored in E_FLAGS in the ELF header, atleast for > * ppc64 binaries. > * > * These logic is also used by the 'file' command, which states this in > * its magic file: > * > * >18 leshort 20 PowerPC or cisco 4500, > * >18 leshort 21 64-bit PowerPC or cisco 7500, > * >>48 lelong 0 Unspecified or Power ELF V1 ABI, > * >>48 lelong 1 Power ELF V1 ABI, > * >>48 lelong 2 OpenPOWER ELF V2 ABI, > * > * The '>18' above means, data at 18 (0x12), which is basically > * 'elf64->e_machine', which will be 20 for PPC, and 21 for PPC64 > * > * Similarly, '>>48' is offset 48 (0x30) in the file, which is > * basically 'elf64->e_flags', which has the ELF ABI version > */ > if (elf64->e_flags == 2) { > /* ABI v2 */ > return TRUE; > } else if (elf64->e_flags == 1) { > /* ABI v1 */ > return FALSE; > } > > error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > return FALSE; > } > > Also, we can those values in the ELF header: > > $ hexdump vmlinux-little-endian > 0000000 457f 464c 0102 0001 0000 0000 0000 0000 > 0000010 0002 0015 (= 21 = E_MACHINE) 0001 0000 0000 0000 0000 c000 > 0000020 0040 0000 0000 0000 8bf8 0040 0000 0000 > 0000030 0002 (= E_FLAGS = ABI) 0000 0040 0038 0002 0040 0027 0026 > 0000040 0001 0000 0007 0000 0000 0001 0000 0000 > I agree with the elf header check on the vmlinux file, it is much simpler. Also I compiled the kernel with CONFIG_PPC64_ELF_ABI_V1 on, the file cmd will give: Power ELF V1 ABI string. So the approach can work. > Also, I see 'pc->namelist' is basically the kernel filename, is that > correct ? Can that be renamed to kernel_filename or something ? > Yes, by reading through the code, pc->namelist is vmlinux, and pc->namelist_debug is the debug symbol if vmlinux doesn't have the symbol. But I didn't see the need for the rename. Thanks, Tao Liu > Thanks, > Aditya Gupta > > > > > Thanks, > > > > Aditya Gupta > > > > > + char buf1[BUFSIZE]; > > > + char *pos1, *pos2; > > > + int errflag = 0; > > > + ulong stk_got = 0; > > > + static bool ret = false; > > > + static bool checked = false; > > > + > > > + if (checked == true || !symbol_exists("tm_reclaim")) > > > + return ret; > > > + > > > + sprintf(buf1, "x/16i tm_reclaim"); > > > + open_tmpfile(); > > > + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > > > + goto out; > > > + checked = true; > > > + rewind(pc->tmpfile); > > > + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > > > + // "std r2, STK_GOT(r1)" is expected > > > + if (strstr(buf1, "std") && > > > + strstr(buf1, "(r1)") && > > > + (pos1 = strstr(buf1, "r2,"))) { > > > + pos1 += strlen("r2,"); > > > + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++); > > > + *pos2 = '\0'; > > > + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); > > > + break; > > > + } > > > + } > > > + > > > + if (!errflag) { > > > + switch (stk_got) { > > > + case 24: > > > + ret = true; > > > + case 40: > > > + goto out; > > > + } > > > + } > > > + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > > > +out: > > > + close_tmpfile(); > > > + return ret; > > > +} > > > /* > > > * get the SP and PC values for idle tasks. > > > @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp) > > > sp = ppc64_get_sp(task); > > > if (!INSTACK(sp, bt)) > > > goto out; > > > - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > > - sizeof(struct ppc64_pt_regs), > > > - "PPC64 pt_regs", FAULT_ON_ERROR); > > > + > > > + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) { > > > + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > > > + sizeof(struct ppc64_pt_regs), > > > + "PPC64 pt_regs", FAULT_ON_ERROR); > > > + } else { > > > + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > > + sizeof(struct ppc64_pt_regs), > > > + "PPC64 pt_regs", FAULT_ON_ERROR); > > > + } > > > + > > > ip = regs.nip; > > > closest = closest_symbol(ip); > > > if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) { > -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki