Hi Aditya, On Tue, Oct 8, 2024 at 10:18 PM Aditya Gupta <adityag@xxxxxxxxxxxxx> wrote: > > Hi Tao, > > > On 04/10/24 10:40, Tao Lou wrote: > > Hi Aditya & lianbo, > > > > On Tue, Sep 24, 2024 at 2:28 AM Aditya Gupta <adityag@xxxxxxxxxxxxx> wrote: > >> Hi Tao, > >> > >> On 24/09/22 10:25PM, Aditya Gupta wrote: > >>> Hi Tao, > >>> > >>> > >>> On 18/09/24 05:12, Tao Liu wrote: > >>>> A error stack trace of bt cmd observed: > >>>> > >>>> crash> bt 1 > >>>> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > >>>> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > >>>> #1 [c000000003773770] __switch_to at c00000000001fa9c > >>>> #2 [c0000000037737d0] __schedule at c00000000112e4ec > >>>> #3 [c0000000037738b0] schedule at c00000000112ea80 > >>>> ... > >>>> > >>>> The #0 stack trace is incorrect, the function address shouldn't exceed _end. > >>>> The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from > >>>> STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > >>>> CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will read a > >>>> wrong value from stack when ABI_V2 enabled. > >>>> > >>>> To determine if ABI_V2 enabled is tricky. This patch do it by check the > >>>> following: > >>>> > >>>> In arch/powerpc/include/asm/ppc_asm.h: > >>>> #ifdef CONFIG_PPC64_ELF_ABI_V2 > >>>> #define STK_GOT 24 > >>>> #else > >>>> #define STK_GOT 40 > >>>> > >>>> In arch/powerpc/kernel/tm.S: > >>>> _GLOBAL(tm_reclaim) > >>>> mfcr r5 > >>>> mflr r0 > >>>> stw r5, 8(r1) > >>>> std r0, 16(r1) > >>>> std r2, STK_GOT(r1) > >>>> ... > >>>> > >>>> So a disassemble on tm_reclaim, and extract the STK_GOT value from std > >>>> instruction is used as the approach. > >>>> > >>>> After the patch: > >>>> crash> bt 1 > >>>> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > >>>> #0 [c0000000037737d0] __schedule at c00000000112e4ec > >>>> #1 [c0000000037738b0] schedule at c00000000112ea80 > >>>> ... > >>>> > >>>> Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> > >>>> --- > >>>> defs.h | 1 + > >>>> ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > >>>> 2 files changed, 58 insertions(+), 3 deletions(-) > >>>> > >>>> diff --git a/defs.h b/defs.h > >>>> index 2231cb6..d5cb8cc 100644 > >>>> --- a/defs.h > >>>> +++ b/defs.h > >>>> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > >>>> #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > >>>> /* Used to find the user or kernel-mode frame*/ > >>>> +#define STACK_SWITCH_FRAME_REGS 48 > >>>> #define STACK_FRAME_OVERHEAD 112 > >>>> #define EXCP_FRAME_MARKER 0x7265677368657265 > >>>> diff --git a/ppc64.c b/ppc64.c > >>>> index e8930a1..6e5f155 100644 > >>>> --- a/ppc64.c > >>>> +++ b/ppc64.c > >>>> @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > >>>> static ulong pmd_page_vaddr_l4(ulong pmd); > >>>> static int is_opal_context(ulong sp, ulong nip); > >>>> void opalmsg(void); > >>>> +static bool is_ppc64_elf_abi_v2(void); > >>>> static int is_opal_context(ulong sp, ulong nip) > >>>> { > >>>> @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > >>>> return sp; > >>>> } > >>>> +static bool > >>>> +is_ppc64_elf_abi_v2(void) > >>>> +{ > >>> > >>> Thanks for fixing these issues on ppc64 ! > >>> > >>> > >>> It is good. I am also checking if there is a way to implement > >>> 'is_ppc64_elf_abi_v2' by getting ABI version from the ELF notes, as 'file' > >>> command somehow does know: > >>> > >>> $ file vmlinux-little-endian vmlinux-little-endian: ELF 64-bit LSB > >>> executable, 64-bit PowerPC or cisco 7500, OpenPOWER ELF V2 ABI, version 1 > >>> (SYSV), statically linked, > >>> BuildID[sha1]=f6449970e6af6cddeed1e260393e2377ccc0f369, not stripped > >>> > >>> That might be more dependable (if we can do that). > >> How about this approach ? > >> > >> I feel this might be more dependable, as the file command also uses > >> this, as well as fadump (PowerPC firmware-assisted dump) in the kernel. > >> > >>> As a sidenote, there is still some issue either in patch #1 or #3 > >>> causing all secondary CPUs (where swapper task is running) to show all > >>> threads as '_end' > >> What do you think ? > > I think your approach will work, thanks for your code! Please see my > > comments inlined in the code: > > > >> Either way what you chose, I do realise the idea to utilise the > >> 'std r2, STK_GOT(r1)' in such a way, was interesting and smart. > >> > >> static bool > >> is_ppc64_elf_abi_v2(void) > >> { > >> const char *kernel_file = pc->namelist; > >> int fd, swap; > >> char eheader[BUFSIZE]; > >> Elf64_Ehdr *elf64; > >> > >> if ((fd = open(kernel_file, O_RDONLY)) < 0) { > >> error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > >> return FALSE; > >> } > >> if (read(fd, eheader, BUFSIZE) != BUFSIZE) { > >> error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > >> close(fd); > >> return FALSE; > >> } > >> close(fd); > >> > >> if (!STRNEQ(eheader, ELFMAG) || eheader[EI_VERSION] != EV_CURRENT) > >> return FALSE; > >> > >> elf64 = (Elf64_Ehdr *)&eheader[0]; > >> > >> swap = (((eheader[EI_DATA] == ELFDATA2LSB) && > >> (__BYTE_ORDER == __BIG_ENDIAN)) || > >> ((eheader[EI_DATA] == ELFDATA2MSB) && > >> (__BYTE_ORDER == __LITTLE_ENDIAN))); > > The above code seems duplicated with the one in is_kernel(), in fact, > > is_kernel() will be invoked at the very beginning to determine if one > > file is vmlinux, so I would suggest to add our abi_v2 check here, > > like: > > > > in ppc64.c: > > int ppc64_elf_abi = 0; > > > > static bool is_ppc64_elf_abi_v2(void) > > { > > if (ppc64_elf_abi == 1) > > ... > > else if (ppc64_elf_abi == 2) > > ... > > else > > ... > > } > > > > in symbols.c:is_kernel(): > > ... > > } else if ((elf64->e_ident[EI_CLASS] == ELFCLASS64) && > > ((swap16(elf64->e_type, swap) == ET_EXEC) || > > ... > > case EM_PPC64: > > if (machine_type_mismatch(file, "PPC64", NULL, 0)) > > goto bailout; > > + else > > + ppc64_elf_abi = swap32(elf64->e_flags, swap); > > break; > > > > So we can initial the abi version at crash start only once, and get > > rid of the code duplication. > > Yes, I actually copied pasted the initial part from is_kernel. > > The approach is good, just to clarify, we know that is_kernel will be > called everytime before this check right ? Actually I used another approach rather than is_kernel(), see [1] for v2 patchset of ppc64 fixing. The reason is keeping a ppc64 specific variable in the multi-cpu arch used function is_kernel(), it is hard to make the code as clean. [1]: https://www.mail-archive.com/devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx/msg01170.html Thanks, Tao Liu > > > Thanks, > Aditya Gupta > > > > >> /* > >> * The ABI version is stored in E_FLAGS in the ELF header, atleast for > >> * ppc64 binaries. > >> * > >> * These logic is also used by the 'file' command, which states this in > >> * its magic file: > >> * > >> * >18 leshort 20 PowerPC or cisco 4500, > >> * >18 leshort 21 64-bit PowerPC or cisco 7500, > >> * >>48 lelong 0 Unspecified or Power ELF V1 ABI, > >> * >>48 lelong 1 Power ELF V1 ABI, > >> * >>48 lelong 2 OpenPOWER ELF V2 ABI, > >> * > >> * The '>18' above means, data at 18 (0x12), which is basically > >> * 'elf64->e_machine', which will be 20 for PPC, and 21 for PPC64 > >> * > >> * Similarly, '>>48' is offset 48 (0x30) in the file, which is > >> * basically 'elf64->e_flags', which has the ELF ABI version > >> */ > >> if (elf64->e_flags == 2) { > >> /* ABI v2 */ > >> return TRUE; > >> } else if (elf64->e_flags == 1) { > >> /* ABI v1 */ > >> return FALSE; > >> } > >> > >> error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > >> return FALSE; > >> } > >> > >> Also, we can those values in the ELF header: > >> > >> $ hexdump vmlinux-little-endian > >> 0000000 457f 464c 0102 0001 0000 0000 0000 0000 > >> 0000010 0002 0015 (= 21 = E_MACHINE) 0001 0000 0000 0000 0000 c000 > >> 0000020 0040 0000 0000 0000 8bf8 0040 0000 0000 > >> 0000030 0002 (= E_FLAGS = ABI) 0000 0040 0038 0002 0040 0027 0026 > >> 0000040 0001 0000 0007 0000 0000 0001 0000 0000 > >> > > I agree with the elf header check on the vmlinux file, it is much > > simpler. Also I compiled the kernel with CONFIG_PPC64_ELF_ABI_V1 on, > > the file cmd will give: Power ELF V1 ABI string. So the approach can > > work. > > > >> Also, I see 'pc->namelist' is basically the kernel filename, is that > >> correct ? Can that be renamed to kernel_filename or something ? > >> > > Yes, by reading through the code, pc->namelist is vmlinux, and > > pc->namelist_debug is the debug symbol if vmlinux doesn't have the > > symbol. But I didn't see the need for the rename. > > > > Thanks, > > Tao Liu > > > >> Thanks, > >> Aditya Gupta > >> > >>> Thanks, > >>> > >>> Aditya Gupta > >>> > >>>> + char buf1[BUFSIZE]; > >>>> + char *pos1, *pos2; > >>>> + int errflag = 0; > >>>> + ulong stk_got = 0; > >>>> + static bool ret = false; > >>>> + static bool checked = false; > >>>> + > >>>> + if (checked == true || !symbol_exists("tm_reclaim")) > >>>> + return ret; > >>>> + > >>>> + sprintf(buf1, "x/16i tm_reclaim"); > >>>> + open_tmpfile(); > >>>> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > >>>> + goto out; > >>>> + checked = true; > >>>> + rewind(pc->tmpfile); > >>>> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > >>>> + // "std r2, STK_GOT(r1)" is expected > >>>> + if (strstr(buf1, "std") && > >>>> + strstr(buf1, "(r1)") && > >>>> + (pos1 = strstr(buf1, "r2,"))) { > >>>> + pos1 += strlen("r2,"); > >>>> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++); > >>>> + *pos2 = '\0'; > >>>> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); > >>>> + break; > >>>> + } > >>>> + } > >>>> + > >>>> + if (!errflag) { > >>>> + switch (stk_got) { > >>>> + case 24: > >>>> + ret = true; > >>>> + case 40: > >>>> + goto out; > >>>> + } > >>>> + } > >>>> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > >>>> +out: > >>>> + close_tmpfile(); > >>>> + return ret; > >>>> +} > >>>> /* > >>>> * get the SP and PC values for idle tasks. > >>>> @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp) > >>>> sp = ppc64_get_sp(task); > >>>> if (!INSTACK(sp, bt)) > >>>> goto out; > >>>> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > >>>> - sizeof(struct ppc64_pt_regs), > >>>> - "PPC64 pt_regs", FAULT_ON_ERROR); > >>>> + > >>>> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) { > >>>> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > >>>> + sizeof(struct ppc64_pt_regs), > >>>> + "PPC64 pt_regs", FAULT_ON_ERROR); > >>>> + } else { > >>>> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > >>>> + sizeof(struct ppc64_pt_regs), > >>>> + "PPC64 pt_regs", FAULT_ON_ERROR); > >>>> + } > >>>> + > >>>> ip = regs.nip; > >>>> closest = closest_symbol(ip); > >>>> if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) { > -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki