On Tue, Oct 8, 2024 at 10:46 PM Tao Liu <ltao@xxxxxxxxxx> wrote: > > Hi Aditya, > > On Tue, Oct 8, 2024 at 10:18 PM Aditya Gupta <adityag@xxxxxxxxxxxxx> wrote: > > > > Hi Tao, > > > > > > On 04/10/24 10:40, Tao Lou wrote: > > > Hi Aditya & lianbo, > > > > > > On Tue, Sep 24, 2024 at 2:28 AM Aditya Gupta <adityag@xxxxxxxxxxxxx> wrote: > > >> Hi Tao, > > >> > > >> On 24/09/22 10:25PM, Aditya Gupta wrote: > > >>> Hi Tao, > > >>> > > >>> > > >>> On 18/09/24 05:12, Tao Liu wrote: > > >>>> A error stack trace of bt cmd observed: > > >>>> > > >>>> crash> bt 1 > > >>>> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > >>>> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > > >>>> #1 [c000000003773770] __switch_to at c00000000001fa9c > > >>>> #2 [c0000000037737d0] __schedule at c00000000112e4ec > > >>>> #3 [c0000000037738b0] schedule at c00000000112ea80 > > >>>> ... > > >>>> > > >>>> The #0 stack trace is incorrect, the function address shouldn't exceed _end. > > >>>> The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from > > >>>> STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > > >>>> CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will read a > > >>>> wrong value from stack when ABI_V2 enabled. > > >>>> > > >>>> To determine if ABI_V2 enabled is tricky. This patch do it by check the > > >>>> following: > > >>>> > > >>>> In arch/powerpc/include/asm/ppc_asm.h: > > >>>> #ifdef CONFIG_PPC64_ELF_ABI_V2 > > >>>> #define STK_GOT 24 > > >>>> #else > > >>>> #define STK_GOT 40 > > >>>> > > >>>> In arch/powerpc/kernel/tm.S: > > >>>> _GLOBAL(tm_reclaim) > > >>>> mfcr r5 > > >>>> mflr r0 > > >>>> stw r5, 8(r1) > > >>>> std r0, 16(r1) > > >>>> std r2, STK_GOT(r1) > > >>>> ... > > >>>> > > >>>> So a disassemble on tm_reclaim, and extract the STK_GOT value from std > > >>>> instruction is used as the approach. > > >>>> > > >>>> After the patch: > > >>>> crash> bt 1 > > >>>> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > >>>> #0 [c0000000037737d0] __schedule at c00000000112e4ec > > >>>> #1 [c0000000037738b0] schedule at c00000000112ea80 > > >>>> ... > > >>>> > > >>>> Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> > > >>>> --- > > >>>> defs.h | 1 + > > >>>> ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > > >>>> 2 files changed, 58 insertions(+), 3 deletions(-) > > >>>> > > >>>> diff --git a/defs.h b/defs.h > > >>>> index 2231cb6..d5cb8cc 100644 > > >>>> --- a/defs.h > > >>>> +++ b/defs.h > > >>>> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > > >>>> #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > > >>>> /* Used to find the user or kernel-mode frame*/ > > >>>> +#define STACK_SWITCH_FRAME_REGS 48 > > >>>> #define STACK_FRAME_OVERHEAD 112 > > >>>> #define EXCP_FRAME_MARKER 0x7265677368657265 > > >>>> diff --git a/ppc64.c b/ppc64.c > > >>>> index e8930a1..6e5f155 100644 > > >>>> --- a/ppc64.c > > >>>> +++ b/ppc64.c > > >>>> @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > > >>>> static ulong pmd_page_vaddr_l4(ulong pmd); > > >>>> static int is_opal_context(ulong sp, ulong nip); > > >>>> void opalmsg(void); > > >>>> +static bool is_ppc64_elf_abi_v2(void); > > >>>> static int is_opal_context(ulong sp, ulong nip) > > >>>> { > > >>>> @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > > >>>> return sp; > > >>>> } > > >>>> +static bool > > >>>> +is_ppc64_elf_abi_v2(void) > > >>>> +{ > > >>> > > >>> Thanks for fixing these issues on ppc64 ! > > >>> > > >>> > > >>> It is good. I am also checking if there is a way to implement > > >>> 'is_ppc64_elf_abi_v2' by getting ABI version from the ELF notes, as 'file' > > >>> command somehow does know: > > >>> > > >>> $ file vmlinux-little-endian vmlinux-little-endian: ELF 64-bit LSB > > >>> executable, 64-bit PowerPC or cisco 7500, OpenPOWER ELF V2 ABI, version 1 > > >>> (SYSV), statically linked, > > >>> BuildID[sha1]=f6449970e6af6cddeed1e260393e2377ccc0f369, not stripped > > >>> > > >>> That might be more dependable (if we can do that). > > >> How about this approach ? > > >> > > >> I feel this might be more dependable, as the file command also uses > > >> this, as well as fadump (PowerPC firmware-assisted dump) in the kernel. > > >> > > >>> As a sidenote, there is still some issue either in patch #1 or #3 > > >>> causing all secondary CPUs (where swapper task is running) to show all > > >>> threads as '_end' > > >> What do you think ? > > > I think your approach will work, thanks for your code! Please see my > > > comments inlined in the code: > > > > > >> Either way what you chose, I do realise the idea to utilise the > > >> 'std r2, STK_GOT(r1)' in such a way, was interesting and smart. > > >> > > >> static bool > > >> is_ppc64_elf_abi_v2(void) > > >> { > > >> const char *kernel_file = pc->namelist; > > >> int fd, swap; > > >> char eheader[BUFSIZE]; > > >> Elf64_Ehdr *elf64; > > >> > > >> if ((fd = open(kernel_file, O_RDONLY)) < 0) { > > >> error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > > >> return FALSE; > > >> } > > >> if (read(fd, eheader, BUFSIZE) != BUFSIZE) { > > >> error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > > >> close(fd); > > >> return FALSE; > > >> } > > >> close(fd); > > >> > > >> if (!STRNEQ(eheader, ELFMAG) || eheader[EI_VERSION] != EV_CURRENT) > > >> return FALSE; > > >> > > >> elf64 = (Elf64_Ehdr *)&eheader[0]; > > >> > > >> swap = (((eheader[EI_DATA] == ELFDATA2LSB) && > > >> (__BYTE_ORDER == __BIG_ENDIAN)) || > > >> ((eheader[EI_DATA] == ELFDATA2MSB) && > > >> (__BYTE_ORDER == __LITTLE_ENDIAN))); > > > The above code seems duplicated with the one in is_kernel(), in fact, > > > is_kernel() will be invoked at the very beginning to determine if one > > > file is vmlinux, so I would suggest to add our abi_v2 check here, > > > like: > > > > > > in ppc64.c: > > > int ppc64_elf_abi = 0; > > > > > > static bool is_ppc64_elf_abi_v2(void) > > > { > > > if (ppc64_elf_abi == 1) > > > ... > > > else if (ppc64_elf_abi == 2) > > > ... > > > else > > > ... > > > } > > > > > > in symbols.c:is_kernel(): > > > ... > > > } else if ((elf64->e_ident[EI_CLASS] == ELFCLASS64) && > > > ((swap16(elf64->e_type, swap) == ET_EXEC) || > > > ... > > > case EM_PPC64: > > > if (machine_type_mismatch(file, "PPC64", NULL, 0)) > > > goto bailout; > > > + else > > > + ppc64_elf_abi = swap32(elf64->e_flags, swap); > > > break; > > > > > > So we can initial the abi version at crash start only once, and get > > > rid of the code duplication. > > > > Yes, I actually copied pasted the initial part from is_kernel. > > > > The approach is good, just to clarify, we know that is_kernel will be > > called everytime before this check right ? > > Actually I used another approach rather than is_kernel(), see [1] for > v2 patchset of ppc64 fixing. > The reason is keeping a ppc64 specific variable in the multi-cpu arch > used function is_kernel(), it is hard to make the code as clean. > Hi Aditya, There is one more question about the abi_v1/2. Say if I have a abi_v2 machine, but I want to run a abi_v1 elf executable file. I don't know 1) is it possible to run v1 elf on a v2 kernel machine; 2) if it can, which STACK_FRAME_OVERHEAD will the kernel use, 48 or 112? If STACK_FRAME_OVERHEAD will vary depending on abi_v1/2 of elf executable file, then our approach needs to change. Currently I have failed to build a abi_v1 elf executable file on the abi_v2 machine, which reports: $ gcc test.c -mabi=elfv1 fatal error: gnu/stubs-64-v1.h: No such file or directory But I don't know where to install the missing header or rpms, or if it is doable. Any ideas? Thanks, Tao Liu > [1]: https://www.mail-archive.com/devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx/msg01170.html > > Thanks, > Tao Liu > > > > > > > Thanks, > > Aditya Gupta > > > > > > > >> /* > > >> * The ABI version is stored in E_FLAGS in the ELF header, atleast for > > >> * ppc64 binaries. > > >> * > > >> * These logic is also used by the 'file' command, which states this in > > >> * its magic file: > > >> * > > >> * >18 leshort 20 PowerPC or cisco 4500, > > >> * >18 leshort 21 64-bit PowerPC or cisco 7500, > > >> * >>48 lelong 0 Unspecified or Power ELF V1 ABI, > > >> * >>48 lelong 1 Power ELF V1 ABI, > > >> * >>48 lelong 2 OpenPOWER ELF V2 ABI, > > >> * > > >> * The '>18' above means, data at 18 (0x12), which is basically > > >> * 'elf64->e_machine', which will be 20 for PPC, and 21 for PPC64 > > >> * > > >> * Similarly, '>>48' is offset 48 (0x30) in the file, which is > > >> * basically 'elf64->e_flags', which has the ELF ABI version > > >> */ > > >> if (elf64->e_flags == 2) { > > >> /* ABI v2 */ > > >> return TRUE; > > >> } else if (elf64->e_flags == 1) { > > >> /* ABI v1 */ > > >> return FALSE; > > >> } > > >> > > >> error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > > >> return FALSE; > > >> } > > >> > > >> Also, we can those values in the ELF header: > > >> > > >> $ hexdump vmlinux-little-endian > > >> 0000000 457f 464c 0102 0001 0000 0000 0000 0000 > > >> 0000010 0002 0015 (= 21 = E_MACHINE) 0001 0000 0000 0000 0000 c000 > > >> 0000020 0040 0000 0000 0000 8bf8 0040 0000 0000 > > >> 0000030 0002 (= E_FLAGS = ABI) 0000 0040 0038 0002 0040 0027 0026 > > >> 0000040 0001 0000 0007 0000 0000 0001 0000 0000 > > >> > > > I agree with the elf header check on the vmlinux file, it is much > > > simpler. Also I compiled the kernel with CONFIG_PPC64_ELF_ABI_V1 on, > > > the file cmd will give: Power ELF V1 ABI string. So the approach can > > > work. > > > > > >> Also, I see 'pc->namelist' is basically the kernel filename, is that > > >> correct ? Can that be renamed to kernel_filename or something ? > > >> > > > Yes, by reading through the code, pc->namelist is vmlinux, and > > > pc->namelist_debug is the debug symbol if vmlinux doesn't have the > > > symbol. But I didn't see the need for the rename. > > > > > > Thanks, > > > Tao Liu > > > > > >> Thanks, > > >> Aditya Gupta > > >> > > >>> Thanks, > > >>> > > >>> Aditya Gupta > > >>> > > >>>> + char buf1[BUFSIZE]; > > >>>> + char *pos1, *pos2; > > >>>> + int errflag = 0; > > >>>> + ulong stk_got = 0; > > >>>> + static bool ret = false; > > >>>> + static bool checked = false; > > >>>> + > > >>>> + if (checked == true || !symbol_exists("tm_reclaim")) > > >>>> + return ret; > > >>>> + > > >>>> + sprintf(buf1, "x/16i tm_reclaim"); > > >>>> + open_tmpfile(); > > >>>> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > > >>>> + goto out; > > >>>> + checked = true; > > >>>> + rewind(pc->tmpfile); > > >>>> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > > >>>> + // "std r2, STK_GOT(r1)" is expected > > >>>> + if (strstr(buf1, "std") && > > >>>> + strstr(buf1, "(r1)") && > > >>>> + (pos1 = strstr(buf1, "r2,"))) { > > >>>> + pos1 += strlen("r2,"); > > >>>> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; pos2++); > > >>>> + *pos2 = '\0'; > > >>>> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag); > > >>>> + break; > > >>>> + } > > >>>> + } > > >>>> + > > >>>> + if (!errflag) { > > >>>> + switch (stk_got) { > > >>>> + case 24: > > >>>> + ret = true; > > >>>> + case 40: > > >>>> + goto out; > > >>>> + } > > >>>> + } > > >>>> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > > >>>> +out: > > >>>> + close_tmpfile(); > > >>>> + return ret; > > >>>> +} > > >>>> /* > > >>>> * get the SP and PC values for idle tasks. > > >>>> @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp) > > >>>> sp = ppc64_get_sp(task); > > >>>> if (!INSTACK(sp, bt)) > > >>>> goto out; > > >>>> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > >>>> - sizeof(struct ppc64_pt_regs), > > >>>> - "PPC64 pt_regs", FAULT_ON_ERROR); > > >>>> + > > >>>> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) { > > >>>> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > > >>>> + sizeof(struct ppc64_pt_regs), > > >>>> + "PPC64 pt_regs", FAULT_ON_ERROR); > > >>>> + } else { > > >>>> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > >>>> + sizeof(struct ppc64_pt_regs), > > >>>> + "PPC64 pt_regs", FAULT_ON_ERROR); > > >>>> + } > > >>>> + > > >>>> ip = regs.nip; > > >>>> closest = closest_symbol(ip); > > >>>> if (STREQ(closest, ".__switch_to") || STREQ(closest, "__switch_to")) { > > -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki