Add a new .note section containing type, size, offset and flags of every xfeature that is present. This information will be used by debuggers to understand the XSAVE layout of the machine where the core file was dumped, and to read XSAVE registers, especially during cross-platform debugging. Some background: The XSAVE layouts of modern AMD and Intel CPUs differ, especially since Memory Protection Keys and the AVX-512 features have been incorporated into AMD CPUs. This is because AMD never adopted (and hence never left room in the XSAVE layout for) the Intel MPX feature. Tools like GDB had assumed a fixed XSAVE layout matching that of Intel (based on the XCR0 mask). Hence, the core dumps from AMD CPUs didn't match the known size for the XCR0 mask. This resulted in GDB and other tools not being able to access the values of the AVX-512 and PKRU registers on AMD CPUs. To solve this, an interim solution has been accepted into GDB, and is already a part of GDB 14, thanks to this series of patches [ https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html ]. But this patch series depends on heuristics based on the total XSAVE register set size and the XCR0 mask to infer the layouts of the various register blocks for core dumps, and hence, is not a foolproof mechanism to determine the layout of the XSAVE area. Hence this new core dump note has been proposed as a more robust mechanism to allow GDB/LLDB and other relevant tools to determine the layout of the XSAVE area of the machine where the corefile was dumped. The new core dump note (which is being proposed as a per-process .note section), NT_X86_XSAVE_LAYOUT (0x205), contains an array of structures. 
Each structure describes an individual extended feature containing offset, size and flags (as obtained through the CPUID instruction) in a format roughly matching the following C structure: struct xfeat_component { u32 xfeat_type; u32 xfeat_sz; u32 xfeat_off; u32 xfeat_flags; }; Co-developed-by: Jini Susan George <jinisusan.george@xxxxxxx> Signed-off-by: Jini Susan George <jinisusan.george@xxxxxxx> Signed-off-by: Vignesh Balasubramanian <vigbalas@xxxxxxx> --- v1->v2: Removed kernel internal defn dependency, code improvements arch/x86/Kconfig | 1 + arch/x86/include/asm/elf.h | 34 +++++++++ arch/x86/kernel/fpu/xstate.c | 141 +++++++++++++++++++++++++++++++++++ fs/binfmt_elf.c | 4 +- include/uapi/linux/elf.h | 1 + 5 files changed, 179 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 928820e61cb5..cc67daab3396 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -105,6 +105,7 @@ config X86 select ARCH_HAS_DEBUG_WX select ARCH_HAS_ZONE_DMA_SET if EXPERT select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAVE_EXTRA_ELF_NOTES select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1fb83d47711f..5952574db64b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -13,6 +13,40 @@ #include <asm/auxvec.h> #include <asm/fsgsbase.h> +struct xfeat_component { + u32 xfeat_type; + u32 xfeat_sz; + u32 xfeat_off; + u32 xfeat_flags; +} __packed; + +_Static_assert(sizeof(struct xfeat_component)%4 == 0, "xfeat_component is not aligned"); + +enum custom_feature { + FEATURE_XSAVE_FP = 0, + FEATURE_XSAVE_SSE = 1, + FEATURE_XSAVE_YMM = 2, + FEATURE_XSAVE_BNDREGS = 3, + FEATURE_XSAVE_BNDCSR = 4, + FEATURE_XSAVE_OPMASK = 5, + FEATURE_XSAVE_ZMM_Hi256 = 6, + FEATURE_XSAVE_Hi16_ZMM = 7, + FEATURE_XSAVE_PT = 8, + FEATURE_XSAVE_PKRU = 9, + FEATURE_XSAVE_PASID = 10, + FEATURE_XSAVE_CET_USER = 11, + 
FEATURE_XSAVE_CET_SHADOW_STACK = 12, + FEATURE_XSAVE_HDC = 13, + FEATURE_XSAVE_UINTR = 14, + FEATURE_XSAVE_LBR = 15, + FEATURE_XSAVE_HWP = 16, + FEATURE_XSAVE_XTILE_CFG = 17, + FEATURE_XSAVE_XTILE_DATA = 18, + FEATURE_MAX, + FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM, + FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA, +}; + typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 33a214b1a4ce..3d1c3c96e34d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -13,6 +13,7 @@ #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> +#include <linux/coredump.h> #include <asm/fpu/api.h> #include <asm/fpu/regset.h> @@ -87,6 +88,8 @@ static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init; #define XSTATE_FLAG_SUPERVISOR BIT(0) #define XSTATE_FLAG_ALIGNED64 BIT(1) +static const char owner_name[] = "LINUX"; + /* * Return whether the system supports a given xfeature. 
* @@ -1837,3 +1840,141 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, return 0; } #endif /* CONFIG_PROC_PID_ARCH_STATUS */ + +#ifdef CONFIG_COREDUMP +static int get_sub_leaf(int custom_xfeat) +{ + switch (custom_xfeat) { + case FEATURE_XSAVE_YMM: return XFEATURE_YMM; + case FEATURE_XSAVE_BNDREGS: return XFEATURE_BNDREGS; + case FEATURE_XSAVE_BNDCSR: return XFEATURE_BNDCSR; + case FEATURE_XSAVE_OPMASK: return XFEATURE_OPMASK; + case FEATURE_XSAVE_ZMM_Hi256: return XFEATURE_ZMM_Hi256; + case FEATURE_XSAVE_Hi16_ZMM: return XFEATURE_Hi16_ZMM; + case FEATURE_XSAVE_PT: return XFEATURE_PT_UNIMPLEMENTED_SO_FAR; + case FEATURE_XSAVE_PKRU: return XFEATURE_PKRU; + case FEATURE_XSAVE_PASID: return XFEATURE_PASID; + case FEATURE_XSAVE_CET_USER: return XFEATURE_CET_USER; + case FEATURE_XSAVE_CET_SHADOW_STACK: return XFEATURE_CET_KERNEL_UNUSED; + case FEATURE_XSAVE_HDC: return XFEATURE_RSRVD_COMP_13; + case FEATURE_XSAVE_UINTR: return XFEATURE_RSRVD_COMP_14; + case FEATURE_XSAVE_LBR: return XFEATURE_LBR; + case FEATURE_XSAVE_HWP: return XFEATURE_RSRVD_COMP_16; + case FEATURE_XSAVE_XTILE_CFG: return XFEATURE_XTILE_CFG; + case FEATURE_XSAVE_XTILE_DATA: return XFEATURE_XTILE_DATA; + default: + pr_warn_ratelimited("Not a valid XSAVE Feature."); + return 0; + } +} + +/* + * Dump type, size, offset and flag values for every xfeature that is present. 
+ */ +static int dump_xsave_layout_desc(struct coredump_params *cprm) +{ + u32 supported_features = 0; + struct xfeat_component xc; + u32 eax, ebx, ecx, edx; + int num_records = 0; + int sub_leaf = 0; + int i; + + /* Find supported extended features */ + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + supported_features = eax; + + for (i = FEATURE_XSAVE_EXTENDED_START; + i <= FEATURE_XSAVE_EXTENDED_END; i++) { + sub_leaf = get_sub_leaf(i); + if (!sub_leaf) + continue; + if (supported_features & (1U << sub_leaf)) { + cpuid_count(XSTATE_CPUID, sub_leaf, &eax, &ebx, &ecx, &edx); + xc.xfeat_type = i; + xc.xfeat_sz = eax; + xc.xfeat_off = ebx; + /* Reserved for future use */ + xc.xfeat_flags = 0; + + if (!dump_emit(cprm, &xc, + sizeof(struct xfeat_component))) + return 0; + num_records++; + } + } + + return num_records; +} + +static int get_xsave_desc_size(void) +{ + int supported_features = 0; + int xfeatures_count = 0; + u32 eax, ebx, ecx, edx; + int sub_leaf = 0; + int i; + + /* Find supported extended features */ + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + supported_features = eax; + + for (i = FEATURE_XSAVE_EXTENDED_START; + i <= FEATURE_XSAVE_EXTENDED_END; i++) { + sub_leaf = get_sub_leaf(i); + if (!sub_leaf) + continue; + if (supported_features & (1U << sub_leaf)) + xfeatures_count++; + } + + return xfeatures_count * (sizeof(struct xfeat_component)); +} + +int elf_coredump_extra_notes_write(struct coredump_params *cprm) +{ + int num_records = 0; + struct elf_note en; + + en.n_namesz = sizeof(owner_name); + en.n_descsz = get_xsave_desc_size(); + en.n_type = NT_X86_XSAVE_LAYOUT; + + if (!dump_emit(cprm, &en, sizeof(en))) + return 1; + if (!dump_emit(cprm, owner_name, en.n_namesz)) + return 1; + if (!dump_align(cprm, 4)) + return 1; + + num_records = dump_xsave_layout_desc(cprm); + if (!num_records) { + pr_warn_ratelimited("Error adding XSTATE layout ELF note. 
XSTATE buffer in the core file will be unparseable."); + return 1; + } + + /* Total size should be equal to the number of records */ + if ((sizeof(struct xfeat_component) * num_records) != en.n_descsz) { + pr_warn_ratelimited("Error adding XSTATE layout ELF note. The size of the .note section does not match with the total size of the records."); + return 1; + } + + return 0; +} + +/* + * Return the size of new note. + */ +int elf_coredump_extra_notes_size(void) +{ + int size = 0; + + /* NOTE Header */ + size += sizeof(struct elf_note); + /* name + align */ + size += roundup(sizeof(owner_name), 4); + size += get_xsave_desc_size(); + + return size; +} +#endif diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 5397b552fbeb..833bcb7e957b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2000,7 +2000,7 @@ static int elf_core_dump(struct coredump_params *cprm) { size_t sz = info.size; - /* For cell spufs */ + /* For cell spufs and x86 xstate */ sz += elf_coredump_extra_notes_size(); phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); @@ -2064,7 +2064,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!write_note_info(&info, cprm)) goto end_coredump; - /* For cell spufs */ + /* For cell spufs and x86 xstate */ if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b54b313bcf07..e30a9b47dc87 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -411,6 +411,7 @@ typedef struct elf64_shdr { #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ /* Old binutils treats 0x203 as a CET state */ #define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ #define NT_S390_TIMER 0x301 /* s390 timer register */ #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ -- 2.34.1