[PATCH v2 1/1] x86/elf: Add a new .note section containing Xfeatures information to x86 core files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a new .note section containing type, size, offset and flags of
every xfeature that is present.

This information will be used by the debuggers to understand the XSAVE
layout of the machine where the core file is dumped, and to read XSAVE
registers, especially during cross-platform debugging.

Some background:

The XSAVE layouts of modern AMD and Intel CPUs differ, especially since
Memory Protection Keys and the AVX-512 features have been incorporated into
the AMD CPUs.
This is because AMD never adopted (and hence never left room in the XSAVE
layout for) the Intel MPX feature. Tools like GDB had assumed a fixed XSAVE
layout matching that of Intel (based on the XCR0 mask).
Hence, the core dumps from AMD CPUs didn't match the known size for the
XCR0 mask. This resulted in GDB and other tools not being able to access
the values of the AVX-512 and PKRU registers on AMD CPUs.
To solve this, an interim solution has been accepted into GDB, and is
already a part of GDB 14, thanks to this series of patches
[ https://sourceware.org/pipermail/gdb-patches/2023-March/198081.html ].
But this patch series depends on heuristics based on the total XSAVE
register set size and the XCR0 mask to infer the layouts of the various
register blocks for core dumps, and hence, is not a foolproof mechanism to
determine the layout of the XSAVE area.

Hence this new core dump note has been proposed as a more robust mechanism
to allow GDB/LLDB and other relevant tools to determine the layout of the
XSAVE area of the machine where the corefile was dumped.
The new core dump note (which is being proposed as a per-process .note
section), NT_X86_XSAVE_LAYOUT (0x205) contains an array of structures.
Each structure describes an individual extended feature, containing its
offset, size and flags (obtained through the CPUID instruction), in a format
roughly matching the following C structure:

struct xfeat_component {
       u32 xfeat_type;
       u32 xfeat_sz;
       u32 xfeat_off;
       u32 xfeat_flags;
};

Co-developed-by: Jini Susan George <jinisusan.george@xxxxxxx>
Signed-off-by: Jini Susan George <jinisusan.george@xxxxxxx>
Signed-off-by: Vignesh Balasubramanian <vigbalas@xxxxxxx>
---
v1->v2: Removed kernel internal defn dependency, code improvements

 arch/x86/Kconfig             |   1 +
 arch/x86/include/asm/elf.h   |  34 +++++++++
 arch/x86/kernel/fpu/xstate.c | 141 +++++++++++++++++++++++++++++++++++
 fs/binfmt_elf.c              |   4 +-
 include/uapi/linux/elf.h     |   1 +
 5 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 928820e61cb5..cc67daab3396 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -105,6 +105,7 @@ config X86
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAVE_EXTRA_ELF_NOTES
 	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1fb83d47711f..5952574db64b 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -13,6 +13,40 @@
 #include <asm/auxvec.h>
 #include <asm/fsgsbase.h>
 
+struct xfeat_component {
+	u32 xfeat_type;		/* enum custom_feature value identifying the feature */
+	u32 xfeat_sz;		/* size: EAX of the feature's XSTATE_CPUID sub-leaf */
+	u32 xfeat_off;		/* offset: EBX of the feature's XSTATE_CPUID sub-leaf */
+	u32 xfeat_flags;	/* currently written as 0; reserved for future use */
+} __packed;
+
+_Static_assert(sizeof(struct xfeat_component)%4 == 0, "xfeat_component is not aligned");
+
+enum custom_feature {	/* values stored in xfeat_component.xfeat_type */
+	FEATURE_XSAVE_FP = 0,
+	FEATURE_XSAVE_SSE = 1,
+	FEATURE_XSAVE_YMM = 2,
+	FEATURE_XSAVE_BNDREGS = 3,
+	FEATURE_XSAVE_BNDCSR = 4,
+	FEATURE_XSAVE_OPMASK = 5,
+	FEATURE_XSAVE_ZMM_Hi256 = 6,
+	FEATURE_XSAVE_Hi16_ZMM = 7,
+	FEATURE_XSAVE_PT = 8,
+	FEATURE_XSAVE_PKRU = 9,
+	FEATURE_XSAVE_PASID = 10,
+	FEATURE_XSAVE_CET_USER = 11,
+	FEATURE_XSAVE_CET_SHADOW_STACK = 12,
+	FEATURE_XSAVE_HDC = 13,
+	FEATURE_XSAVE_UINTR = 14,
+	FEATURE_XSAVE_LBR = 15,
+	FEATURE_XSAVE_HWP = 16,
+	FEATURE_XSAVE_XTILE_CFG = 17,
+	FEATURE_XSAVE_XTILE_DATA = 18,
+	FEATURE_MAX,		/* one past the highest feature value (19) */
+	FEATURE_XSAVE_EXTENDED_START = FEATURE_XSAVE_YMM,	/* first feature the layout note loops over */
+	FEATURE_XSAVE_EXTENDED_END = FEATURE_XSAVE_XTILE_DATA,	/* last feature the layout note loops over (inclusive) */
+};
+
 typedef unsigned long elf_greg_t;
 
 #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 33a214b1a4ce..3d1c3c96e34d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/vmalloc.h>
+#include <linux/coredump.h>
 
 #include <asm/fpu/api.h>
 #include <asm/fpu/regset.h>
@@ -87,6 +88,8 @@ static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
 #define XSTATE_FLAG_SUPERVISOR	BIT(0)
 #define XSTATE_FLAG_ALIGNED64	BIT(1)
 
+static const char owner_name[] = "LINUX";	/* name field emitted with the XSAVE layout ELF note */
+
 /*
  * Return whether the system supports a given xfeature.
  *
@@ -1837,3 +1840,141 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_COREDUMP
+static int get_sub_leaf(int custom_xfeat)
+{
+	switch (custom_xfeat) {	/* map enum custom_feature to the kernel's XFEATURE_* index */
+	case FEATURE_XSAVE_YMM:			return XFEATURE_YMM;
+	case FEATURE_XSAVE_BNDREGS:		return XFEATURE_BNDREGS;
+	case FEATURE_XSAVE_BNDCSR:		return XFEATURE_BNDCSR;
+	case FEATURE_XSAVE_OPMASK:		return XFEATURE_OPMASK;
+	case FEATURE_XSAVE_ZMM_Hi256:		return XFEATURE_ZMM_Hi256;
+	case FEATURE_XSAVE_Hi16_ZMM:		return XFEATURE_Hi16_ZMM;
+	case FEATURE_XSAVE_PT:			return XFEATURE_PT_UNIMPLEMENTED_SO_FAR;
+	case FEATURE_XSAVE_PKRU:		return XFEATURE_PKRU;
+	case FEATURE_XSAVE_PASID:		return XFEATURE_PASID;
+	case FEATURE_XSAVE_CET_USER:		return XFEATURE_CET_USER;
+	case FEATURE_XSAVE_CET_SHADOW_STACK:	return XFEATURE_CET_KERNEL_UNUSED;
+	case FEATURE_XSAVE_HDC:			return XFEATURE_RSRVD_COMP_13;
+	case FEATURE_XSAVE_UINTR:		return XFEATURE_RSRVD_COMP_14;
+	case FEATURE_XSAVE_LBR:			return XFEATURE_LBR;
+	case FEATURE_XSAVE_HWP:			return XFEATURE_RSRVD_COMP_16;
+	case FEATURE_XSAVE_XTILE_CFG:		return XFEATURE_XTILE_CFG;
+	case FEATURE_XSAVE_XTILE_DATA:		return XFEATURE_XTILE_DATA;
+	default:	/* FP, SSE and out-of-range values have no mapping here */
+		pr_warn_ratelimited("Not a valid XSAVE Feature.\n");
+		return 0;	/* callers treat 0 as "skip this feature" */
+	}
+}
+
+/*
+ * Emit one xfeat_component record for each supported extended xfeature.
+ */
+static int dump_xsave_layout_desc(struct coredump_params *cprm)
+{
+	u32 supported_features = 0;
+	struct xfeat_component xc;
+	u32 eax, ebx, ecx, edx;
+	int num_records = 0;
+	int sub_leaf = 0;
+	int i;
+
+	/* Find supported extended features */
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	supported_features = eax;	/* bitmask tested per sub-leaf below */
+
+	for (i = FEATURE_XSAVE_EXTENDED_START;
+			i <= FEATURE_XSAVE_EXTENDED_END; i++) {
+		sub_leaf = get_sub_leaf(i);
+		if (!sub_leaf)	/* no sub-leaf mapping for this feature */
+			continue;
+		if (supported_features & (1U << sub_leaf)) {
+			cpuid_count(XSTATE_CPUID, sub_leaf, &eax, &ebx, &ecx, &edx);
+			xc.xfeat_type = i;
+			xc.xfeat_sz = eax;
+			xc.xfeat_off = ebx;
+			/* Reserved for future use */
+			xc.xfeat_flags = 0;
+
+			if (!dump_emit(cprm, &xc,
+				       sizeof(struct xfeat_component)))
+				return 0;	/* emit failed: reported as zero records */
+			num_records++;
+		}
+	}
+
+	return num_records;	/* number of records emitted (0 if none supported) */
+}
+
+static int get_xsave_desc_size(void)
+{
+	u32 supported_features = 0;	/* unsigned bitmask, same type as in dump_xsave_layout_desc() */
+	int xfeatures_count = 0;
+	u32 eax, ebx, ecx, edx;
+	int sub_leaf = 0;
+	int i;
+
+	/* Find supported extended features */
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	supported_features = eax;
+
+	for (i = FEATURE_XSAVE_EXTENDED_START;
+			i <= FEATURE_XSAVE_EXTENDED_END; i++) {
+		sub_leaf = get_sub_leaf(i);
+		if (!sub_leaf)	/* no sub-leaf mapping for this feature */
+			continue;
+		if (supported_features & (1U << sub_leaf))
+			xfeatures_count++;
+	}
+
+	/* Descriptor size in bytes: one xfeat_component per counted feature */
+	return xfeatures_count * (sizeof(struct xfeat_component));
+}
+
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	int num_records = 0;
+	struct elf_note en;
+
+	en.n_namesz = sizeof(owner_name);	/* sizeof counts the terminating NUL */
+	en.n_descsz = get_xsave_desc_size();
+	en.n_type = NT_X86_XSAVE_LAYOUT;
+
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		return 1;
+	if (!dump_emit(cprm, owner_name, en.n_namesz))
+		return 1;
+	if (!dump_align(cprm, 4))
+		return 1;
+
+	num_records = dump_xsave_layout_desc(cprm);
+	if (!num_records && en.n_descsz) {	/* zero records is only an error if some were advertised */
+		pr_warn_ratelimited("Error adding XSTATE layout ELF note. XSTATE buffer in the core file will be unparseable.\n");
+		return 1;
+	}
+
+	/* Bytes emitted as records must equal the descriptor size in the header */
+	if ((sizeof(struct xfeat_component) * num_records) != en.n_descsz) {
+		pr_warn_ratelimited("Error adding XSTATE layout ELF note. The size of the .note section does not match with the total size of the records.\n");
+		return 1;
+	}
+
+	return 0;	/* 0 on success; every failure path above returns 1 */
+}
+
+/*
+ * Return the size in bytes of the new note: header, padded name, descriptor.
+ */
+int elf_coredump_extra_notes_size(void)
+{
+	int size = 0;
+
+	/* NOTE Header */
+	size += sizeof(struct elf_note);
+	/* owner name (including its NUL), rounded up to a multiple of 4 */
+	size += roundup(sizeof(owner_name), 4);
+	size += get_xsave_desc_size();	/* one xfeat_component per supported feature */
+
+	return size;
+}
+#endif
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5397b552fbeb..833bcb7e957b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2000,7 +2000,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	{
 		size_t sz = info.size;
 
-		/* For cell spufs */
+		/* For cell spufs and x86 xstate */
 		sz += elf_coredump_extra_notes_size();
 
 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
@@ -2064,7 +2064,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!write_note_info(&info, cprm))
 		goto end_coredump;
 
-	/* For cell spufs */
+	/* For cell spufs and x86 xstate */
 	if (elf_coredump_extra_notes_write(cprm))
 		goto end_coredump;
 
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b54b313bcf07..e30a9b47dc87 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -411,6 +411,7 @@ typedef struct elf64_shdr {
 #define NT_X86_XSTATE	0x202		/* x86 extended state using xsave */
 /* Old binutils treats 0x203 as a CET state */
 #define NT_X86_SHSTK	0x204		/* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT	0x205	/* XSAVE layout description */
 #define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
 #define NT_S390_TIMER	0x301		/* s390 timer register */
 #define NT_S390_TODCMP	0x302		/* s390 TOD clock comparator register */
-- 
2.34.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux