With CONFIG_CALL_PADDING enabled, there are 16 bytes (or more) of
padding space before all kernel functions, and some kernel features can
make use of it, such as MITIGATION_CALL_DEPTH_TRACKING, CFI_CLANG,
FINEIBT, etc.

In this commit, we add support for storing per-function metadata in the
function padding. The previous discussion can be found in [1].

Generally speaking, there are two ways to implement this feature:

1. Create a function metadata array, and store the 5-byte instruction
   "mov %eax, 0x12345678" in the function padding, where 0x12345678 is
   the index of the function metadata in the array. This way, 5 bytes
   of the function padding are consumed.

2. Store the 10-byte instruction "mov %rax, 0x12345678aabbccdd" in the
   function padding, where 0x12345678aabbccdd is the address of the
   function metadata.

Compared with way 2, way 1 consumes less padding space, but we need to
do more work to manage the global function metadata array. This commit
implements way 1.
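For illustration, the entry of a function whose metadata lives at
index 2 would then look roughly like this (only a sketch: the address
is made up, and the exact position of the instruction within the
padding depends on the kernel config, as explained below):

    ffffffff810000f0 <function padding>:
        b8 02 00 00 00      mov    $0x2,%eax   /* metadata index 2 */
        90 90 90 ...        nop                /* rest of the padding */
    ffffffff81000100 <function entry>:
        ...

The 0xb8 opcode byte doubles as the marker that metadata is present,
and the 32-bit immediate is the index into the global metadata array.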
In my research, MITIGATION_CALL_DEPTH_TRACKING consumes the tail
9 bytes of the function padding, and FINEIBT+CFI_CLANG consume the
head 7 bytes. So there is no space left for us if
MITIGATION_CALL_DEPTH_TRACKING and CFI_CLANG are both enabled.
Therefore, we use the following logic:

1. use the head 5 bytes if CFI_CLANG is not enabled
2. use the tail 5 bytes if MITIGATION_CALL_DEPTH_TRACKING is not enabled
3. compile the kernel with 5 extra bytes of padding if
   MITIGATION_CALL_DEPTH_TRACKING and CFI_CLANG are both enabled

In the third case, we compile the kernel with a function padding of
21 bytes, which means the function entry is no longer 16-byte aligned.
In [2], I tested the performance of the kernel with different paddings,
and the extra 5 bytes seem to have no impact on performance. However,
making kernel functions not 16-byte aligned is a huge change, and I'm
not sure whether it has any other influence. Another choice is to
compile the kernel with 32-byte alignment when there is no space
available for us in the function padding, but this increases the text
size by ~5%. (I'm not sure which method to use.)

The beneficiaries of this feature can be BPF and ftrace. For BPF, we
can implement a "dynamic trampoline" and add tracing multi-link support
based on this feature. For ftrace, we can optimize its performance
based on this feature.

This commit is not complete, as the synchronization of func_metas is
not considered yet :/

Link: https://lore.kernel.org/bpf/CADxym3anLzM6cAkn_z71GDd_VeKiqqk1ts=xuiP7pr4PO6USPA@xxxxxxxxxxxxxx/ [1]
Link: https://lore.kernel.org/bpf/CADxym3af+CU5Mx8myB8UowdXSc3wJOqWyH4oyq+eXKahXBTXyg@xxxxxxxxxxxxxx/ [2]
Signed-off-by: Menglong Dong <dongml2@xxxxxxxxxxxxxxx>
---
 arch/x86/Kconfig          |  15 ++++
 arch/x86/Makefile         |  17 ++--
 include/linux/func_meta.h |  17 ++++
 kernel/trace/Makefile     |   1 +
 kernel/trace/func_meta.c  | 184 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 228 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/func_meta.h
 create mode 100644 kernel/trace/func_meta.c
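For reviewers, here is a minimal usage sketch of the new API (not part
of the patch). The function below and the use of kallsyms_lookup_name()
are only for illustration; a real user, such as the BPF dynamic
trampoline mentioned above, would already have the function address at
hand:

	#include <linux/kallsyms.h>
	#include <linux/func_meta.h>

	static int func_meta_usage_example(void)
	{
		struct func_meta *meta;
		void *ip;

		/* any function entry with CALL_PADDING in front of it */
		ip = (void *)kallsyms_lookup_name("schedule");
		if (!ip)
			return -ENOENT;

		/*
		 * Installs "mov $index, %eax" in the padding of ip, or
		 * just takes another reference if it is already there.
		 */
		meta = func_meta_get(ip);
		if (!meta)
			return -ENOMEM;

		/* ... associate state with the function via meta ... */

		/*
		 * Drops the reference; the instruction is NOP-ed out
		 * again once the last user is gone.
		 */
		func_meta_put(ip, meta);
		return 0;
	}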
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6df7779ed6da..0ff3cb74cfc0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2510,6 +2510,21 @@ config PREFIX_SYMBOLS
 	def_bool y
 	depends on CALL_PADDING && !CFI_CLANG
 
+config FUNCTION_METADATA
+	bool "Enable function metadata support"
+	select CALL_PADDING
+	default y
+	help
+	  This feature allows us to set metadata for kernel functions, and
+	  to get the metadata of a function by its address without any
+	  cost.
+
+	  The index of the metadata will be stored in the function padding,
+	  which will consume 5 bytes. The spare padding is enough for us
+	  with CALL_PADDING and FUNCTION_ALIGNMENT_16B if CALL_THUNKS or
+	  CFI_CLANG is not enabled. Otherwise, we need 5 extra bytes in
+	  the function padding, which increases the text size by ~1%.
+
 menuconfig CPU_MITIGATIONS
 	bool "Mitigations for CPU vulnerabilities"
 	default y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b773b34768d..05689c9a8942 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -240,13 +240,18 @@ ifdef CONFIG_MITIGATION_SLS
 endif
 
 ifdef CONFIG_CALL_PADDING
-PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
-KBUILD_CFLAGS += $(PADDING_CFLAGS)
-export PADDING_CFLAGS
+  __padding_bytes := $(CONFIG_FUNCTION_PADDING_BYTES)
+  ifneq ($(and $(CONFIG_FUNCTION_METADATA),$(CONFIG_CALL_THUNKS),$(CONFIG_CFI_CLANG)),)
+    __padding_bytes := $(shell echo $(__padding_bytes) + 5 | bc)
+  endif
+
+  PADDING_CFLAGS := -fpatchable-function-entry=$(__padding_bytes),$(__padding_bytes)
+  KBUILD_CFLAGS += $(PADDING_CFLAGS)
+  export PADDING_CFLAGS
 
-PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
-KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS)
-export PADDING_RUSTFLAGS
+  PADDING_RUSTFLAGS := -Zpatchable-function-entry=$(__padding_bytes),$(__padding_bytes)
+  KBUILD_RUSTFLAGS += $(PADDING_RUSTFLAGS)
+  export PADDING_RUSTFLAGS
 endif
 
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
diff --git a/include/linux/func_meta.h b/include/linux/func_meta.h
new file mode 100644
index 000000000000..840cbd858c47
--- /dev/null
+++ b/include/linux/func_meta.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FUNC_META_H
+#define _LINUX_FUNC_META_H
+
+#include <linux/kernel.h>
+
+struct func_meta {
+	int users;
+	void *func;
+};
+
+extern struct func_meta *func_metas;
+
+struct func_meta *func_meta_get(void *ip);
+void func_meta_put(void *ip, struct func_meta *meta);
+
+#endif
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 057cd975d014..4042c168dcfc 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
 obj-$(CONFIG_FPROBE) += fprobe.o
 obj-$(CONFIG_RETHOOK) += rethook.o
 obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
+obj-$(CONFIG_FUNCTION_METADATA) += func_meta.o
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
 obj-$(CONFIG_RV) += rv/
diff --git a/kernel/trace/func_meta.c b/kernel/trace/func_meta.c
new file mode 100644
index 000000000000..9ce77da81e71
--- /dev/null
+++ b/kernel/trace/func_meta.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/func_meta.h>
+#include <linux/text-patching.h>
+
+#define FUNC_META_INSN_SIZE	5
+
+#ifdef CONFIG_CFI_CLANG
+  #ifdef CONFIG_CALL_THUNKS
+    /* use the extra 5 bytes that we reserved */
+    #define FUNC_META_INSN_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES + 5)
+    #define FUNC_META_DATA_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES + 4)
+  #else
+    /* use the space that CALL_THUNKS would otherwise use */
+    #define FUNC_META_INSN_OFFSET	(5)
+    #define FUNC_META_DATA_OFFSET	(4)
+  #endif
+#else
+  /* use the space that CFI_CLANG would otherwise use */
+  #define FUNC_META_INSN_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES)
+  #define FUNC_META_DATA_OFFSET	(CONFIG_FUNCTION_PADDING_BYTES - 1)
+#endif
+
+static u32 func_meta_count = 32, func_meta_used;
+struct func_meta *func_metas;
+EXPORT_SYMBOL_GPL(func_metas);
+
+static DEFINE_MUTEX(func_meta_lock);
+
+static u32 func_meta_get_index(void *ip)
+{
+	return *(u32 *)(ip - FUNC_META_DATA_OFFSET);
+}
+
+static bool func_meta_exist(void *ip)
+{
+	return *(u8 *)(ip - FUNC_META_INSN_OFFSET) == 0xB8;
+}
+
+static void func_meta_init(struct func_meta *metas, u32 start, u32 end)
+{
+	u32 i;
+
+	for (i = start; i < end; i++)
+		metas[i].users = 0;
+}
+
+/* get the next usable function metadata entry */
+static struct func_meta *func_meta_get_next(u32 *index)
+{
+	struct func_meta *tmp;
+	u32 i;
+
+	if (func_metas == NULL) {
+		func_metas = kmalloc_array(func_meta_count, sizeof(*tmp),
+					   GFP_KERNEL);
+		if (!func_metas)
+			return NULL;
+		func_meta_init(func_metas, 0, func_meta_count);
+	}
+
+	/* maybe we can manage the used function metadata entries with a
+	 * bitmap?
+	 */
+	for (i = 0; i < func_meta_count; i++) {
+		if (!func_metas[i].users) {
+			func_meta_used++;
+			*index = i;
+			func_metas[i].users++;
+			return func_metas + i;
+		}
+	}
+
+	tmp = kmalloc_array(func_meta_count * 2, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return NULL;
+
+	memcpy(tmp, func_metas, func_meta_count * sizeof(*tmp));
+	kfree(func_metas);
+
+	/* TODO: we need a way to update func_metas synchronously, RCU? */
+	func_metas = tmp;
+	func_meta_init(func_metas, func_meta_count, func_meta_count * 2);
+
+	tmp += func_meta_count;
+	*index = func_meta_count;
+	func_meta_count <<= 1;
+	func_meta_used++;
+	tmp->users++;
+
+	return tmp;
+}
+
+static int func_meta_text_poke(void *ip, u32 index, bool nop)
+{
+	const u8 nop_insn[FUNC_META_INSN_SIZE] = { BYTES_NOP1, BYTES_NOP1,
+						   BYTES_NOP1, BYTES_NOP1,
+						   BYTES_NOP1 };
+	u8 insn[FUNC_META_INSN_SIZE] = { 0xB8, };
+	const u8 *prog;
+	void *target;
+	int ret = 0;
+
+	target = ip - FUNC_META_INSN_OFFSET;
+	mutex_lock(&text_mutex);
+	if (nop) {
+		if (!memcmp(target, nop_insn, FUNC_META_INSN_SIZE))
+			goto out;
+		prog = nop_insn;
+	} else {
+		*(u32 *)(insn + 1) = index;
+		if (!memcmp(target, insn, FUNC_META_INSN_SIZE))
+			goto out;
+
+		if (memcmp(target, nop_insn, FUNC_META_INSN_SIZE)) {
+			ret = -EBUSY;
+			goto out;
+		}
+		prog = insn;
+	}
+	text_poke(target, prog, FUNC_META_INSN_SIZE);
+	text_poke_sync();
+out:
+	mutex_unlock(&text_mutex);
+	return ret;
+}
+
+static void __func_meta_put(void *ip, struct func_meta *meta)
+{
+	if (WARN_ON_ONCE(meta->users <= 0))
+		return;
+
+	meta->users--;
+	if (meta->users > 0)
+		return;
+
+	/* TODO: we need a way to shrink the array "func_metas" */
+	func_meta_used--;
+	if (!func_meta_exist(ip))
+		return;
+
+	func_meta_text_poke(ip, 0, true);
+}
+
+void func_meta_put(void *ip, struct func_meta *meta)
+{
+	mutex_lock(&func_meta_lock);
+	__func_meta_put(ip, meta);
+	mutex_unlock(&func_meta_lock);
+}
+EXPORT_SYMBOL_GPL(func_meta_put);
+
+struct func_meta *func_meta_get(void *ip)
+{
+	struct func_meta *tmp = NULL;
+	u32 index;
+
+	mutex_lock(&func_meta_lock);
+	if (func_meta_exist(ip)) {
+		index = func_meta_get_index(ip);
+		if (WARN_ON_ONCE(index >= func_meta_count))
+			goto out;
+
+		tmp = &func_metas[index];
+		tmp->users++;
+		goto out;
+	}
+
+	tmp = func_meta_get_next(&index);
+	if (!tmp)
+		goto out;
+
+	if (func_meta_text_poke(ip, index, false)) {
+		__func_meta_put(ip, tmp);
+		goto out;
+	}
+	tmp->func = ip;
+out:
+	mutex_unlock(&func_meta_lock);
+	return tmp;
+}
+EXPORT_SYMBOL_GPL(func_meta_get);
-- 
2.39.5