From: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> Allocate and map a separate stack for sandbox mode in arch_sbm_init(). Switch to this stack in arch_sbm_exec(). Store the address of the stack as arch-specific state. On X86_64, RSP is never used to locate thread-specific data, so it is safe to change its value. If the sandbox is preempted by an interrupt, RSP is saved by switch_to() and restored when the sandbox task is scheduled again. The original kernel stack pointer is restored when the sandbox function returns. Since the stack switch mechanism is implemented only for 64-bit, make CONFIG_HAVE_ARCH_SBM depend on X86_64 for now. Leave it under "config X86", because it would be possible to implement a 32-bit variant. Signed-off-by: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/sbm.h | 2 ++ arch/x86/kernel/sbm/Makefile | 6 ++++++ arch/x86/kernel/sbm/call_64.S | 40 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/sbm/core.c | 17 ++++++++++++++- 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/sbm/call_64.S diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 41fa4ab84c15..090d46c7ee7c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -188,7 +188,7 @@ config X86 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_PREL32_RELOCATIONS - select HAVE_ARCH_SBM + select HAVE_ARCH_SBM if X86_64 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_STACKLEAK diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h index 01c8d357550b..ed214c17af06 100644 --- a/arch/x86/include/asm/sbm.h +++ b/arch/x86/include/asm/sbm.h @@ -16,12 +16,14 @@ /** * struct x86_sbm_state - Run-time state of the environment. * @pgd: Sandbox mode page global directory. + * @stack: Sandbox mode stack. * * One instance of this union is allocated for each sandbox and stored as SBM * instance private data. */ struct x86_sbm_state { pgd_t *pgd; + unsigned long stack; }; #endif /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */ diff --git a/arch/x86/kernel/sbm/Makefile b/arch/x86/kernel/sbm/Makefile index 92d368b526cd..62c3e85c14a4 100644 --- a/arch/x86/kernel/sbm/Makefile +++ b/arch/x86/kernel/sbm/Makefile @@ -8,3 +8,9 @@ # obj-y := core.o + +### +# 64 bit specific files +ifeq ($(CONFIG_X86_64),y) + obj-y += call_64.o +endif diff --git a/arch/x86/kernel/sbm/call_64.S b/arch/x86/kernel/sbm/call_64.S new file mode 100644 index 000000000000..245d0dddce73 --- /dev/null +++ b/arch/x86/kernel/sbm/call_64.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH + * + * Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> + * + * SandBox Mode (SBM) low-level x86_64 assembly. + */ + +#include <linux/linkage.h> +#include <asm/nospec-branch.h> + +.code64 +.section .entry.text, "ax" + +/* + * arguments: + * rdi .. SBM state (kernel address) + * rsi .. func + * rdx .. args + * rcx .. top of sandbox stack + */ +SYM_FUNC_START(x86_sbm_exec) + /* + * Set up the sandbox stack: + * 1. Store the old stack pointer at the top of the sandbox stack, + * where various unwinders can find it and link back to the + * kernel stack. + */ + sub $8, %rcx + mov %rsp, (%rcx) + mov %rcx, %rsp + + mov %rdx, %rdi /* args */ + CALL_NOSPEC rsi + + pop %rsp + + RET +SYM_FUNC_END(x86_sbm_exec) diff --git a/arch/x86/kernel/sbm/core.c b/arch/x86/kernel/sbm/core.c index b775e3b387b1..de6986801148 100644 --- a/arch/x86/kernel/sbm/core.c +++ b/arch/x86/kernel/sbm/core.c @@ -17,6 +17,9 @@ #define GFP_SBM_PGTABLE (GFP_KERNEL | __GFP_ZERO) #define PGD_ORDER get_order(sizeof(pgd_t) * PTRS_PER_PGD) +asmlinkage int x86_sbm_exec(struct x86_sbm_state *state, sbm_func func, + void *args, unsigned long sbm_tos); + static inline phys_addr_t page_to_ptval(struct page *page) { return PFN_PHYS(page_to_pfn(page)) | _PAGE_TABLE; @@ -182,6 +185,15 @@ int arch_sbm_init(struct sbm *sbm) if (err) return err; + state->stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + if (!state->stack) + return -ENOMEM; + + err = map_range(state, state->stack, state->stack + THREAD_SIZE, + PAGE_SHARED); + if (err) + return err; + return 0; } @@ -238,11 +250,14 @@ void arch_sbm_destroy(struct sbm *sbm) free_pgd(state->pgd); free_pages((unsigned long)state->pgd, PGD_ORDER); } + free_pages(state->stack, THREAD_SIZE_ORDER); free_page((unsigned long)state); sbm->private = NULL; } int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args) { - return func(args); + struct x86_sbm_state *state = sbm->private; + + return x86_sbm_exec(state, func, args, state->stack + THREAD_SIZE); } -- 2.34.1