From: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> Add arch hooks for the x86 architecture and select CONFIG_HAVE_ARCH_SBM. Implement arch_sbm_init(): Allocate an arch-specific state page and store it as SBM instance private data. Set up mappings for kernel text, static data, current task and current thread stack into the sandbox page tables. Implement arch_sbm_map_readonly() and arch_sbm_map_writable(): Set the PTE value, allocating additional page tables as necessary. Implement arch_sbm_destroy(): Walk the page table hierarchy and free all page tables, including the page global directory. Provide a trivial implementation of arch_sbm_exec() to avoid build failures, but do not switch to the constructed page tables yet. Signed-off-by: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> --- arch/x86/Kconfig | 1 + arch/x86/include/asm/sbm.h | 29 ++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/sbm/Makefile | 10 ++ arch/x86/kernel/sbm/core.c | 248 +++++++++++++++++++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 arch/x86/include/asm/sbm.h create mode 100644 arch/x86/kernel/sbm/Makefile create mode 100644 arch/x86/kernel/sbm/core.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5edec175b9bf..41fa4ab84c15 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -188,6 +188,7 @@ config X86 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_PREL32_RELOCATIONS + select HAVE_ARCH_SBM select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_STACKLEAK diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h new file mode 100644 index 000000000000..01c8d357550b --- /dev/null +++ b/arch/x86/include/asm/sbm.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH + * + * Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> + * + * SandBox Mode (SBM) declarations for the x86 
architecture. + */ +#ifndef __ASM_SBM_H +#define __ASM_SBM_H + +#if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) + +#include <asm/pgtable_types.h> + +/** + * struct x86_sbm_state - Run-time state of the environment. + * @pgd: Sandbox mode page global directory. + * + * One instance of this structure is allocated for each sandbox and stored as SBM + * instance private data. + */ +struct x86_sbm_state { + pgd_t *pgd; +}; + +#endif /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */ + +#endif /* __ASM_SBM_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0000325ab98f..4ad63b7d13ee 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -150,6 +150,8 @@ obj-$(CONFIG_X86_CET) += cet.o obj-$(CONFIG_X86_USER_SHADOW_STACK) += shstk.o +obj-$(CONFIG_SANDBOX_MODE) += sbm/ + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/sbm/Makefile b/arch/x86/kernel/sbm/Makefile new file mode 100644 index 000000000000..92d368b526cd --- /dev/null +++ b/arch/x86/kernel/sbm/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH +# +# Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> +# +# Makefile for the x86 SandBox Mode (SBM) implementation. +# + +obj-y := core.o diff --git a/arch/x86/kernel/sbm/core.c b/arch/x86/kernel/sbm/core.c new file mode 100644 index 000000000000..b775e3b387b1 --- /dev/null +++ b/arch/x86/kernel/sbm/core.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH + * + * Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx> + * + * SandBox Mode (SBM) implementation for the x86 architecture. 
+ */ + +#include <asm/pgtable.h> +#include <asm/sbm.h> +#include <asm/sections.h> +#include <linux/mm.h> +#include <linux/sbm.h> +#include <linux/sched/task_stack.h> + +#define GFP_SBM_PGTABLE (GFP_KERNEL | __GFP_ZERO) +#define PGD_ORDER get_order(sizeof(pgd_t) * PTRS_PER_PGD) + +static inline phys_addr_t page_to_ptval(struct page *page) +{ + return PFN_PHYS(page_to_pfn(page)) | _PAGE_TABLE; +} + +static int map_page(struct x86_sbm_state *state, unsigned long addr, + unsigned long pfn, pgprot_t prot) +{ + struct page *page; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_pgd(state->pgd, addr); + if (pgd_none(*pgdp)) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return -ENOMEM; + set_pgd(pgdp, __pgd(page_to_ptval(page))); + p4dp = (p4d_t *)page_address(page) + p4d_index(addr); + } else + p4dp = p4d_offset(pgdp, addr); + + if (p4d_none(*p4dp)) { + page = alloc_page(GFP_SBM_PGTABLE); + if (!page) + return -ENOMEM; + set_p4d(p4dp, __p4d(page_to_ptval(page))); + pudp = (pud_t *)page_address(page) + pud_index(addr); + } else + pudp = pud_offset(p4dp, addr); + + if (pud_none(*pudp)) { + page = alloc_page(GFP_SBM_PGTABLE); + if (!page) + return -ENOMEM; + set_pud(pudp, __pud(page_to_ptval(page))); + pmdp = (pmd_t *)page_address(page) + pmd_index(addr); + } else + pmdp = pmd_offset(pudp, addr); + + if (pmd_none(*pmdp)) { + page = alloc_page(GFP_SBM_PGTABLE); + if (!page) + return -ENOMEM; + set_pmd(pmdp, __pmd(page_to_ptval(page))); + ptep = (pte_t *)page_address(page) + pte_index(addr); + } else + ptep = pte_offset_kernel(pmdp, addr); + + set_pte(ptep, pfn_pte(pfn, prot)); + return 0; +} + +static int map_range(struct x86_sbm_state *state, unsigned long start, + unsigned long end, pgprot_t prot) +{ + unsigned long pfn; + int err; + + start = PAGE_ALIGN_DOWN(start); + while (start < end) { + if (is_vmalloc_or_module_addr((void *)start)) + pfn = vmalloc_to_pfn((void *)start); + else + pfn = 
PHYS_PFN(__pa(start)); + err = map_page(state, start, pfn, prot); + if (err) + return err; + start += PAGE_SIZE; + } + + return 0; +} + +int arch_sbm_map_readonly(struct sbm *sbm, const struct sbm_buf *buf) +{ + return map_range(sbm->private, (unsigned long)buf->sbm_ptr, + (unsigned long)buf->sbm_ptr + buf->size, + PAGE_READONLY); +} + +int arch_sbm_map_writable(struct sbm *sbm, const struct sbm_buf *buf) +{ + return map_range(sbm->private, (unsigned long)buf->sbm_ptr, + (unsigned long)buf->sbm_ptr + buf->size, + PAGE_SHARED); +} + +/* Map kernel text, data, rodata, BSS and static per-cpu sections. */ +static int map_kernel(struct x86_sbm_state *state) +{ + int __maybe_unused cpu; + int err; + + err = map_range(state, (unsigned long)_stext, (unsigned long)_etext, + PAGE_READONLY_EXEC); + if (err) + return err; + + err = map_range(state, (unsigned long)__entry_text_start, + (unsigned long)__entry_text_end, PAGE_KERNEL_ROX); + if (err) + return err; + + err = map_range(state, (unsigned long)_sdata, (unsigned long)_edata, + PAGE_READONLY); + if (err) + return err; + err = map_range(state, (unsigned long)__bss_start, + (unsigned long)__bss_stop, PAGE_READONLY); + if (err) + return err; + err = map_range(state, (unsigned long)__start_rodata, + (unsigned long)__end_rodata, PAGE_READONLY); + if (err) + return err; + +#ifdef CONFIG_SMP + for_each_possible_cpu(cpu) { + unsigned long off = per_cpu_offset(cpu); + + err = map_range(state, (unsigned long)__per_cpu_start + off, + (unsigned long)__per_cpu_end + off, + PAGE_READONLY); + if (err) + return err; + } +#endif + + return 0; +} + +int arch_sbm_init(struct sbm *sbm) +{ + struct x86_sbm_state *state; + unsigned long stack; + int err; + + BUILD_BUG_ON(sizeof(*state) > PAGE_SIZE); + state = (struct x86_sbm_state *)__get_free_page(GFP_KERNEL); + if (!state) + return -ENOMEM; + sbm->private = state; + + state->pgd = (pgd_t *)__get_free_pages(GFP_SBM_PGTABLE, PGD_ORDER); + if (!state->pgd) + return -ENOMEM; + + err = 
map_kernel(state); + if (err) + return err; + + err = map_range(state, (unsigned long)current, + (unsigned long)(current + 1), PAGE_READONLY); + if (err) + return err; + + stack = (unsigned long)task_stack_page(current); + err = map_range(state, stack, stack + THREAD_SIZE, PAGE_READONLY); + if (err) + return err; + + return 0; +} + +static void free_pmd(pmd_t *pmd) +{ + pmd_t *pmdp; + + for (pmdp = pmd; pmdp < pmd + PTRS_PER_PMD; ++pmdp) + if (!pmd_none(*pmdp)) + free_page(pmd_page_vaddr(*pmdp)); + if (PTRS_PER_PMD > 1) + free_page((unsigned long)pmd); +} + +static void free_pud(pud_t *pud) +{ + pud_t *pudp; + + for (pudp = pud; pudp < pud + PTRS_PER_PUD; ++pudp) + if (!pud_none(*pudp)) + free_pmd(pmd_offset(pudp, 0)); + if (PTRS_PER_PUD > 1) + free_page((unsigned long)pud); +} + +static void free_p4d(p4d_t *p4d) +{ + p4d_t *p4dp; + + for (p4dp = p4d; p4dp < p4d + PTRS_PER_P4D; ++p4dp) + if (!p4d_none(*p4dp)) + free_pud(pud_offset(p4dp, 0)); + if (PTRS_PER_P4D > 1) + free_page((unsigned long)p4d); +} + +static void free_pgd(pgd_t *pgd) +{ + pgd_t *pgdp; + + for (pgdp = pgd; pgdp < pgd + PTRS_PER_PGD; ++pgdp) + if (!pgd_none(*pgdp)) + free_p4d(p4d_offset(pgdp, 0)); +} + +void arch_sbm_destroy(struct sbm *sbm) +{ + struct x86_sbm_state *state = sbm->private; + + if (!state) + return; + + if (state->pgd) { + free_pgd(state->pgd); + free_pages((unsigned long)state->pgd, PGD_ORDER); + } + free_page((unsigned long)state); + sbm->private = NULL; +} + +int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args) +{ + return func(args); +} -- 2.34.1