On Fri, Jul 28, 2023 at 11:28 AM Enze Li <lienze@xxxxxxxxxx> wrote: > > On Thu, Jul 27 2023 at 09:26:04 AM +0800, Huacai Chen wrote: > > > On Tue, Jul 25, 2023 at 2:15 PM Enze Li <lienze@xxxxxxxxxx> wrote: > >> > >> The LoongArch architecture is quite different from other architectures. > >> When the allocating of KFENCE itself is done, it is mapped to the direct > >> mapping configuration window [1] by default on LoongArch. It means that > >> it is not possible to use the page table mapped mode which required by > >> the KFENCE system and therefore it should be remapped to the appropriate > >> region. > >> > >> This patch adds architecture specific implementation details for KFENCE. > >> In particular, this implements the required interface in <asm/kfence.h>. > >> > >> Tested this patch by running the testcases and all passed. > >> > >> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode > >> > >> Signed-off-by: Enze Li <lienze@xxxxxxxxxx> > >> --- > >> arch/loongarch/Kconfig | 1 + > >> arch/loongarch/include/asm/kfence.h | 62 ++++++++++++++++++++++++++++ > >> arch/loongarch/include/asm/pgtable.h | 14 ++++++- > >> arch/loongarch/mm/fault.c | 22 ++++++---- > >> 4 files changed, 90 insertions(+), 9 deletions(-) > >> create mode 100644 arch/loongarch/include/asm/kfence.h > >> > >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > >> index 70635ea3d1e4..5b63b16be49e 100644 > >> --- a/arch/loongarch/Kconfig > >> +++ b/arch/loongarch/Kconfig > >> @@ -91,6 +91,7 @@ config LOONGARCH > >> select HAVE_ARCH_AUDITSYSCALL > >> select HAVE_ARCH_JUMP_LABEL > >> select HAVE_ARCH_JUMP_LABEL_RELATIVE > >> + select HAVE_ARCH_KFENCE > >> select HAVE_ARCH_MMAP_RND_BITS if MMU > >> select HAVE_ARCH_SECCOMP_FILTER > >> select HAVE_ARCH_TRACEHOOK > >> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h > >> new file mode 100644 > >> index 000000000000..fb39076fe4d7 > >> --- /dev/null > >> +++ b/arch/loongarch/include/asm/kfence.h > >> @@ -0,0 +1,62 @@ > >> +/* SPDX-License-Identifier: GPL-2.0 */ > >> +/* > >> + * KFENCE support for LoongArch. > >> + * > >> + * Author: Enze Li <lienze@xxxxxxxxxx> > >> + * Copyright (C) 2022-2023 KylinSoft Corporation. > >> + */ > >> + > >> +#ifndef _ASM_LOONGARCH_KFENCE_H > >> +#define _ASM_LOONGARCH_KFENCE_H > >> + > >> +#include <linux/kfence.h> > >> +#include <asm/pgtable.h> > >> +#include <asm/tlb.h> > >> + > >> +static inline bool arch_kfence_init_pool(void) > >> +{ > >> + char *kfence_pool = __kfence_pool; > >> + struct vm_struct *area; > >> + int err; > >> + > >> + area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP, > >> + KFENCE_AREA_START, KFENCE_AREA_END, > >> + __builtin_return_address(0)); > >> + if (!area) > >> + return false; > >> + > >> + __kfence_pool = (char *)area->addr; > >> + err = ioremap_page_range((unsigned long)__kfence_pool, > >> + (unsigned long)__kfence_pool + KFENCE_POOL_SIZE, > >> + virt_to_phys((void *)kfence_pool), > >> + PAGE_KERNEL); > >> + if (err) { > >> + free_vm_area(area); > >> + return false; > >> + } > >> + > >> + return true; > >> +} > >> + > >> +/* Protect the given page and flush TLB. */ > >> +static inline bool kfence_protect_page(unsigned long addr, bool protect) > >> +{ > >> + pte_t *pte = virt_to_kpte(addr); > >> + > >> + if (WARN_ON(!pte) || pte_none(*pte)) > >> + return false; > >> + > >> + if (protect) > >> + set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); > >> + else > >> + set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); > >> + > >> + /* Flush this CPU's TLB. */ > >> + preempt_disable(); > >> + local_flush_tlb_one(addr); > >> + preempt_enable(); > >> + > >> + return true; > >> +} > >> + > >> +#endif /* _ASM_LOONGARCH_KFENCE_H */ > >> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h > >> index 98a0c98de9d1..2702a6ba7122 100644 > >> --- a/arch/loongarch/include/asm/pgtable.h > >> +++ b/arch/loongarch/include/asm/pgtable.h > >> @@ -77,6 +77,13 @@ extern unsigned long zero_page_mask; > >> (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) > >> #define __HAVE_COLOR_ZERO_PAGE > >> > >> +#ifdef CONFIG_KFENCE > >> +#define KFENCE_AREA_SIZE \ > >> + (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE) > > Hi Huacai, > > > Another question: Why define KFENCE_AREA_SIZE while there is already > > KFENCE_POOL_SIZE? > > The KFENCE_POOL_SIZE macro is defined in linux/kfence.h. When I trying > to include this header file, I see the following error, > > ---------------------------------------------------------------------- > CC arch/loongarch/kernel/asm-offsets.s > In file included from ./arch/loongarch/include/asm/pgtable.h:64, > from ./include/linux/pgtable.h:6, > from ./include/linux/mm.h:29, > from arch/loongarch/kernel/asm-offsets.c:9: > ./include/linux/kfence.h:93:35: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration > 93 | void kfence_shutdown_cache(struct kmem_cache *s); > | ^~~~~~~~~~ > ./include/linux/kfence.h:99:29: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration > 99 | void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags); > | ^~~~~~~~~~ > ./include/linux/kfence.h:117:50: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration > 117 | static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) > | ^~~~~~~~~~ > ./include/linux/kfence.h: In function ‘kfence_alloc’: > ./include/linux/kfence.h:128:31: error: passing argument 1 of ‘__kfence_alloc’ from incompatible pointer type [-Werror=incompatible-pointer-types] > 128 | return __kfence_alloc(s, size, flags); > | ^ > | | > | struct kmem_cache * > -------------------------------------------------------------------- > > The root cause of this issue is that linux/kfence.h should be expanded > after linux/mm.h, not before. That said, we can not put any > "high-level" header files in the "low-level" ones. > > > And why is KFENCE_AREA_SIZE a little larger than > > KFENCE_POOL_SIZE? If we can reuse KFENCE_POOL_SIZE, > > KFENCE_AREA_START/KFENCE_AREA_END can be renamed to > > KFENCE_POOL_START/KFENCE_POOL_END. > > +#define KFENCE_AREA_SIZE \ > + (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE) > ^^^^^ > > Here I've added two extra pages, that's due to working with > __get_vm_area_caller() to request the space correctly. > > 1. arch_kfence_init_pool > __get_vm_area_caller > __get_vm_area_node > ================================= > if (!(flags & VM_NO_GUARD)) > size += PAGE_SIZE; > ================================= > > If we do not set VM_NO_GUARD, we would get one more page as "GUARD". > Setting VM_NO_GUARD is dangerous behavior and I suggest we keep this > page. > > 2. arch_kfence_init_pool > __get_vm_area_caller > __get_vm_area_node !!!This is my comment-- > ======================================= | > if (flags & VM_IOREMAP) | > align = 1ul << clamp_t(int, ... | > *** We got "align==0x200000" here. Based on the default <-- > KFENCE objects of 255, we got the maximum align here. *** > ======================================= > > alloc_vmap_area > __alloc_vmap_area > ================================= > nva_start_addr = ALIGN(vstart, align); > *** When running here, the starting address will be > moved forward one byte due to alignment > requirements. If we do not give enough space, we'll > fail on the next line. *** > > if (nva_start_addr + size > vend) > return vend; > ================================= > > Theoretically, this alignment requires at most 2MB of space. However, > considering that the starting address is fixed (the starting position is > determined by VMEMMAP_END), I think that adding another page will be > enough. OK, that makes sense. Huacai > > Best Regards, > Enze > > >> +#else > >> +#define KFENCE_AREA_SIZE 0 > >> +#endif > >> + > >> /* > >> * TLB refill handlers may also map the vmalloc area into xkvrange. > >> * Avoid the first couple of pages so NULL pointer dereferences will > >> @@ -88,11 +95,16 @@ extern unsigned long zero_page_mask; > >> #define VMALLOC_START MODULES_END > >> #define VMALLOC_END \ > >> (vm_map_base + \ > >> - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) > >> + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE) > >> > >> #define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) > >> #define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) > >> > >> +#ifdef CONFIG_KFENCE > >> +#define KFENCE_AREA_START VMEMMAP_END > >> +#define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE) > >> +#endif > >> + > >> #define pte_ERROR(e) \ > >> pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) > >> #ifndef __PAGETABLE_PMD_FOLDED > >> diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c > >> index da5b6d518cdb..c0319128b221 100644 > >> --- a/arch/loongarch/mm/fault.c > >> +++ b/arch/loongarch/mm/fault.c > >> @@ -23,6 +23,7 @@ > >> #include <linux/kprobes.h> > >> #include <linux/perf_event.h> > >> #include <linux/uaccess.h> > >> +#include <linux/kfence.h> > >> > >> #include <asm/branch.h> > >> #include <asm/mmu_context.h> > >> @@ -30,7 +31,8 @@ > >> > >> int show_unhandled_signals = 1; > >> > >> -static void __kprobes no_context(struct pt_regs *regs, unsigned long address) > >> +static void __kprobes no_context(struct pt_regs *regs, unsigned long address, > >> + unsigned long write) > >> { > >> const int field = sizeof(unsigned long) * 2; > >> > >> @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address) > >> if (fixup_exception(regs)) > >> return; > >> > >> + if (kfence_handle_page_fault(address, write, regs)) > >> + return; > >> + > >> /* > >> * Oops. The kernel tried to access some bad page. We'll have to > >> * terminate things with extreme prejudice. > >> @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address) > >> die("Oops", regs); > >> } > >> > >> -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address) > >> +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address, > >> + unsigned long write) > >> { > >> /* > >> * We ran out of memory, call the OOM killer, and return the userspace > >> * (which will retry the fault, or kill us if we got oom-killed). > >> */ > >> if (!user_mode(regs)) { > >> - no_context(regs, address); > >> + no_context(regs, address, write); > >> return; > >> } > >> pagefault_out_of_memory(); > >> @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs, > >> { > >> /* Kernel mode? Handle exceptions or die */ > >> if (!user_mode(regs)) { > >> - no_context(regs, address); > >> + no_context(regs, address, write); > >> return; > >> } > >> > >> @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs, > >> > >> /* Kernel mode? Handle exceptions or die */ > >> if (!user_mode(regs)) { > >> - no_context(regs, address); > >> + no_context(regs, address, write); > >> return; > >> } > >> > >> @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, > >> */ > >> if (address & __UA_LIMIT) { > >> if (!user_mode(regs)) > >> - no_context(regs, address); > >> + no_context(regs, address, write); > >> else > >> do_sigsegv(regs, write, address, si_code); > >> return; > >> @@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, > >> > >> if (fault_signal_pending(fault, regs)) { > >> if (!user_mode(regs)) > >> - no_context(regs, address); > >> + no_context(regs, address, write); > >> return; > >> } > >> > >> @@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, > >> if (unlikely(fault & VM_FAULT_ERROR)) { > >> mmap_read_unlock(mm); > >> if (fault & VM_FAULT_OOM) { > >> - do_out_of_memory(regs, address); > >> + do_out_of_memory(regs, address, write); > >> return; > >> } else if (fault & VM_FAULT_SIGSEGV) { > >> do_sigsegv(regs, write, address, si_code); > >> -- > >> 2.34.1 > >> > >> >