On 2024-08-13 8:54 PM, Samuel Holland wrote: > Hi Alex, > > Thanks for the review! > > On 2024-08-13 3:58 AM, Alexandre Ghiti wrote: >> Hi Samuel, >> >> On 25/06/2024 23:09, Samuel Holland wrote: >>> RISC-V supports pointer masking with a variable number of tag bits >>> (which is called "PMLEN" in the specification) and which is configured >>> at the next higher privilege level. >>> >>> Wire up the PR_SET_TAGGED_ADDR_CTRL and PR_GET_TAGGED_ADDR_CTRL prctls >>> so userspace can request a lower bound on the number of tag bits and >>> determine the actual number of tag bits. As with arm64's >>> PR_TAGGED_ADDR_ENABLE, the pointer masking configuration is >>> thread-scoped, inherited on clone() and fork() and cleared on execve(). >>> >>> Signed-off-by: Samuel Holland <samuel.holland@xxxxxxxxxx> >>> --- >>> >>> Changes in v2: >>> - Rebase on riscv/linux.git for-next >>> - Add and use the envcfg_update_bits() helper function >>> - Inline flush_tagged_addr_state() >>> >>> arch/riscv/Kconfig | 11 ++++ >>> arch/riscv/include/asm/processor.h | 8 +++ >>> arch/riscv/include/asm/switch_to.h | 11 ++++ >>> arch/riscv/kernel/process.c | 99 ++++++++++++++++++++++++++++++ >>> include/uapi/linux/prctl.h | 3 + >>> 5 files changed, 132 insertions(+) >>> >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig >>> index b94176e25be1..8f9980f81ea5 100644 >>> --- a/arch/riscv/Kconfig >>> +++ b/arch/riscv/Kconfig >>> @@ -505,6 +505,17 @@ config RISCV_ISA_C >>> If you don't know what to do here, say Y. >>> +config RISCV_ISA_POINTER_MASKING >>> + bool "Smmpm, Smnpm, and Ssnpm extensions for pointer masking" >>> + depends on 64BIT >>> + default y >>> + help >>> + Add support for the pointer masking extensions (Smmpm, Smnpm, >>> + and Ssnpm) when they are detected at boot. >>> + >>> + If this option is disabled, userspace will be unable to use >>> + the prctl(PR_{SET,GET}_TAGGED_ADDR_CTRL) API. >>> + >>> config RISCV_ISA_SVNAPOT >>> bool "Svnapot extension support for supervisor mode NAPOT pages" >>> depends on 64BIT && MMU >>> diff --git a/arch/riscv/include/asm/processor.h >>> b/arch/riscv/include/asm/processor.h >>> index 0838922bd1c8..4f99c85d29ae 100644 >>> --- a/arch/riscv/include/asm/processor.h >>> +++ b/arch/riscv/include/asm/processor.h >>> @@ -194,6 +194,14 @@ extern int set_unalign_ctl(struct task_struct *tsk, >>> unsigned int val); >>> #define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) >>> riscv_set_icache_flush_ctx(arg1, arg2) >>> extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long >>> per_thread); >>> +#ifdef CONFIG_RISCV_ISA_POINTER_MASKING >>> +/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ >>> +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); >>> +long get_tagged_addr_ctrl(struct task_struct *task); >>> +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) >>> +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) >>> +#endif >>> + >>> #endif /* __ASSEMBLY__ */ >>> #endif /* _ASM_RISCV_PROCESSOR_H */ >>> diff --git a/arch/riscv/include/asm/switch_to.h >>> b/arch/riscv/include/asm/switch_to.h >>> index 9685cd85e57c..94e33216b2d9 100644 >>> --- a/arch/riscv/include/asm/switch_to.h >>> +++ b/arch/riscv/include/asm/switch_to.h >>> @@ -70,6 +70,17 @@ static __always_inline bool has_fpu(void) { return false; } >>> #define __switch_to_fpu(__prev, __next) do { } while (0) >>> #endif >>> +static inline void envcfg_update_bits(struct task_struct *task, >>> + unsigned long mask, unsigned long val) >>> +{ >>> + unsigned long envcfg; >>> + >>> + envcfg = (task->thread.envcfg & ~mask) | val; >>> + task->thread.envcfg = envcfg; >>> + if (task == current) >>> + csr_write(CSR_ENVCFG, envcfg); >>> +} >>> + >>> static inline void __switch_to_envcfg(struct task_struct *next) >>> { >>> asm volatile (ALTERNATIVE("nop", "csrw " __stringify(CSR_ENVCFG) ", %0", >>> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c >>> index e4bc61c4e58a..dec5ccc44697 100644 >>> --- a/arch/riscv/kernel/process.c >>> +++ b/arch/riscv/kernel/process.c >>> @@ -7,6 +7,7 @@ >>> * Copyright (C) 2017 SiFive >>> */ >>> +#include <linux/bitfield.h> >>> #include <linux/cpu.h> >>> #include <linux/kernel.h> >>> #include <linux/sched.h> >>> @@ -171,6 +172,10 @@ void flush_thread(void) >>> memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); >>> clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); >>> #endif >>> +#ifdef CONFIG_RISCV_ISA_POINTER_MASKING >>> + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) >>> + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); >>> +#endif >> >> if (IS_ENABLED(CONFIG_RISCV_ISA_POINTER_MASKING) && >> riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) > > I will update this. > >>> } >>> void arch_release_task_struct(struct task_struct *tsk) >>> @@ -233,3 +238,97 @@ void __init arch_task_cache_init(void) >>> { >>> riscv_v_setup_ctx_cache(); >>> } >>> + >>> +#ifdef CONFIG_RISCV_ISA_POINTER_MASKING >>> +static bool have_user_pmlen_7; >>> +static bool have_user_pmlen_16; >>> + >>> +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) >>> +{ >>> + unsigned long valid_mask = PR_PMLEN_MASK; >>> + struct thread_info *ti = task_thread_info(task); >>> + unsigned long pmm; >>> + u8 pmlen; >>> + >>> + if (is_compat_thread(ti)) >>> + return -EINVAL; >>> + >>> + if (arg & ~valid_mask) >>> + return -EINVAL; >>> + >>> + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); >>> + if (pmlen > 16) { >>> + return -EINVAL; >>> + } else if (pmlen > 7) { >>> + if (have_user_pmlen_16) >>> + pmlen = 16; >>> + else >>> + return -EINVAL; >>> + } else if (pmlen > 0) { >>> + /* >>> + * Prefer the smallest PMLEN that satisfies the user's request, >>> + * in case choosing a larger PMLEN has a performance impact. >>> + */ >>> + if (have_user_pmlen_7) >>> + pmlen = 7; >>> + else if (have_user_pmlen_16) >>> + pmlen = 16; >>> + else >>> + return -EINVAL; >>> + } >>> + >>> + if (pmlen == 7) >>> + pmm = ENVCFG_PMM_PMLEN_7; >>> + else if (pmlen == 16) >>> + pmm = ENVCFG_PMM_PMLEN_16; >>> + else >>> + pmm = ENVCFG_PMM_PMLEN_0; >>> + >>> + envcfg_update_bits(task, ENVCFG_PMM, pmm); >>> + >>> + return 0; >>> +} >>> + >>> +long get_tagged_addr_ctrl(struct task_struct *task) >>> +{ >>> + struct thread_info *ti = task_thread_info(task); >>> + long ret = 0; >>> + >>> + if (is_compat_thread(ti)) >>> + return -EINVAL; >>> + >>> + switch (task->thread.envcfg & ENVCFG_PMM) { >>> + case ENVCFG_PMM_PMLEN_7: >>> + ret |= FIELD_PREP(PR_PMLEN_MASK, 7); >>> + break; >>> + case ENVCFG_PMM_PMLEN_16: >>> + ret |= FIELD_PREP(PR_PMLEN_MASK, 16); >>> + break; >>> + } >> >> >> No need for the |= > > This is used in the next patch since the returned value may include > PR_TAGGED_ADDR_ENABLE as well, but it's not needed here, so I will make this change. > >>> + >>> + return ret; >>> +} >> >> >> In all the code above, I'd use a macro for 7 and 16, something like PMLEN[7|16]? > > I've done this using an enum in v4. Please let me know if it looks good to you. Obviously I meant to say v3 here. >>> + >>> +static bool try_to_set_pmm(unsigned long value) >>> +{ >>> + csr_set(CSR_ENVCFG, value); >>> + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; >>> +} >>> + >>> +static int __init tagged_addr_init(void) >>> +{ >>> + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) >>> + return 0; >>> + >>> + /* >>> + * envcfg.PMM is a WARL field. Detect which values are supported. >>> + * Assume the supported PMLEN values are the same on all harts. >>> + */ >>> + csr_clear(CSR_ENVCFG, ENVCFG_PMM); >>> + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); >>> + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); >> >> >> Shouldn't this depend on the satp mode? sv57 does not allow 16bits for the tag. > > No, late last year the pointer masking spec was changed so that the valid values > for PMM can no longer dynamically depend on satp.MODE. If an implementation > chooses to support both Sv57 and PMLEN==16, then it does so by masking off some > of the valid bits in the virtual address. (This is a valid if unusual use case > considering that pointer masking does not apply to instruction fetches, so an > application could place code at addresses above 2^47-1 and use the whole masked > virtual address space for data. Or it could enable pointer masking for only > certain threads, and those threads would be limited to a subset of data.) > >>> + >>> + return 0; >>> +} >>> +core_initcall(tagged_addr_init); >> >> >> Any reason it's not called from setup_arch()? I see the vector extension does >> the same; just wondering if we should not centralize all this early extensions >> decisions in setup_arch() (in my Zacas series, I choose the spinlock >> implementation in setup_arch()). Forgot to reply: no special reason, I copied this part of the code from arm64. This code doesn't need to be called especially early; the only requirement is that it runs before userspace starts. One advantage of core_initcall() is that it happens after SMP bringup, so this way will have less impact on boot time. Regards, Samuel >>> +#endif /* CONFIG_RISCV_ISA_POINTER_MASKING */ >>> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h >>> index 35791791a879..6e84c827869b 100644 >>> --- a/include/uapi/linux/prctl.h >>> +++ b/include/uapi/linux/prctl.h >>> @@ -244,6 +244,9 @@ struct prctl_mm_map { >>> # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) >>> /* Unused; kept only for source compatibility */ >>> # define PR_MTE_TCF_SHIFT 1 >>> +/* RISC-V pointer masking tag length */ >>> +# define PR_PMLEN_SHIFT 24 >>> +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) >> >> >> I don't understand the need for this shift, can't userspace pass the pmlen value >> directly without worrying about this? > > No, because the PR_TAGGED_ADDR_ENABLE flag (bit 0, defined just a few lines > above) is part of the the same argument word. It's just not used until the next > patch. > > Regards, > Samuel >