On Tue, 2022-12-27 at 06:08 +0300, Kirill A. Shutemov wrote: > Add a couple of arch_prctl() handles: > > - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required > number > of tag bits. It is rounded up to the nearest LAM mode that can > provide it. For now only LAM_U57 is supported, with 6 tag bits. > > - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag > bits located in the address. > > - ARCH_GET_MAX_TAG_BITS returns the maximum tag bits user can > request. > Zero if LAM is not supported. > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/mmu.h | 2 ++ > arch/x86/include/uapi/asm/prctl.h | 4 +++ > arch/x86/kernel/process.c | 3 +++ > arch/x86/kernel/process_64.c | 44 > ++++++++++++++++++++++++++++++- > 4 files changed, 52 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h > index ed72fcd2292d..54e4a3e9b5c5 100644 > --- a/arch/x86/include/asm/mmu.h > +++ b/arch/x86/include/asm/mmu.h > @@ -12,6 +12,8 @@ > #define MM_CONTEXT_UPROBE_IA32 0 > /* vsyscall page is accessible on this MM */ > #define MM_CONTEXT_HAS_VSYSCALL 1 > +/* Do not allow changing LAM mode */ > +#define MM_CONTEXT_LOCK_LAM 2 > > /* > * x86 has arch-specific MMU state beyond what lives in mm_struct. > diff --git a/arch/x86/include/uapi/asm/prctl.h > b/arch/x86/include/uapi/asm/prctl.h > index 500b96e71f18..a31e27b95b19 100644 > --- a/arch/x86/include/uapi/asm/prctl.h > +++ b/arch/x86/include/uapi/asm/prctl.h > @@ -20,4 +20,8 @@ > #define ARCH_MAP_VDSO_32 0x2002 > #define ARCH_MAP_VDSO_64 0x2003 > > +#define ARCH_GET_UNTAG_MASK 0x4001 > +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 > +#define ARCH_GET_MAX_TAG_BITS 0x4003 > + > #endif /* _ASM_X86_PRCTL_H */ > diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c > index ef6bde1d40d8..cc0677f58f42 100644 > --- a/arch/x86/kernel/process.c > +++ b/arch/x86/kernel/process.c > @@ -162,6 +162,9 @@ int copy_thread(struct task_struct *p, const > struct kernel_clone_args *args) > > savesegment(es, p->thread.es); > savesegment(ds, p->thread.ds); > + > + if (p->mm && (clone_flags & (CLONE_VM | CLONE_VFORK)) == > CLONE_VM) > + set_bit(MM_CONTEXT_LOCK_LAM, &p->mm->context.flags); > #else > p->thread.sp0 = (unsigned long) (childregs + 1); > savesegment(gs, p->thread.gs); > diff --git a/arch/x86/kernel/process_64.c > b/arch/x86/kernel/process_64.c > index 8b06034e8c70..fef127ed79b6 100644 > --- a/arch/x86/kernel/process_64.c > +++ b/arch/x86/kernel/process_64.c > @@ -743,6 +743,39 @@ static long prctl_map_vdso(const struct > vdso_image *image, unsigned long addr) > } > #endif > > +#define LAM_U57_BITS 6 > + > +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned > long nr_bits) > +{ > + if (!cpu_feature_enabled(X86_FEATURE_LAM)) > + return -ENODEV; > + > + if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) > + return -EBUSY; Since this bit doesn't get set on vfork, you might be able to work around this by enabling LAM in a vforked task, then enabling it again after returning to the parent. But why do we need this check anyway? > + > + if (mmap_write_lock_killable(mm)) > + return -EINTR; > + > + if (!nr_bits) { > + mmap_write_unlock(mm); > + return -EINVAL; > + } else if (nr_bits <= LAM_U57_BITS) { > + mm->context.lam_cr3_mask = X86_CR3_LAM_U57; > + mm->context.untag_mask = ~GENMASK(62, 57); > + } else { > + mmap_write_unlock(mm); > + return -EINVAL; > + } > + > + write_cr3(__read_cr3() | mm->context.lam_cr3_mask); mm might not be from the current task if it came from PTRACE_ARCH_PRCTL, so then this would write to the wrong CR3. Maybe for simplicity just return an error if task != current. > + set_tlbstate_cr3_lam_mask(mm->context.lam_cr3_mask); > + set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags); > + > + mmap_write_unlock(mm); > + > + return 0; > +} > + > long do_arch_prctl_64(struct task_struct *task, int option, unsigned > long arg2) > { > int ret = 0; > @@ -830,7 +863,16 @@ long do_arch_prctl_64(struct task_struct *task, > int option, unsigned long arg2) > case ARCH_MAP_VDSO_64: > return prctl_map_vdso(&vdso_image_64, arg2); > #endif > - > + case ARCH_GET_UNTAG_MASK: > + return put_user(task->mm->context.untag_mask, > + (unsigned long __user *)arg2); > + case ARCH_ENABLE_TAGGED_ADDR: > + return prctl_enable_tagged_addr(task->mm, arg2); > + case ARCH_GET_MAX_TAG_BITS: > + if (!cpu_feature_enabled(X86_FEATURE_LAM)) > + return put_user(0, (unsigned long __user > *)arg2); > + else > + return put_user(LAM_U57_BITS, (unsigned long > __user *)arg2); > default: > ret = -EINVAL; > break;