On Mon, May 15, 2023 at 01:05:51PM +0000, jeffxu@xxxxxxxxxxxx wrote:
> From: Jeff Xu <jeffxu@xxxxxxxxxx>
>
> This patch enables PKEY_ENFORCE_API for the munmap
> syscall.
>
> Signed-off-by: Jeff Xu<jeffxu@xxxxxxxxxx>
> ---
> include/linux/mm.h | 2 +-
> mm/mmap.c | 34 ++++++++++++++++++++++++++--------
> mm/mremap.c | 6 ++++--
> 3 files changed, 31 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 27ce77080c79..48076e845d53 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3136,7 +3136,7 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
>          unsigned long pgoff, unsigned long *populate, struct list_head *uf);
>  extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
>                           unsigned long start, size_t len, struct list_head *uf,
> -                         bool downgrade);
> +                         bool downgrade, bool syscall);

For type checking and readability, I suggest using an enum instead of
"bool". Perhaps something like:

enum caller_origin {
        ON_BEHALF_OF_KERNEL = 0,
        ON_BEHALF_OF_USERSPACE,
};

 extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
                          unsigned long start, size_t len, struct list_head *uf,
-                         bool downgrade);
+                         bool downgrade, enum caller_origin called);

>  extern int do_munmap(struct mm_struct *, unsigned long, size_t,
>                       struct list_head *uf);
>  extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 13678edaa22c..29329aa794a6 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2498,6 +2498,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
>   * @uf: The userfaultfd list_head
>   * @downgrade: set to true if the user wants to attempt to write_downgrade the
>   * mmap_lock
> + * @syscall: set to true if this is called from syscall entry
>   *
>   * This function takes a @mas that is either pointing to the previous VMA or set
>   * to MA_START and sets it up to remove the mapping(s).
The @len will be
> @@ -2507,7 +2508,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
>   */
>  int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
>                    unsigned long start, size_t len, struct list_head *uf,
> -                  bool downgrade)
> +                  bool downgrade, bool syscall)
>  {
>          unsigned long end;
>          struct vm_area_struct *vma;
> @@ -2519,6 +2520,19 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
>          if (end == start)
>                  return -EINVAL;
>
> +        /*
> +         * When called by syscall from userspace, check if the calling
> +         * thread has the PKEY permission to modify the memory mapping.
> +         */
> +        if (syscall && arch_check_pkey_enforce_api(mm, start, end) < 0) {

        if (called == ON_BEHALF_OF_USERSPACE &&
            arch_check_pkey_enforce_api(mm, start, end) < 0) {

> +                char comm[TASK_COMM_LEN];
> +
> +                pr_warn_ratelimited(
> +                        "munmap was denied on PKEY_ENFORCE_API memory, pid=%d '%s'\n",
> +                        task_pid_nr(current), get_task_comm(comm, current));
> +                return -EACCES;
> +        }
> +
>          /* arch_unmap() might do unmaps itself. */
>          arch_unmap(mm, start, end);
>
> @@ -2541,7 +2555,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
>  {
>          VMA_ITERATOR(vmi, mm, start);
>
> -        return do_vmi_munmap(&vmi, mm, start, len, uf, false);
> +        return do_vmi_munmap(&vmi, mm, start, len, uf, false, false);

+        return do_vmi_munmap(&vmi, mm, start, len, uf, false, ON_BEHALF_OF_KERNEL);

> [...]
>  SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
>  {
>          addr = untagged_addr(addr);
> -        return __vm_munmap(addr, len, true);
> +        return __vm_munmap(addr, len, true, true);

+        return __vm_munmap(addr, len, true, ON_BEHALF_OF_USERSPACE);

etc.

-- 
Kees Cook