From: Jeff Xu <jeffxu@xxxxxxxxxx> This patch enables PKEY_ENFORCE_API for the munmap syscall. Signed-off-by: Jeff Xu<jeffxu@xxxxxxxxxx> --- include/linux/mm.h | 2 +- mm/mmap.c | 34 ++++++++++++++++++++++++++-------- mm/mremap.c | 6 ++++-- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 27ce77080c79..48076e845d53 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3136,7 +3136,7 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long pgoff, unsigned long *populate, struct list_head *uf); extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, - bool downgrade); + bool downgrade, bool syscall); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); diff --git a/mm/mmap.c b/mm/mmap.c index 13678edaa22c..29329aa794a6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2498,6 +2498,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, * @uf: The userfaultfd list_head * @downgrade: set to true if the user wants to attempt to write_downgrade the * mmap_lock + * @syscall: set to true if this is called from syscall entry * * This function takes a @mas that is either pointing to the previous VMA or set * to MA_START and sets it up to remove the mapping(s). The @len will be @@ -2507,7 +2508,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, */ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, - bool downgrade) + bool downgrade, bool syscall) { unsigned long end; struct vm_area_struct *vma; @@ -2519,6 +2520,19 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, if (end == start) return -EINVAL; + /* + * When called by syscall from userspace, check if the calling + * thread has the PKEY permission to modify the memory mapping. + */ + if (syscall && arch_check_pkey_enforce_api(mm, start, end) < 0) { + char comm[TASK_COMM_LEN]; + + pr_warn_ratelimited( + "munmap was denied on PKEY_ENFORCE_API memory, pid=%d '%s'\n", + task_pid_nr(current), get_task_comm(comm, current)); + return -EACCES; + } + /* arch_unmap() might do unmaps itself. */ arch_unmap(mm, start, end); @@ -2541,7 +2555,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, { VMA_ITERATOR(vmi, mm, start); - return do_vmi_munmap(&vmi, mm, start, len, uf, false); + return do_vmi_munmap(&vmi, mm, start, len, uf, false, false); } unsigned long mmap_region(struct file *file, unsigned long addr, @@ -2575,7 +2589,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, } /* Unmap any existing mapping in the area */ - if (do_vmi_munmap(&vmi, mm, addr, len, uf, false)) + if (do_vmi_munmap(&vmi, mm, addr, len, uf, false, false)) return -ENOMEM; /* @@ -2792,7 +2806,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr, return error; } -static int __vm_munmap(unsigned long start, size_t len, bool downgrade) +/* + * @syscall: set to true if this is called from syscall entry + */ +static int __vm_munmap(unsigned long start, size_t len, bool downgrade, + bool syscall) { int ret; struct mm_struct *mm = current->mm; @@ -2802,7 +2820,7 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade) if (mmap_write_lock_killable(mm)) return -EINTR; - ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade); + ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade, syscall); /* * Returning 1 indicates mmap_lock is downgraded. * But 1 is not legal return value of vm_munmap() and munmap(), reset @@ -2820,14 +2838,14 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade) int vm_munmap(unsigned long start, size_t len) { - return __vm_munmap(start, len, false); + return __vm_munmap(start, len, false, false); } EXPORT_SYMBOL(vm_munmap); SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) { addr = untagged_addr(addr); - return __vm_munmap(addr, len, true); + return __vm_munmap(addr, len, true, true); } @@ -3055,7 +3073,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) if (ret) goto limits_failed; - ret = do_vmi_munmap(&vmi, mm, addr, len, &uf, 0); + ret = do_vmi_munmap(&vmi, mm, addr, len, &uf, 0, false); if (ret) goto munmap_failed; diff --git a/mm/mremap.c b/mm/mremap.c index b11ce6c92099..768e5bd4e8b5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -703,7 +703,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, } vma_iter_init(&vmi, mm, old_addr); - if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) { + if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false, false) < + 0) { /* OOM: unable to split vma, just get accounts right */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) vm_acct_memory(old_len >> PAGE_SHIFT); @@ -993,7 +994,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, VMA_ITERATOR(vmi, mm, addr + new_len); retval = do_vmi_munmap(&vmi, mm, addr + new_len, - old_len - new_len, &uf_unmap, true); + old_len - new_len, &uf_unmap, true, + false); /* Returning 1 indicates mmap_lock is downgraded to read. */ if (retval == 1) { downgraded = true; -- 2.40.1.606.ga4b1b128d6-goog