Apply the unprivileged_userfaultfd check when doing userfaultfd syscall.
We didn't check it in other paths of userfaultfd (e.g., the ioctl()
path) because we don't want to drag down the fast path of userfaultfd,
as suggested by Andrea.

Suggested-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Suggested-by: Mike Rapoport <rppt@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
 fs/userfaultfd.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index c2188464555a..effdcfc88629 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -951,6 +951,28 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
 	}
 }
 
+/* Whether current process allows to use userfaultfd syscalls */
+static bool userfaultfd_allowed(void)
+{
+	bool allowed = false;
+
+	switch (unprivileged_userfaultfd) {
+	case UFFD_UNPRIV_ENABLED:
+		allowed = true;
+		break;
+	case UFFD_UNPRIV_KVM:
+		allowed = !!test_bit(MMF_USERFAULTFD_ALLOW,
+				     &current->mm->flags);
+		/* Fall through */
+	case UFFD_UNPRIV_DISABLED:
+		allowed = allowed || ns_capable(current_user_ns(),
+						CAP_SYS_PTRACE);
+		break;
+	}
+
+	return allowed;
+}
+
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
@@ -2018,6 +2040,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 	BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC);
 	BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK);
 
+	if (!userfaultfd_allowed())
+		return -EPERM;
+
 	if (flags & ~UFFD_SHARED_FCNTL_FLAGS)
 		return -EINVAL;
 
-- 
2.17.1