userfaultfd handles page faults from both user and kernel code. Add a new UFFD_USER_MODE_ONLY flag for userfaultfd(2) that makes the resulting userfaultfd object refuse to handle faults from kernel mode, treating these faults as if SIGBUS were always raised, causing the kernel code to fail with EFAULT. A future patch adds a knob allowing administrators to give some processes the ability to create userfaultfd file objects only if they pass UFFD_USER_MODE_ONLY, reducing the likelihood that these processes will exploit userfaultfd's ability to delay kernel page faults to open timing windows for future exploits. Signed-off-by: Daniel Colascione <dancol@xxxxxxxxxx> --- fs/userfaultfd.c | 5 ++++- include/uapi/linux/userfaultfd.h | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1123089c3d55..986d23b2cd33 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -389,6 +389,9 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (ctx->features & UFFD_FEATURE_SIGBUS) goto out; + if ((vmf->flags & FAULT_FLAG_USER) == 0 && + ctx->flags & UFFD_USER_MODE_ONLY) + goto out; /* * If it's already released don't get it. This avoids to loop @@ -1959,7 +1962,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) { struct userfaultfd_ctx *ctx; int fd; - static const int uffd_flags = UFFD_SECURE; + static const int uffd_flags = UFFD_SECURE | UFFD_USER_MODE_ONLY; if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE)) return -EPERM; diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 12d7d40d7f25..eadd1497e7b5 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -239,4 +239,10 @@ struct uffdio_zeropage { * Create a userfaultfd with MAC security checks enabled. */ #define UFFD_SECURE 1 + +/* + * Create a userfaultfd that can handle page faults only in user mode. + */ +#define UFFD_USER_MODE_ONLY 2 + #endif /* _LINUX_USERFAULTFD_H */ -- 2.23.0.700.g56cf767bdb-goog