From: Nadav Amit <namit@xxxxxxxxxx> Using io-uring with userfaultfd for reads can, upon a fork event, lead to the installation of the userfaultfd file descriptor on the worker kernel thread instead of the process that initiated the read. io-uring assumes that no new file descriptors are installed during a read. As a result, the controlling process would not be able to access the newly forked process's userfaultfd file descriptor. To solve this problem, save the files_struct of the process that initiated the userfaultfd syscall in the context and reload it when needed. Cc: Jens Axboe <axboe@xxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: io-uring@xxxxxxxxxxxxxxx Cc: linux-fsdevel@xxxxxxxxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx Cc: linux-mm@xxxxxxxxx Fixes: 2b188cc1bb85 ("Add io_uring IO interface") Signed-off-by: Nadav Amit <namit@xxxxxxxxxx> --- fs/userfaultfd.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index c8ed4320370e..4fe07c1a44c6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -27,6 +27,7 @@ #include <linux/ioctl.h> #include <linux/security.h> #include <linux/hugetlb.h> +#include <linux/fdtable.h> int sysctl_unprivileged_userfaultfd __read_mostly = 1; @@ -76,6 +77,8 @@ struct userfaultfd_ctx { bool mmap_changing; /* mm with one ore more vmas attached to this userfaultfd_ctx */ struct mm_struct *mm; + /* controlling process files as they might be different than current */ + struct files_struct *files; }; struct userfaultfd_fork_ctx { @@ -173,6 +176,7 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx) VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock)); VM_BUG_ON(waitqueue_active(&ctx->fd_wqh)); mmdrop(ctx->mm); + put_files_struct(ctx->files); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } } @@ -666,6 +670,8 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) ctx->mm = 
vma->vm_mm; mmgrab(ctx->mm); + ctx->files = octx->files; + atomic_inc(&ctx->files->count); userfaultfd_ctx_get(octx); WRITE_ONCE(octx->mmap_changing, true); fctx->orig = octx; @@ -976,10 +982,32 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx, struct userfaultfd_ctx *new, struct uffd_msg *msg) { + struct files_struct *files = NULL; int fd; + BUG_ON(new->files == NULL); + + /* + * This function can be called from another context than the controlling + * process, for instance, for an io-uring submission kernel thread. If + * that is the case we must ensure the correct files are being used. + */ + if (current->files != new->files) { + task_lock(current); + files = current->files; + current->files = new->files; + task_unlock(current); + } + fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new, O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS)); + + if (files != NULL) { + task_lock(current); + current->files = files; + task_unlock(current); + } + if (fd < 0) return fd; @@ -1986,6 +2014,8 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) /* prevent the mm struct to be freed */ mmgrab(ctx->mm); + ctx->files = get_files_struct(current); + fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); if (fd < 0) { -- 2.25.1