Re: [PATCH 2/3] userfaultfd: convert to ->read_iter()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 02, 2024 at 02:18:22PM -0600, Jens Axboe wrote:
> Rather than use the older style ->read() hook, use ->read_iter() so that
> userfaultfd can support both O_NONBLOCK and IOCB_NOWAIT for non-blocking
> read attempts.
> 
> Split the fd setup into two parts, so that userfaultfd can mark the file
> mode with FMODE_NOWAIT before installing it into the process table. With
> that, we can also defer grabbing the mm until we know the rest will
> succeed, as the fd isn't visible before then.
> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  fs/userfaultfd.c | 42 ++++++++++++++++++++++++++----------------
>  1 file changed, 26 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 60dcfafdc11a..7864c2dba858 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -282,7 +282,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
>  /*
>   * Verify the pagetables are still not ok after having reigstered into
>   * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
> - * userfault that has already been resolved, if userfaultfd_read and
> + * userfault that has already been resolved, if userfaultfd_read_iter and
>   * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
>   * threads.
>   */
> @@ -1177,34 +1177,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
>  	return ret;
>  }
>  
> -static ssize_t userfaultfd_read(struct file *file, char __user *buf,
> -				size_t count, loff_t *ppos)
> +static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  {
> +	struct file *file = iocb->ki_filp;
>  	struct userfaultfd_ctx *ctx = file->private_data;
>  	ssize_t _ret, ret = 0;
>  	struct uffd_msg msg;
> -	int no_wait = file->f_flags & O_NONBLOCK;
>  	struct inode *inode = file_inode(file);
> +	bool no_wait;
>  
>  	if (!userfaultfd_is_initialized(ctx))
>  		return -EINVAL;
>  
> +	no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
>  	for (;;) {
> -		if (count < sizeof(msg))
> +		if (iov_iter_count(to) < sizeof(msg))
>  			return ret ? ret : -EINVAL;
>  		_ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode);
>  		if (_ret < 0)
>  			return ret ? ret : _ret;
> -		if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg)))
> +		_ret = copy_to_iter(&msg, sizeof(msg), to);
> +		if (_ret < 0)
>  			return ret ? ret : -EFAULT;
>  		ret += sizeof(msg);
> -		buf += sizeof(msg);
> -		count -= sizeof(msg);
>  		/*
>  		 * Allow to read more than one fault at time but only
>  		 * block if waiting for the very first one.
>  		 */
> -		no_wait = O_NONBLOCK;
> +		no_wait = true;
>  	}
>  }
>  
> @@ -2172,7 +2172,7 @@ static const struct file_operations userfaultfd_fops = {
>  #endif
>  	.release	= userfaultfd_release,
>  	.poll		= userfaultfd_poll,
> -	.read		= userfaultfd_read,
> +	.read_iter	= userfaultfd_read_iter,
>  	.unlocked_ioctl = userfaultfd_ioctl,
>  	.compat_ioctl	= compat_ptr_ioctl,
>  	.llseek		= noop_llseek,
> @@ -2192,6 +2192,7 @@ static void init_once_userfaultfd_ctx(void *mem)
>  static int new_userfaultfd(int flags)
>  {
>  	struct userfaultfd_ctx *ctx;
> +	struct file *file;
>  	int fd;
>  
>  	BUG_ON(!current->mm);
> @@ -2215,16 +2216,25 @@ static int new_userfaultfd(int flags)
>  	init_rwsem(&ctx->map_changing_lock);
>  	atomic_set(&ctx->mmap_changing, 0);
>  	ctx->mm = current->mm;
> -	/* prevent the mm struct to be freed */
> -	mmgrab(ctx->mm);
> +
> +	fd = get_unused_fd_flags(O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS));
> +	if (fd < 0)
> +		goto err_out;
>  
>  	/* Create a new inode so that the LSM can block the creation.  */
> -	fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
> +	file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
>  			O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
> -	if (fd < 0) {
> -		mmdrop(ctx->mm);
> -		kmem_cache_free(userfaultfd_ctx_cachep, ctx);
> +	if (IS_ERR(file)) {
> +		fd = PTR_ERR(file);
> +		goto err_out;

You're leaking the fd you allocated above.

>  	}
> +	/* prevent the mm struct to be freed */
> +	mmgrab(ctx->mm);
> +	file->f_mode |= FMODE_NOWAIT;
> +	fd_install(fd, file);
> +	return fd;
> +err_out:
> +	kmem_cache_free(userfaultfd_ctx_cachep, ctx);
>  	return fd;
>  }
>  
> -- 
> 2.43.0
> 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux