Re: [PATCH 28/43] xfs: implement direct writes to zoned RT devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 11, 2024 at 09:54:53AM +0100, Christoph Hellwig wrote:
> Direct writes to zoned RT devices are extremely simple.  After taking the
> block reservation before acquiring the iolock, the iomap direct I/O calls
> into ->iomap_begin which will return a "fake" iomap for the entire
> requested range.  The actual block allocation is then done from the
> submit_io handler using code shared with the buffered I/O path.
> 
> The iomap_dio_ops set the bio_set to the (iomap) ioend one and initialize
> the embedded ioend, which allows reusing the existing ioend based buffered
> I/O completion path.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Yeah that is a lot simpler. :)
Reviewed-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>

--D

> ---
>  fs/xfs/xfs_aops.c  |  6 ++--
>  fs/xfs/xfs_aops.h  |  3 +-
>  fs/xfs/xfs_file.c  | 80 +++++++++++++++++++++++++++++++++++++++++-----
>  fs/xfs/xfs_iomap.c | 54 +++++++++++++++++++++++++++++++
>  fs/xfs/xfs_iomap.h |  1 +
>  5 files changed, 133 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 67392413216b..a3ca14e811fd 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -137,7 +137,9 @@ xfs_end_ioend(
>  	else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
>  		error = xfs_iomap_write_unwritten(ip, offset, size, false);
>  
> -	if (!error && xfs_ioend_is_append(ioend))
> +	if (!error &&
> +	    !(ioend->io_flags & IOMAP_IOEND_DIRECT) &&
> +	    xfs_ioend_is_append(ioend))
>  		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
>  done:
>  	iomap_finish_ioends(ioend, error);
> @@ -182,7 +184,7 @@ xfs_end_io(
>  	}
>  }
>  
> -static void
> +void
>  xfs_end_bio(
>  	struct bio		*bio)
>  {
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index e0bd68419764..5a7a0f1a0b49 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -9,6 +9,7 @@
>  extern const struct address_space_operations xfs_address_space_operations;
>  extern const struct address_space_operations xfs_dax_aops;
>  
> -int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +void xfs_end_bio(struct bio *bio);
>  
>  #endif /* __XFS_AOPS_H__ */
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 195cf60a81b0..1b39000b7c62 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -25,6 +25,7 @@
>  #include "xfs_iomap.h"
>  #include "xfs_reflink.h"
>  #include "xfs_file.h"
> +#include "xfs_aops.h"
>  #include "xfs_zone_alloc.h"
>  
>  #include <linux/dax.h>
> @@ -548,6 +549,9 @@ xfs_dio_write_end_io(
>  	loff_t			offset = iocb->ki_pos;
>  	unsigned int		nofs_flag;
>  
> +	ASSERT(!xfs_is_zoned_inode(ip) ||
> +	       !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
> +
>  	trace_xfs_end_io_direct_write(ip, offset, size);
>  
>  	if (xfs_is_shutdown(ip->i_mount))
> @@ -627,14 +631,51 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
>  	.end_io		= xfs_dio_write_end_io,
>  };
>  
> +static void
> +xfs_dio_zoned_submit_io(
> +	const struct iomap_iter	*iter,
> +	struct bio		*bio,
> +	loff_t			file_offset)
> +{
> +	struct xfs_mount	*mp = XFS_I(iter->inode)->i_mount;
> +	struct xfs_zone_alloc_ctx *ac = iter->private;
> +	xfs_filblks_t		count_fsb;
> +	struct iomap_ioend	*ioend;
> +
> +	count_fsb = XFS_B_TO_FSB(mp, bio->bi_iter.bi_size);
> +	if (count_fsb > ac->reserved_blocks) {
> +		xfs_err(mp,
> +"allocation (%lld) larger than reservation (%lld).",
> +			count_fsb, ac->reserved_blocks);
> +		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> +		bio_io_error(bio);
> +		return;
> +	}
> +	ac->reserved_blocks -= count_fsb;
> +
> +	bio->bi_end_io = xfs_end_bio;
> +	ioend = iomap_init_ioend(iter->inode, bio, file_offset,
> +			IOMAP_IOEND_DIRECT);
> +	xfs_zone_alloc_and_submit(ioend, &ac->open_zone);
> +}
> +
> +static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
> +	.bio_set	= &iomap_ioend_bioset,
> +	.submit_io	= xfs_dio_zoned_submit_io,
> +	.end_io		= xfs_dio_write_end_io,
> +};
> +
>  /*
> - * Handle block aligned direct I/O writes
> + * Handle block aligned direct I/O writes.
>   */
>  static noinline ssize_t
>  xfs_file_dio_write_aligned(
>  	struct xfs_inode	*ip,
>  	struct kiocb		*iocb,
> -	struct iov_iter		*from)
> +	struct iov_iter		*from,
> +	const struct iomap_ops	*ops,
> +	const struct iomap_dio_ops *dops,
> +	struct xfs_zone_alloc_ctx *ac)
>  {
>  	unsigned int		iolock = XFS_IOLOCK_SHARED;
>  	ssize_t			ret;
> @@ -642,7 +683,7 @@ xfs_file_dio_write_aligned(
>  	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
>  	if (ret)
>  		return ret;
> -	ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
> +	ret = xfs_file_write_checks(iocb, from, &iolock, ac);
>  	if (ret)
>  		goto out_unlock;
>  
> @@ -656,11 +697,31 @@ xfs_file_dio_write_aligned(
>  		iolock = XFS_IOLOCK_SHARED;
>  	}
>  	trace_xfs_file_direct_write(iocb, from);
> -	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
> -			   &xfs_dio_write_ops, 0, NULL, 0);
> +	ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
>  out_unlock:
> -	if (iolock)
> -		xfs_iunlock(ip, iolock);
> +	xfs_iunlock(ip, iolock);
> +	return ret;
> +}
> +
> +/*
> + * Handle block aligned direct I/O writes to zoned devices.
> + */
> +static noinline ssize_t
> +xfs_file_dio_write_zoned(
> +	struct xfs_inode	*ip,
> +	struct kiocb		*iocb,
> +	struct iov_iter		*from)
> +{
> +	struct xfs_zone_alloc_ctx ac = { };
> +	ssize_t			ret;
> +
> +	ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
> +	if (ret < 0)
> +		return ret;
> +	ret = xfs_file_dio_write_aligned(ip, iocb, from,
> +			&xfs_zoned_direct_write_iomap_ops,
> +			&xfs_dio_zoned_write_ops, &ac);
> +	xfs_zoned_space_unreserve(ip, &ac);
>  	return ret;
>  }
>  
> @@ -777,7 +838,10 @@ xfs_file_dio_write(
>  	    (xfs_is_always_cow_inode(ip) &&
>  	     (iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
>  		return xfs_file_dio_write_unaligned(ip, iocb, from);
> -	return xfs_file_dio_write_aligned(ip, iocb, from);
> +	if (xfs_is_zoned_inode(ip))
> +		return xfs_file_dio_write_zoned(ip, iocb, from);
> +	return xfs_file_dio_write_aligned(ip, iocb, from,
> +			&xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
>  }
>  
>  static noinline ssize_t
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 402b253ce3a2..9626632883d0 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -965,6 +965,60 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
>  	.iomap_begin		= xfs_direct_write_iomap_begin,
>  };
>  
> +#ifdef CONFIG_XFS_RT
> +/*
> + * This is really simple.  The space has already been reserved before taking the
> + * IOLOCK, the actual block allocation is done just before submitting the bio
> + * and only recorded in the extent map on I/O completion.
> + */
> +static int
> +xfs_zoned_direct_write_iomap_begin(
> +	struct inode		*inode,
> +	loff_t			offset,
> +	loff_t			length,
> +	unsigned		flags,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
> +{
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	int			error;
> +
> +	ASSERT(!(flags & IOMAP_OVERWRITE_ONLY));
> +
> +	/*
> +	 * Needs to be pushed down into the allocator so that only writes into
> +	 * a single zone can be supported.
> +	 */
> +	if (flags & IOMAP_NOWAIT)
> +		return -EAGAIN;
> +
> +	/*
> +	 * Ensure the extent list is in memory in so that we don't have to do
> +	 * read it from the I/O completion handler.
> +	 */
> +	if (xfs_need_iread_extents(&ip->i_df)) {
> +		xfs_ilock(ip, XFS_ILOCK_EXCL);
> +		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> +		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +		if (error)
> +			return error;
> +	}
> +
> +	iomap->type = IOMAP_MAPPED;
> +	iomap->flags = IOMAP_F_DIRTY;
> +	iomap->bdev = ip->i_mount->m_rtdev_targp->bt_bdev;
> +	iomap->offset = offset;
> +	iomap->length = length;
> +	iomap->flags = IOMAP_F_ZONE_APPEND;
> +	iomap->addr = 0;
> +	return 0;
> +}
> +
> +const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
> +	.iomap_begin		= xfs_zoned_direct_write_iomap_begin,
> +};
> +#endif /* CONFIG_XFS_RT */
> +
>  static int
>  xfs_dax_write_iomap_end(
>  	struct inode		*inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index bc8a00cad854..d330c4a581b1 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -51,6 +51,7 @@ xfs_aligned_fsb_count(
>  
>  extern const struct iomap_ops xfs_buffered_write_iomap_ops;
>  extern const struct iomap_ops xfs_direct_write_iomap_ops;
> +extern const struct iomap_ops xfs_zoned_direct_write_iomap_ops;
>  extern const struct iomap_ops xfs_read_iomap_ops;
>  extern const struct iomap_ops xfs_seek_iomap_ops;
>  extern const struct iomap_ops xfs_xattr_iomap_ops;
> -- 
> 2.45.2
> 
> 




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux