Re: [PATCH 09/13] iomap: don't chain bios

On Sun, Nov 26, 2023 at 01:47:16PM +0100, Christoph Hellwig wrote:
> Back in the days when a single bio could only be filled to the hardware
> limits, and we scheduled a work item for each bio completion, chaining
> multiple bios for a single ioend made a lot of sense to reduce the number
> of completions.  But these days bios can be filled until we reach the
> number of vectors or total size limit, which means we can always fit at
> least 1 megabyte worth of data in the worst case, but usually a lot more
> due to large folios.  The only thing bio chaining buys us now is
> reducing the size of each continuation allocation from an ioend with
> an embedded bio down to a plain bio, which is a 52-byte difference on
> 64-bit systems.
> 
> This is not worth the added complexity, so remove the bio chaining and
> only use the bio embedded into the ioend.  This will help to simplify
> further changes to the iomap writeback code.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Well that's a nice code deflation!  And if I read this correctly,
instead of chaining bios together, now we create a new ioend and add it
to the ioend list, eventually submitting the entire list of them?
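i.e., something like this userspace doodle?  (The mock_* types below
are my own stand-ins, not the real kernel structures; it's a sketch of
how I read the new flow, not the literal code.)

#include <stdio.h>

#define MOCK_BIO_MAX_VECS 4	/* tiny on purpose, to force new ioends */

/* stand-ins for struct bio and an ioend with an embedded bio */
struct mock_bio {
	int nr_vecs;			/* folios added so far */
};

struct mock_ioend {
	struct mock_ioend *io_next;	/* the per-writeback ioend list */
	long io_offset;
	struct mock_bio io_bio;		/* embedded, MUST BE LAST */
};

static struct mock_ioend *alloc_ioend(long pos)
{
	static struct mock_ioend pool[16];	/* toy pool, no bounds checks */
	static int used;
	struct mock_ioend *ioend = &pool[used++];

	ioend->io_next = NULL;
	ioend->io_offset = pos;
	ioend->io_bio.nr_vecs = 0;
	return ioend;
}

/*
 * Add one folio: if the embedded bio is full, queue the current ioend
 * on the list and open a fresh one -- no more bio chaining.
 */
static struct mock_ioend *add_folio(struct mock_ioend *ioend, long pos,
				    struct mock_ioend **iolist)
{
	if (!ioend || ioend->io_bio.nr_vecs == MOCK_BIO_MAX_VECS) {
		if (ioend) {
			ioend->io_next = *iolist;	/* list_add() stand-in */
			*iolist = ioend;
		}
		ioend = alloc_ioend(pos);
	}
	ioend->io_bio.nr_vecs++;
	return ioend;
}

int main(void)
{
	struct mock_ioend *iolist = NULL, *ioend = NULL;
	long pos;

	for (pos = 0; pos < 10; pos++)
		ioend = add_folio(ioend, pos, &iolist);
	if (ioend) {			/* queue the final ioend too */
		ioend->io_next = iolist;
		iolist = ioend;
	}

	/* "submit" the whole list, one bio per ioend, no chaining */
	for (ioend = iolist; ioend; ioend = ioend->io_next)
		printf("submit ioend at offset %ld, %d vecs\n",
		       ioend->io_offset, ioend->io_bio.nr_vecs);
	return 0;
}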

If so,

Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>
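
(Aside for anyone reading along: the bi_private removal falls out of
embedding the bio, since iomap_ioend_from_bio() can get back to the
ioend with container_of().  A minimal userspace analogue, with a
hand-rolled container_of because the real macro lives in the kernel
headers:

#include <stddef.h>
#include <stdio.h>

/* userspace stand-in for the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mock_bio { int status; };

struct mock_ioend {
	long io_offset;
	struct mock_bio io_bio;		/* embedded, MUST BE LAST */
};

/* analogue of iomap_ioend_from_bio(): no bi_private pointer needed */
static struct mock_ioend *ioend_from_bio(struct mock_bio *bio)
{
	return container_of(bio, struct mock_ioend, io_bio);
}

int main(void)
{
	struct mock_ioend ioend = { .io_offset = 4096 };
	struct mock_bio *bio = &ioend.io_bio;	/* what the end_io handler sees */

	printf("recovered offset: %ld\n", ioend_from_bio(bio)->io_offset);
	return 0;
}

That's the same trick the completion handlers use now instead of
stashing the ioend in bi_private.)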

--D

> ---
>  fs/iomap/buffered-io.c | 90 +++++++++++-------------------------------
>  fs/xfs/xfs_aops.c      |  6 +--
>  include/linux/iomap.h  |  8 +++-
>  3 files changed, 32 insertions(+), 72 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 329e2c342f1c64..17760f3e67c61e 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -1489,40 +1489,23 @@ static u32
>  iomap_finish_ioend(struct iomap_ioend *ioend, int error)
>  {
>  	struct inode *inode = ioend->io_inode;
> -	struct bio *bio = &ioend->io_inline_bio;
> -	struct bio *last = ioend->io_bio, *next;
> -	u64 start = bio->bi_iter.bi_sector;
> -	loff_t offset = ioend->io_offset;
> -	bool quiet = bio_flagged(bio, BIO_QUIET);
> +	struct bio *bio = &ioend->io_bio;
> +	struct folio_iter fi;
>  	u32 folio_count = 0;
>  
> -	for (bio = &ioend->io_inline_bio; bio; bio = next) {
> -		struct folio_iter fi;
> -
> -		/*
> -		 * For the last bio, bi_private points to the ioend, so we
> -		 * need to explicitly end the iteration here.
> -		 */
> -		if (bio == last)
> -			next = NULL;
> -		else
> -			next = bio->bi_private;
> -
> -		/* walk all folios in bio, ending page IO on them */
> -		bio_for_each_folio_all(fi, bio) {
> -			iomap_finish_folio_write(inode, fi.folio, fi.length,
> -					error);
> -			folio_count++;
> -		}
> -		bio_put(bio);
> +	/* walk all folios in bio, ending page IO on them */
> +	bio_for_each_folio_all(fi, bio) {
> +		iomap_finish_folio_write(inode, fi.folio, fi.length, error);
> +		folio_count++;
>  	}
> -	/* The ioend has been freed by bio_put() */
>  
> -	if (unlikely(error && !quiet)) {
> +	if (unlikely(error && !bio_flagged(bio, BIO_QUIET))) {
>  		printk_ratelimited(KERN_ERR
>  "%s: writeback error on inode %lu, offset %lld, sector %llu",
> -			inode->i_sb->s_id, inode->i_ino, offset, start);
> +			inode->i_sb->s_id, inode->i_ino,
> +			ioend->io_offset, ioend->io_sector);
>  	}
> +	bio_put(bio);	/* frees the ioend */
>  	return folio_count;
>  }
>  
> @@ -1563,7 +1546,7 @@ EXPORT_SYMBOL_GPL(iomap_finish_ioends);
>  static bool
>  iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
>  {
> -	if (ioend->io_bio->bi_status != next->io_bio->bi_status)
> +	if (ioend->io_bio.bi_status != next->io_bio.bi_status)
>  		return false;
>  	if ((ioend->io_flags & IOMAP_F_SHARED) ^
>  	    (next->io_flags & IOMAP_F_SHARED))
> @@ -1628,9 +1611,8 @@ EXPORT_SYMBOL_GPL(iomap_sort_ioends);
>  
>  static void iomap_writepage_end_bio(struct bio *bio)
>  {
> -	struct iomap_ioend *ioend = bio->bi_private;
> -
> -	iomap_finish_ioend(ioend, blk_status_to_errno(bio->bi_status));
> +	iomap_finish_ioend(iomap_ioend_from_bio(bio),
> +			blk_status_to_errno(bio->bi_status));
>  }
>  
>  /*
> @@ -1645,9 +1627,6 @@ static int
>  iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
>  		int error)
>  {
> -	ioend->io_bio->bi_private = ioend;
> -	ioend->io_bio->bi_end_io = iomap_writepage_end_bio;
> -
>  	if (wpc->ops->prepare_ioend)
>  		error = wpc->ops->prepare_ioend(ioend, error);
>  	if (error) {
> @@ -1657,12 +1636,12 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
>  		 * as there is only one reference to the ioend at this point in
>  		 * time.
>  		 */
> -		ioend->io_bio->bi_status = errno_to_blk_status(error);
> -		bio_endio(ioend->io_bio);
> +		ioend->io_bio.bi_status = errno_to_blk_status(error);
> +		bio_endio(&ioend->io_bio);
>  		return error;
>  	}
>  
> -	submit_bio(ioend->io_bio);
> +	submit_bio(&ioend->io_bio);
>  	return 0;
>  }
>  
> @@ -1676,44 +1655,22 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
>  			       REQ_OP_WRITE | wbc_to_write_flags(wbc),
>  			       GFP_NOFS, &iomap_ioend_bioset);
>  	bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
> +	bio->bi_end_io = iomap_writepage_end_bio;
>  	wbc_init_bio(wbc, bio);
>  
> -	ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
> +	ioend = iomap_ioend_from_bio(bio);
>  	INIT_LIST_HEAD(&ioend->io_list);
>  	ioend->io_type = wpc->iomap.type;
>  	ioend->io_flags = wpc->iomap.flags;
>  	ioend->io_inode = inode;
>  	ioend->io_size = 0;
>  	ioend->io_offset = pos;
> -	ioend->io_bio = bio;
>  	ioend->io_sector = bio->bi_iter.bi_sector;
>  
>  	wpc->nr_folios = 0;
>  	return ioend;
>  }
>  
> -/*
> - * Allocate a new bio, and chain the old bio to the new one.
> - *
> - * Note that we have to perform the chaining in this unintuitive order
> - * so that the bi_private linkage is set up in the right direction for the
> - * traversal in iomap_finish_ioend().
> - */
> -static struct bio *
> -iomap_chain_bio(struct bio *prev)
> -{
> -	struct bio *new;
> -
> -	new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
> -	bio_clone_blkg_association(new, prev);
> -	new->bi_iter.bi_sector = bio_end_sector(prev);
> -
> -	bio_chain(prev, new);
> -	bio_get(prev);		/* for iomap_finish_ioend */
> -	submit_bio(prev);
> -	return new;
> -}
> -
>  static bool
>  iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset)
>  {
> @@ -1725,7 +1682,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset)
>  	if (offset != wpc->ioend->io_offset + wpc->ioend->io_size)
>  		return false;
>  	if (iomap_sector(&wpc->iomap, offset) !=
> -	    bio_end_sector(wpc->ioend->io_bio))
> +	    bio_end_sector(&wpc->ioend->io_bio))
>  		return false;
>  	/*
>  	 * Limit ioend bio chain lengths to minimise IO completion latency. This
> @@ -1750,15 +1707,14 @@ static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
>  	size_t poff = offset_in_folio(folio, pos);
>  
>  	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
> +new_ioend:
>  		if (wpc->ioend)
>  			list_add(&wpc->ioend->io_list, iolist);
>  		wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
>  	}
>  
> -	if (!bio_add_folio(wpc->ioend->io_bio, folio, len, poff)) {
> -		wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio);
> -		bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff);
> -	}
> +	if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
> +		goto new_ioend;
>  
>  	if (ifs)
>  		atomic_add(len, &ifs->write_bytes_pending);
> @@ -1979,7 +1935,7 @@ EXPORT_SYMBOL_GPL(iomap_writepages);
>  static int __init iomap_init(void)
>  {
>  	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> -			   offsetof(struct iomap_ioend, io_inline_bio),
> +			   offsetof(struct iomap_ioend, io_bio),
>  			   BIOSET_NEED_BVECS);
>  }
>  fs_initcall(iomap_init);
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 465d7630bb2185..b45ee6cbbdaab2 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -112,7 +112,7 @@ xfs_end_ioend(
>  	 * longer dirty. If we don't remove delalloc blocks here, they become
>  	 * stale and can corrupt free space accounting on unmount.
>  	 */
> -	error = blk_status_to_errno(ioend->io_bio->bi_status);
> +	error = blk_status_to_errno(ioend->io_bio.bi_status);
>  	if (unlikely(error)) {
>  		if (ioend->io_flags & IOMAP_F_SHARED) {
>  			xfs_reflink_cancel_cow_range(ip, offset, size, true);
> @@ -179,7 +179,7 @@ STATIC void
>  xfs_end_bio(
>  	struct bio		*bio)
>  {
> -	struct iomap_ioend	*ioend = bio->bi_private;
> +	struct iomap_ioend	*ioend = iomap_ioend_from_bio(bio);
>  	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
>  	unsigned long		flags;
>  
> @@ -444,7 +444,7 @@ xfs_prepare_ioend(
>  	/* send ioends that might require a transaction to the completion wq */
>  	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
>  	    (ioend->io_flags & IOMAP_F_SHARED))
> -		ioend->io_bio->bi_end_io = xfs_end_bio;
> +		ioend->io_bio.bi_end_io = xfs_end_bio;
>  	return status;
>  }
>  
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index b2a05dff914d0c..b8d3b658ad2b03 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -297,10 +297,14 @@ struct iomap_ioend {
>  	size_t			io_size;	/* size of the extent */
>  	loff_t			io_offset;	/* offset in the file */
>  	sector_t		io_sector;	/* start sector of ioend */
> -	struct bio		*io_bio;	/* bio being built */
> -	struct bio		io_inline_bio;	/* MUST BE LAST! */
> +	struct bio		io_bio;		/* MUST BE LAST! */
>  };
>  
> +static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio)
> +{
> +	return container_of(bio, struct iomap_ioend, io_bio);
> +}
> +
>  struct iomap_writeback_ops {
>  	/*
>  	 * Required, maps the blocks so that writeback can be performed on
> -- 
> 2.39.2
> 
> 



