Re: [PATCH 3/3] xfs: optimize bio handling in the buffer writeback path

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Mar 16, 2016 at 12:44:41PM +0100, Christoph Hellwig wrote:
> This patch implements two closely related changes:  First it embeds a
> bio the ioend structure so that we don't have to allocate one separately.
> Second it uses the block layer bio chaining mechanism to chain additional
> bios off this first one if needed instead of manually accounting for
> multiple bio completions in the ioend structure.  Together this removes a
> memory allocation per ioend and greatly simplifies the ioend setup and
> I/O completion path.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---

Thanks for the helper cleanups and the bi_error fix. Looks good to me:

Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>

>  fs/xfs/xfs_aops.c  | 247 ++++++++++++++++++++++++-----------------------------
>  fs/xfs/xfs_aops.h  |  15 ++--
>  fs/xfs/xfs_super.c |  26 ++----
>  3 files changed, 123 insertions(+), 165 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 5928770..42e7368 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -124,18 +124,25 @@ next_bh:
>   */
>  STATIC void
>  xfs_destroy_ioend(
> -	struct xfs_ioend	*ioend)
> +	struct xfs_ioend	*ioend,
> +	int			error)
>  {
>  	struct inode		*inode = ioend->io_inode;
> -	int			error = ioend->io_error;
> +	struct bio		*last = ioend->io_bio;
>  	struct bio		*bio, *next;
>  
> -	for (bio = ioend->io_bio_done; bio; bio = next) {
> +	for (bio = &ioend->io_inline_bio; bio; bio = next) {
>  		struct bio_vec	*bvec;
>  		int		i;
>  
> -		next = bio->bi_private;
> -		bio->bi_private = NULL;
> +		/*
> +		 * For the last bio, bi_private points to the ioend, so we
> +		 * need to explicitly end the iteration here.
> +		 */
> +		if (bio == last)
> +			next = NULL;
> +		else
> +			next = bio->bi_private;
>  
>  		/* walk each page on bio, ending page IO on them */
>  		bio_for_each_segment_all(bvec, bio, i)
> @@ -143,8 +150,6 @@ xfs_destroy_ioend(
>  
>  		bio_put(bio);
>  	}
> -
> -	mempool_free(ioend, xfs_ioend_pool);
>  }
>  
>  /*
> @@ -218,7 +223,8 @@ xfs_setfilesize(
>  
>  STATIC int
>  xfs_setfilesize_ioend(
> -	struct xfs_ioend	*ioend)
> +	struct xfs_ioend	*ioend,
> +	int			error)
>  {
>  	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
>  	struct xfs_trans	*tp = ioend->io_append_trans;
> @@ -232,53 +238,32 @@ xfs_setfilesize_ioend(
>  	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
>  
>  	/* we abort the update if there was an IO error */
> -	if (ioend->io_error) {
> +	if (error) {
>  		xfs_trans_cancel(tp);
> -		return ioend->io_error;
> +		return error;
>  	}
>  
>  	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
>  }
>  
>  /*
> - * Schedule IO completion handling on the final put of an ioend.
> - *
> - * If there is no work to do we might as well call it a day and free the
> - * ioend right now.
> - */
> -STATIC void
> -xfs_finish_ioend(
> -	struct xfs_ioend	*ioend)
> -{
> -	if (atomic_dec_and_test(&ioend->io_remaining)) {
> -		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
> -
> -		if (ioend->io_type == XFS_IO_UNWRITTEN)
> -			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
> -		else if (ioend->io_append_trans)
> -			queue_work(mp->m_data_workqueue, &ioend->io_work);
> -		else
> -			xfs_destroy_ioend(ioend);
> -	}
> -}
> -
> -/*
>   * IO write completion.
>   */
>  STATIC void
>  xfs_end_io(
>  	struct work_struct *work)
>  {
> -	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work);
> -	struct xfs_inode *ip = XFS_I(ioend->io_inode);
> -	int		error = 0;
> +	struct xfs_ioend	*ioend =
> +		container_of(work, struct xfs_ioend, io_work);
> +	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
> +	int			error = ioend->io_bio->bi_error;
>  
>  	/*
>  	 * Set an error if the mount has shut down and proceed with end I/O
>  	 * processing so it can perform whatever cleanups are necessary.
>  	 */
>  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
> -		ioend->io_error = -EIO;
> +		error = -EIO;
>  
>  	/*
>  	 * For unwritten extents we need to issue transactions to convert a
> @@ -288,50 +273,33 @@ xfs_end_io(
>  	 * on error.
>  	 */
>  	if (ioend->io_type == XFS_IO_UNWRITTEN) {
> -		if (ioend->io_error)
> +		if (error)
>  			goto done;
>  		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
>  						  ioend->io_size);
>  	} else if (ioend->io_append_trans) {
> -		error = xfs_setfilesize_ioend(ioend);
> +		error = xfs_setfilesize_ioend(ioend, error);
>  	} else {
>  		ASSERT(!xfs_ioend_is_append(ioend));
>  	}
>  
>  done:
> -	if (error)
> -		ioend->io_error = error;
> -	xfs_destroy_ioend(ioend);
> +	xfs_destroy_ioend(ioend, error);
>  }
>  
> -/*
> - * Allocate and initialise an IO completion structure.
> - * We need to track unwritten extent write completion here initially.
> - * We'll need to extend this for updating the ondisk inode size later
> - * (vs. incore size).
> - */
> -STATIC xfs_ioend_t *
> -xfs_alloc_ioend(
> -	struct inode		*inode,
> -	unsigned int		type)
> +STATIC void
> +xfs_end_bio(
> +	struct bio		*bio)
>  {
> -	xfs_ioend_t		*ioend;
> -
> -	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
> -	memset(ioend, 0, sizeof(*ioend));
> +	struct xfs_ioend	*ioend = bio->bi_private;
> +	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
>  
> -	/*
> -	 * Set the count to 1 initially, which will prevent an I/O
> -	 * completion callback from happening before we have started
> -	 * all the I/O from calling the completion routine too early.
> -	 */
> -	atomic_set(&ioend->io_remaining, 1);
> -	INIT_LIST_HEAD(&ioend->io_list);
> -	ioend->io_type = type;
> -	ioend->io_inode = inode;
> -	INIT_WORK(&ioend->io_work, xfs_end_io);
> -	spin_lock_init(&ioend->io_lock);
> -	return ioend;
> +	if (ioend->io_type == XFS_IO_UNWRITTEN)
> +		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
> +	else if (ioend->io_append_trans)
> +		queue_work(mp->m_data_workqueue, &ioend->io_work);
> +	else
> +		xfs_destroy_ioend(ioend, bio->bi_error);
>  }
>  
>  STATIC int
> @@ -403,56 +371,6 @@ xfs_imap_valid(
>  		offset < imap->br_startoff + imap->br_blockcount;
>  }
>  
> -/*
> - * BIO completion handler for buffered IO.
> - */
> -STATIC void
> -xfs_end_bio(
> -	struct bio		*bio)
> -{
> -	struct xfs_ioend	*ioend = bio->bi_private;
> -	unsigned long		flags;
> -
> -	bio->bi_private = NULL;
> -	bio->bi_end_io = NULL;
> -
> -	spin_lock_irqsave(&ioend->io_lock, flags);
> -	if (!ioend->io_error)
> -	       ioend->io_error = bio->bi_error;
> -	if (!ioend->io_bio_done)
> -		ioend->io_bio_done = bio;
> -	else
> -		ioend->io_bio_done_tail->bi_private = bio;
> -	ioend->io_bio_done_tail = bio;
> -	spin_unlock_irqrestore(&ioend->io_lock, flags);
> -
> -	xfs_finish_ioend(ioend);
> -}
> -
> -STATIC void
> -xfs_submit_ioend_bio(
> -	struct writeback_control *wbc,
> -	xfs_ioend_t		*ioend,
> -	struct bio		*bio)
> -{
> -	atomic_inc(&ioend->io_remaining);
> -	bio->bi_private = ioend;
> -	bio->bi_end_io = xfs_end_bio;
> -	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
> -}
> -
> -STATIC struct bio *
> -xfs_alloc_ioend_bio(
> -	struct buffer_head	*bh)
> -{
> -	struct bio		*bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
> -
> -	ASSERT(bio->bi_private == NULL);
> -	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> -	bio->bi_bdev = bh->b_bdev;
> -	return bio;
> -}
> -
>  STATIC void
>  xfs_start_buffer_writeback(
>  	struct buffer_head	*bh)
> @@ -513,16 +431,19 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
>  STATIC int
>  xfs_submit_ioend(
>  	struct writeback_control *wbc,
> -	xfs_ioend_t		*ioend,
> +	struct xfs_ioend	*ioend,
>  	int			status)
>  {
>  	/* Reserve log space if we might write beyond the on-disk inode size. */
>  	if (!status &&
> -	    ioend->io_bio && ioend->io_type != XFS_IO_UNWRITTEN &&
> +	    ioend->io_type != XFS_IO_UNWRITTEN &&
>  	    xfs_ioend_is_append(ioend) &&
>  	    !ioend->io_append_trans)
>  		status = xfs_setfilesize_trans_alloc(ioend);
>  
> +	ioend->io_bio->bi_private = ioend;
> +	ioend->io_bio->bi_end_io = xfs_end_bio;
> +
>  	/*
>  	 * If we are failing the IO now, just mark the ioend with an
>  	 * error and finish it. This will run IO completion immediately
> @@ -530,19 +451,75 @@ xfs_submit_ioend(
>  	 * time.
>  	 */
>  	if (status) {
> -		if (ioend->io_bio)
> -			bio_put(ioend->io_bio);
> -		ioend->io_error = status;
> -		xfs_finish_ioend(ioend);
> +		ioend->io_bio->bi_error = status;
> +		bio_endio(ioend->io_bio);
>  		return status;
>  	}
>  
> -	xfs_submit_ioend_bio(wbc, ioend, ioend->io_bio);
> -	ioend->io_bio = NULL;
> -	xfs_finish_ioend(ioend);
> +	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
> +			ioend->io_bio);
>  	return 0;
>  }
>  
> +static void
> +xfs_init_bio_from_bh(
> +	struct bio		*bio,
> +	struct buffer_head	*bh)
> +{
> +	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> +	bio->bi_bdev = bh->b_bdev;
> +}
> +
> +static struct xfs_ioend *
> +xfs_alloc_ioend(
> +	struct inode		*inode,
> +	unsigned int		type,
> +	xfs_off_t		offset,
> +	struct buffer_head	*bh)
> +{
> +	struct xfs_ioend	*ioend;
> +	struct bio		*bio;
> +
> +	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
> +	xfs_init_bio_from_bh(bio, bh);
> +
> +	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
> +	INIT_LIST_HEAD(&ioend->io_list);
> +	ioend->io_type = type;
> +	ioend->io_inode = inode;
> +	ioend->io_size = 0;
> +	ioend->io_offset = offset;
> +	INIT_WORK(&ioend->io_work, xfs_end_io);
> +	ioend->io_append_trans = NULL;
> +	ioend->io_bio = bio;
> +	return ioend;
> +}
> +
> +/*
> + * Allocate a new bio, and chain the old bio to the new one.
> + *
> + * Note that we have to perform the chaining in this unintuitive order
> + * so that the bi_private linkage is set up in the right direction for the
> + * traversal in xfs_destroy_ioend().
> + */
> +static void
> +xfs_chain_bio(
> +	struct xfs_ioend	*ioend,
> +	struct writeback_control *wbc,
> +	struct buffer_head	*bh)
> +{
> +	struct bio *new;
> +
> +	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
> +	xfs_init_bio_from_bh(new, bh);
> +
> +	bio_chain(ioend->io_bio, new);
> +	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
> +	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
> +			ioend->io_bio);
> +	ioend->io_bio = new;
> +}
> +
>  /*
>   * Test to see if we've been building up a completion structure for
>   * earlier buffers -- if so, we try to append to this ioend if we
> @@ -564,19 +541,15 @@ xfs_add_to_ioend(
>  	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
>  		if (wpc->ioend)
>  			list_add(&wpc->ioend->io_list, iolist);
> -		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type);
> -		wpc->ioend->io_offset = offset;
> +		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
>  	}
>  
> -retry:
> -	if (!wpc->ioend->io_bio)
> -		wpc->ioend->io_bio = xfs_alloc_ioend_bio(bh);
> -
> -	if (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) {
> -		xfs_submit_ioend_bio(wbc, wpc->ioend, wpc->ioend->io_bio);
> -		wpc->ioend->io_bio = NULL;
> -		goto retry;
> -	}
> +	/*
> +	 * If the buffer doesn't fit into the bio we need to allocate a new
> +	 * one.  This shouldn't happen more than once for a given buffer.
> +	 */
> +	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
> +		xfs_chain_bio(wpc->ioend, wbc, bh);
>  
>  	wpc->ioend->io_size += bh->b_size;
>  	wpc->last_block = bh->b_blocknr;
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index 1c7b041..8b5b641 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -18,7 +18,7 @@
>  #ifndef __XFS_AOPS_H__
>  #define __XFS_AOPS_H__
>  
> -extern mempool_t *xfs_ioend_pool;
> +extern struct bio_set *xfs_ioend_bioset;
>  
>  /*
>   * Types of I/O for bmap clustering and I/O completion tracking.
> @@ -37,24 +37,19 @@ enum {
>  	{ XFS_IO_OVERWRITE,		"overwrite" }
>  
>  /*
> - * xfs_ioend struct manages large extent writes for XFS.
> - * It can manage several multi-page bio's at once.
> + * Structure for buffered I/O completions.
>   */
> -typedef struct xfs_ioend {
> +struct xfs_ioend {
>  	struct list_head	io_list;	/* next ioend in chain */
>  	unsigned int		io_type;	/* delalloc / unwritten */
> -	int			io_error;	/* I/O error code */
> -	atomic_t		io_remaining;	/* hold count */
>  	struct inode		*io_inode;	/* file being written to */
>  	size_t			io_size;	/* size of the extent */
>  	xfs_off_t		io_offset;	/* offset in the file */
>  	struct work_struct	io_work;	/* xfsdatad work queue */
>  	struct xfs_trans	*io_append_trans;/* xact. for size update */
>  	struct bio		*io_bio;	/* bio being built */
> -	struct bio		*io_bio_done;	/* bios completed */
> -	struct bio		*io_bio_done_tail; /* bios completed */
> -	spinlock_t		io_lock;	/* for bio completion list */
> -} xfs_ioend_t;
> +	struct bio		io_inline_bio;	/* MUST BE LAST! */
> +};
>  
>  extern const struct address_space_operations xfs_address_space_operations;
>  
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index d760934..e52e9c1 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -58,8 +58,7 @@
>  #include <linux/parser.h>
>  
>  static const struct super_operations xfs_super_operations;
> -static kmem_zone_t *xfs_ioend_zone;
> -mempool_t *xfs_ioend_pool;
> +struct bio_set *xfs_ioend_bioset;
>  
>  static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
>  #ifdef DEBUG
> @@ -1688,20 +1687,15 @@ MODULE_ALIAS_FS("xfs");
>  STATIC int __init
>  xfs_init_zones(void)
>  {
> -
> -	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
> -	if (!xfs_ioend_zone)
> +	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
> +			offsetof(struct xfs_ioend, io_inline_bio));
> +	if (!xfs_ioend_bioset)
>  		goto out;
>  
> -	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
> -						  xfs_ioend_zone);
> -	if (!xfs_ioend_pool)
> -		goto out_destroy_ioend_zone;
> -
>  	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
>  						"xfs_log_ticket");
>  	if (!xfs_log_ticket_zone)
> -		goto out_destroy_ioend_pool;
> +		goto out_free_ioend_bioset;
>  
>  	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
>  						"xfs_bmap_free_item");
> @@ -1797,10 +1791,8 @@ xfs_init_zones(void)
>  	kmem_zone_destroy(xfs_bmap_free_item_zone);
>   out_destroy_log_ticket_zone:
>  	kmem_zone_destroy(xfs_log_ticket_zone);
> - out_destroy_ioend_pool:
> -	mempool_destroy(xfs_ioend_pool);
> - out_destroy_ioend_zone:
> -	kmem_zone_destroy(xfs_ioend_zone);
> + out_free_ioend_bioset:
> +	bioset_free(xfs_ioend_bioset);
>   out:
>  	return -ENOMEM;
>  }
> @@ -1826,9 +1818,7 @@ xfs_destroy_zones(void)
>  	kmem_zone_destroy(xfs_btree_cur_zone);
>  	kmem_zone_destroy(xfs_bmap_free_item_zone);
>  	kmem_zone_destroy(xfs_log_ticket_zone);
> -	mempool_destroy(xfs_ioend_pool);
> -	kmem_zone_destroy(xfs_ioend_zone);
> -
> +	bioset_free(xfs_ioend_bioset);
>  }
>  
>  STATIC int __init
> -- 
> 2.1.4
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux