Re: [PATCH 2/2] iomap: Convert readahead to iomap_iter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jul 28, 2020 at 06:32:15PM +0100, Matthew Wilcox (Oracle) wrote:
> This approach removes at least two indirect function calls from the
> readahead path.  Previous call chain (indirect function calls marked *):
> 
> xfs_vm_readahead
>   iomap_readahead
>     iomap_apply
>       xfs_read_iomap_begin [*]
>       iomap_readahead_actor [*]
>         iomap_readpage_actor
> 
> New call chain:
> 
> xfs_vm_readahead
>   xfs_iomap_next_read
>   iomi_advance
>   iomap_readahead
>     iomap_readpage_actor

I mostly like this, with a few comments...

> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  fs/iomap/buffered-io.c | 82 ++++++++++++++----------------------------
>  fs/xfs/xfs_aops.c      |  9 ++++-
>  fs/xfs/xfs_iomap.c     | 15 ++++++++
>  fs/xfs/xfs_iomap.h     |  2 ++
>  fs/zonefs/super.c      | 20 ++++++++++-
>  include/linux/iomap.h  | 10 +++++-
>  6 files changed, 79 insertions(+), 59 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index bcfc288dba3f..fff23ed6a682 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -206,13 +206,6 @@ iomap_read_end_io(struct bio *bio)
>  	bio_put(bio);
>  }
>  
> -struct iomap_readpage_ctx {
> -	struct page		*cur_page;
> -	bool			cur_page_in_bio;
> -	struct bio		*bio;
> -	struct readahead_control *rac;
> -};
> -
>  static void
>  iomap_read_inline_data(struct inode *inode, struct page *page,
>  		struct iomap *iomap)
> @@ -369,35 +362,10 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
>  }
>  EXPORT_SYMBOL_GPL(iomap_readpage);
>  
> -static loff_t
> -iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap, struct iomap *srcmap)
> -{
> -	struct iomap_readpage_ctx *ctx = data;
> -	loff_t done, ret;
> -
> -	for (done = 0; done < length; done += ret) {
> -		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
> -			if (!ctx->cur_page_in_bio)
> -				unlock_page(ctx->cur_page);
> -			put_page(ctx->cur_page);
> -			ctx->cur_page = NULL;
> -		}
> -		if (!ctx->cur_page) {
> -			ctx->cur_page = readahead_page(ctx->rac);
> -			ctx->cur_page_in_bio = false;
> -		}
> -		ret = iomap_readpage_actor(inode, pos + done, length - done,
> -				ctx, iomap, srcmap);
> -	}
> -
> -	return done;
> -}
> -
>  /**
>   * iomap_readahead - Attempt to read pages from a file.
> + * @iomi: The iomap iterator for this operation.
>   * @rac: Describes the pages to be read.
> - * @ops: The operations vector for the filesystem.
>   *
>   * This function is for filesystems to call to implement their readahead
>   * address_space operation.
> @@ -409,35 +377,37 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
>   * function is called with memalloc_nofs set, so allocations will not cause
>   * the filesystem to be reentered.
>   */
> -void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
> +loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
>  {
> -	struct inode *inode = rac->mapping->host;
> -	loff_t pos = readahead_pos(rac);
> -	loff_t length = readahead_length(rac);
> -	struct iomap_readpage_ctx ctx = {
> -		.rac	= rac,
> -	};
> -
> -	trace_iomap_readahead(inode, readahead_count(rac));
> +	loff_t done, ret, length = iomap_length(iomi);
>  
> -	while (length > 0) {
> -		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
> -				&ctx, iomap_readahead_actor);
> -		if (ret <= 0) {
> -			WARN_ON_ONCE(ret == 0);
> -			break;
> +	for (done = 0; done < length; done += ret) {
> +		if (ctx->cur_page && offset_in_page(iomi->pos + done) == 0) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +			ctx->cur_page = NULL;
>  		}
> -		pos += ret;
> -		length -= ret;
> +		if (!ctx->cur_page) {
> +			ctx->cur_page = readahead_page(ctx->rac);
> +			ctx->cur_page_in_bio = false;
> +		}
> +		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
> +				length - done, ctx,
> +				&iomi->iomap, &iomi->srcmap);
>  	}
>  
> -	if (ctx.bio)
> -		submit_bio(ctx.bio);
> -	if (ctx.cur_page) {
> -		if (!ctx.cur_page_in_bio)
> -			unlock_page(ctx.cur_page);
> -		put_page(ctx.cur_page);
> +	if (iomi->len == done) {
> +		if (ctx->bio)
> +			submit_bio(ctx->bio);
> +		if (ctx->cur_page) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +		}
>  	}
> +
> +	return done;
>  }
>  EXPORT_SYMBOL_GPL(iomap_readahead);
>  
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index b35611882ff9..2884752e40e8 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -625,7 +625,14 @@ STATIC void
>  xfs_vm_readahead(
>  	struct readahead_control	*rac)
>  {
> -	iomap_readahead(rac, &xfs_read_iomap_ops);
> +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> +			readahead_length(rac), 0);
> +	struct iomap_readpage_ctx ctx = {
> +		.rac = rac,
> +	};
> +
> +	while (iomap_iter(&iomi, xfs_iomap_next_read))
> +		iomi.copied = iomap_readahead(&iomi, &ctx);

Why not have iomap_readahead set iomi.copied on its way out?  The actor
function is supposed to set iomi.ret if an error happens, right?

Oh wait no, the actor function returns a positive copied value, or a
negative error code, and then it's up to the _next_read function to
notice if copied is negative, stuff it in ret, and then return false to
stop the iteration?

>  }
>  
>  static int
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 0e3f62cde375..66f2fcaf136e 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -1150,6 +1150,21 @@ const struct iomap_ops xfs_read_iomap_ops = {
>  	.iomap_begin		= xfs_read_iomap_begin,
>  };
>  
> +int
> +xfs_iomap_next_read(
> +	const struct iomap_iter *iomi,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)

Aren't these last two parameters already in the iomap iter?
Are they passed separately to work around the pointer being const?

> +{
> +	if (iomi->copied < 0)
> +		return iomi->copied;

Is this boilerplate going to end up in every single iomap_next_t
function?  If so, it should probably just go in iomap_iter prior to the
next() call, right?

I also wonder if these functions (and the typedef) ought to be called
iomap_iter_advance_t since that's what they do -- pick up the status
from the last round, and advance the iterator to the next mapping that
we want to process.

> +	if (iomi->copied >= iomi->len)
> +		return 0;

Er... if we copied more than we asked for, doesn't that imply something
bad just happened?

> +
> +	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);

Would be kinda nice if you could just pass the whole iomap_iter, but I
get that we're probably stuck with this until the entirety gets
converted.

--D

> +}
> +
>  static int
>  xfs_seek_iomap_begin(
>  	struct inode		*inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 7d3703556d0e..1b1fa225e938 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -46,4 +46,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
>  extern const struct iomap_ops xfs_seek_iomap_ops;
>  extern const struct iomap_ops xfs_xattr_iomap_ops;
>  
> +int xfs_iomap_next_read(const struct iomap_iter *iomi, struct iomap *iomap,
> +		struct iomap *srcmap);
>  #endif /* __XFS_IOMAP_H__*/
> diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
> index 07bc42d62673..4842b85ce36d 100644
> --- a/fs/zonefs/super.c
> +++ b/fs/zonefs/super.c
> @@ -70,6 +70,17 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  	return 0;
>  }
>  
> +static int zonefs_iomap_next(const struct iomap_iter *iomi,
> +		struct iomap *iomap, struct iomap *srcmap)
> +{
> +	if (iomi->copied < 0)
> +		return iomi->copied;
> +	if (iomi->copied >= iomi->len)
> +		return 0;
> +	return zonefs_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
> +}
> +
>  static const struct iomap_ops zonefs_iomap_ops = {
>  	.iomap_begin	= zonefs_iomap_begin,
>  };
> @@ -81,7 +92,14 @@ static int zonefs_readpage(struct file *unused, struct page *page)
>  
>  static void zonefs_readahead(struct readahead_control *rac)
>  {
> -	iomap_readahead(rac, &zonefs_iomap_ops);
> +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> +			readahead_length(rac), 0);
> +	struct iomap_readpage_ctx ctx = {
> +		.rac = rac,
> +	};
> +
> +	while (iomap_iter(&iomi, zonefs_iomap_next))
> +		iomi.copied = iomap_readahead(&iomi, &ctx);
>  }
>  
>  /*
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index fe58e68ec0c1..dd9bfed85c4f 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -212,7 +212,6 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		const struct iomap_ops *ops);
>  int iomap_readpage(struct page *page, const struct iomap_ops *ops);
> -void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
>  int iomap_set_page_dirty(struct page *page);
>  int iomap_is_partially_uptodate(struct page *page, unsigned long from,
>  		unsigned long count);
> @@ -299,6 +298,15 @@ int iomap_writepages(struct address_space *mapping,
>  		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
>  		const struct iomap_writeback_ops *ops);
>  
> +struct iomap_readpage_ctx {
> +	struct page		*cur_page;
> +	bool			cur_page_in_bio;
> +	struct bio		*bio;
> +	struct readahead_control *rac;
> +};
> +
> +loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
> +
>  /*
>   * Flags for direct I/O ->end_io:
>   */
> -- 
> 2.27.0
> 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux