Re: [PATCH v2] ceph: fix blindly expanding the readahead windows

胡玮文 <huww98@xxxxxxxxxxx> · Thu, 4 May 2023 18:12:57 +0800

Hi Xiubo,

I just send a patch to solve the same issue[1].  My patch bound the
request by i_size, which is orthogonal to your changes. To incorporate
both, I propose the following patch.

Also, since this is a performance regression, I think we should backport
it to stable versions?

Another comment inline.

Hu Weiwen

[1]: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@xxxxxxxxxxxxxxxx/

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6bb251a4d613..d1d8e2562182 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -187,16 +187,30 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
 	struct inode *inode = rreq->inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_layout *lo = &ci->i_layout;
+	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+	unsigned long max_len = max_pages << PAGE_SHIFT;
+	loff_t end = rreq->start + rreq->len, new_end;
 	u32 blockoff;
 	u64 blockno;

-	/* Expand the start downward */
-	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
-	rreq->start = blockno * lo->stripe_unit;
-	rreq->len += blockoff;
+	/* Readahead is disabled */
+	if (!max_pages)
+		return;
+
+	/* Try to expand the length forward by rounding up it to the next block */
+	new_end = round_up(end, lo->stripe_unit);
+	/* But do not exceed the file size,
+	 * unless the original request already exceeds it. */
+	new_end = min(new_end, rreq->i_size);
+	if (new_end > end && new_end <= rreq->start + max_len)
+		rreq->len = new_end - rreq->start;

-	/* Now, round up the length to the next block */
-	rreq->len = roundup(rreq->len, lo->stripe_unit);
+	/* Try to expand the start downward */
+	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+	if (rreq->len + blockoff <= max_len) {
+		rreq->start = blockno * lo->stripe_unit;
+		rreq->len += blockoff;
+	}
 }

 static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)


On Thu, May 04, 2023 at 01:23:06PM +0800, xiubli@xxxxxxxxxx wrote:
> From: Xiubo Li <xiubli@xxxxxxxxxx>
> 
> Blindly expanding the readahead windows will cause unneccessary
> pagecache thrashing and also will introdue the network workload.
> We should disable expanding the windows if the readahead is disabled
> and also shouldn't expand the windows too much.
> 
> Expanding forward firstly instead of expanding backward for possible
> sequential reads.
> 
> URL: https://www.spinics.net/lists/ceph-users/msg76183.html
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
> 
> V2:
> - fix possible cross-block issue pointed out by Ilya.
> 
> 
>  fs/ceph/addr.c | 24 ++++++++++++++++++------
>  1 file changed, 18 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index ca4dc6450887..03a326da8fd8 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -188,16 +188,28 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
>  	struct inode *inode = rreq->inode;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_file_layout *lo = &ci->i_layout;
> +	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
> +	unsigned long max_len = max_pages << PAGE_SHIFT;
> +	unsigned long len;
>  	u32 blockoff;
>  	u64 blockno;
>  
> -	/* Expand the start downward */
> -	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> -	rreq->start = blockno * lo->stripe_unit;
> -	rreq->len += blockoff;
> +	/* Readahead is disabled */
> +	if (!max_pages)
> +		return;
> +
> +	/* Try to expand the length forward by rounding up it to the next block */
> +	div_u64_rem(rreq->start + rreq->len, lo->stripe_unit, &blockoff);
> +	len = lo->stripe_unit - blockoff;
This would expand the request by a whole block, if the original request
is already aligned (blockoff == 0).
> +	if (rreq->len + len <= max_len)
> +		rreq->len += len;
>  
> -	/* Now, round up the length to the next block */
> -	rreq->len = roundup(rreq->len, lo->stripe_unit);
> +	/* Try to expand the start downward */
> +	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> +	if (rreq->len + blockoff <= max_len) {
> +		rreq->start = blockno * lo->stripe_unit;
> +		rreq->len += blockoff;
> +	}
>  }
>  
>  static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
> -- 
> 2.40.0
>