Re: [PATCH 06/12] xfs: remove the kmalloc to page allocator fallback

"Darrick J. Wong" <djwong@xxxxxxxxxx> · Wed, 26 Feb 2025 09:22:31 -0800

On Wed, Feb 26, 2025 at 07:51:34AM -0800, Christoph Hellwig wrote:
> Since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment
> for kmalloc(power-of-two)", kmalloc and friends guarantee that power of
> two sized allocations are naturally aligned.  Limit our use of kmalloc
> for buffers to these power of two sizes and remove the fallback to
> the page allocator for this case, but keep a check in addition to
> trusting the slab allocator to get the alignment right.
> 
> Also refactor the kmalloc path to reuse various calculations for the
> size and gfp flags.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/xfs/xfs_buf.c | 49 ++++++++++++++++++++++++------------------------
>  1 file changed, 25 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index e8783ee23623..f327bf5b04c0 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -301,23 +301,24 @@ xfs_buf_free(
>  
>  static int
>  xfs_buf_alloc_kmem(
> -	struct xfs_buf	*bp,
> -	xfs_buf_flags_t	flags)
> +	struct xfs_buf		*bp,
> +	size_t			size,
> +	gfp_t			gfp_mask)
>  {
> -	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL;
> -	size_t		size = BBTOB(bp->b_length);
> -
> -	/* Assure zeroed buffer for non-read cases. */
> -	if (!(flags & XBF_READ))
> -		gfp_mask |= __GFP_ZERO;
> +	ASSERT(is_power_of_2(size));
> +	ASSERT(size < PAGE_SIZE);
>  
> -	bp->b_addr = kmalloc(size, gfp_mask);
> +	bp->b_addr = kmalloc(size, gfp_mask | __GFP_NOFAIL);
>  	if (!bp->b_addr)
>  		return -ENOMEM;
>  
> -	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
> -	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
> -		/* b_addr spans two pages - use alloc_page instead */
> +	/*
> +	 * Slab guarantees that we get back naturally aligned allocations for
> +	 * power of two sizes.  Keep this check as the canary in the coal mine
> +	 * if anything changes in slab.
> +	 */
> +	if (!IS_ALIGNED((unsigned long)bp->b_addr, size)) {
> +		ASSERT(IS_ALIGNED((unsigned long)bp->b_addr, size));

Depending on the level of paranoia about "outside" subsystems, should
this be a regular xfs_err so we can catch these kinds of things on
production kernels?

>  		kfree(bp->b_addr);
>  		bp->b_addr = NULL;
>  		return -ENOMEM;
> @@ -358,18 +359,22 @@ xfs_buf_alloc_backing_mem(
>  	if (xfs_buftarg_is_mem(bp->b_target))
>  		return xmbuf_map_page(bp);
>  
> -	/*
> -	 * For buffers that fit entirely within a single page, first attempt to
> -	 * allocate the memory from the heap to minimise memory usage.  If we
> -	 * can't get heap memory for these small buffers, we fall back to using
> -	 * the page allocator.
> -	 */
> -	if (size < PAGE_SIZE && xfs_buf_alloc_kmem(new_bp, flags) == 0)
> -		return 0;
> +	/* Assure zeroed buffer for non-read cases. */
> +	if (!(flags & XBF_READ))
> +		gfp_mask |= __GFP_ZERO;
>  
>  	if (flags & XBF_READ_AHEAD)
>  		gfp_mask |= __GFP_NORETRY;
>  
> +	/*
> +	 * For buffers smaller than PAGE_SIZE use a kmalloc allocation if that
> +	 * is properly aligned.  The slab allocator now guarantees an aligned
> +	 * allocation for all power of two sizes, we matches must of the smaller

"...which matches most of the smaller than PAGE_SIZE buffers..."

--D

> +	 * than PAGE_SIZE buffers used by XFS.
> +	 */
> +	if (size < PAGE_SIZE && is_power_of_2(size))
> +		return xfs_buf_alloc_kmem(bp, size, gfp_mask);
> +
>  	/* Make sure that we have a page list */
>  	bp->b_page_count = DIV_ROUND_UP(size, PAGE_SIZE);
>  	if (bp->b_page_count <= XB_PAGES) {
> @@ -382,10 +387,6 @@ xfs_buf_alloc_backing_mem(
>  	}
>  	bp->b_flags |= _XBF_PAGES;
>  
> -	/* Assure zeroed buffer for non-read cases. */
> -	if (!(flags & XBF_READ))
> -		gfp_mask |= __GFP_ZERO;
> -
>  	/*
>  	 * Bulk filling of pages can take multiple calls. Not filling the entire
>  	 * array is not an allocation failure, so don't back off if we get at
> -- 
> 2.45.2
> 
>