From: Christoph Hellwig <hch@xxxxxx>

Instead of allocating the folios manually using the bulk page allocator
and then using vm_map_ram() just use vmalloc to allocate the entire
buffer - vmalloc will use the bulk allocator internally if it fits.

With this the b_folios array can go away as well, as nothing uses it
anymore.

[dchinner: port to folio based buffers.]

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_buf.c     | 164 ++++++++++++-------------------------------
 fs/xfs/xfs_buf.h     |   2 -
 fs/xfs/xfs_buf_mem.c |   9 +--
 3 files changed, 45 insertions(+), 130 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 303945554415..6d6bad80722e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -282,29 +282,6 @@ _xfs_buf_alloc(
 	return 0;
 }
 
-static void
-xfs_buf_free_folios(
-	struct xfs_buf	*bp)
-{
-	uint		i;
-
-	ASSERT(bp->b_flags & _XBF_FOLIOS);
-
-	if (xfs_buf_is_vmapped(bp))
-		vm_unmap_ram(bp->b_addr, bp->b_folio_count);
-
-	for (i = 0; i < bp->b_folio_count; i++) {
-		if (bp->b_folios[i])
-			__folio_put(bp->b_folios[i]);
-	}
-	mm_account_reclaimed_pages(bp->b_folio_count);
-
-	if (bp->b_folios != bp->b_folio_array)
-		kfree(bp->b_folios);
-	bp->b_folios = NULL;
-	bp->b_flags &= ~_XBF_FOLIOS;
-}
-
 static void
 xfs_buf_free_callback(
 	struct callback_head	*cb)
@@ -323,13 +300,22 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (xfs_buftarg_is_mem(bp->b_target))
+	if (xfs_buftarg_is_mem(bp->b_target)) {
 		xmbuf_unmap_folio(bp);
-	else if (bp->b_flags & _XBF_FOLIOS)
-		xfs_buf_free_folios(bp);
-	else if (bp->b_flags & _XBF_KMEM)
-		kfree(bp->b_addr);
+		goto free;
+	}
+
+	if (!(bp->b_flags & _XBF_KMEM))
+		mm_account_reclaimed_pages(bp->b_folio_count);
+
+	if (bp->b_flags & _XBF_FOLIOS)
+		__folio_put(kmem_to_folio(bp->b_addr));
+	else
+		kvfree(bp->b_addr);
+
+	bp->b_flags &= ~(_XBF_KMEM | _XBF_FOLIOS);
+
+free:
 	call_rcu(&bp->b_rcu, xfs_buf_free_callback);
 }
 
@@ -356,8 +342,6 @@ xfs_buf_alloc_kmem(
 		bp->b_addr = NULL;
 		return -ENOMEM;
 	}
-	bp->b_folios = bp->b_folio_array;
-	bp->b_folios[0] = kmem_to_folio(bp->b_addr);
 	bp->b_folio_count = 1;
 	bp->b_flags |= _XBF_KMEM;
 	return 0;
@@ -377,14 +361,15 @@ xfs_buf_alloc_folio(
 	struct xfs_buf	*bp,
 	gfp_t		gfp_mask)
 {
+	struct folio	*folio;
 	int		length = BBTOB(bp->b_length);
 	int		order = get_order(length);
 
-	bp->b_folio_array[0] = folio_alloc(gfp_mask, order);
-	if (!bp->b_folio_array[0])
+	folio = folio_alloc(gfp_mask, order);
+	if (!folio)
 		return false;
 
-	bp->b_folios = bp->b_folio_array;
+	bp->b_addr = folio_address(folio);
 	bp->b_folio_count = 1;
 	bp->b_flags |= _XBF_FOLIOS;
 	return true;
@@ -400,15 +385,11 @@ xfs_buf_alloc_folio(
  * contiguous memory region that we don't have to map and unmap to access the
  * data directly.
  *
- * The second type of buffer is the multi-folio buffer. These are *always* made
- * up of single page folios so that they can be fed to vmap_ram() to return a
- * contiguous memory region we can access the data through.
- *
- * We don't use high order folios for this second type of buffer (yet) because
- * having variable size folios makes offset-to-folio indexing and iteration of
- * the data range more complex than if they are fixed size. This case should now
- * be the slow path, though, so unless we regularly fail to allocate high order
- * folios, there should be little need to optimise this path.
+ * The second type of buffer is the vmalloc()d buffer. This provides the buffer
+ * with the required contiguous memory region but backed by discontiguous
+ * physical pages.
+ * vmalloc() typically doesn't fail, but it can and so we may need to wrap
+ * the allocation in a loop to prevent low memory failures and shutdowns.
  */
 static int
 xfs_buf_alloc_folios(
@@ -416,7 +397,7 @@
 	xfs_buf_flags_t	flags)
 {
 	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN;
-	long		filled = 0;
+	unsigned	nofs_flag;
 
 	if (flags & XBF_READ_AHEAD)
 		gfp_mask |= __GFP_NORETRY;
@@ -425,89 +406,32 @@
 	if (!(flags & XBF_READ))
 		gfp_mask |= __GFP_ZERO;
 
-	/* Optimistically attempt a single high order folio allocation. */
-	if (xfs_buf_alloc_folio(bp, gfp_mask))
-		return 0;
-
-	/* Fall back to allocating an array of single page folios. */
 	bp->b_folio_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
-	if (bp->b_folio_count <= XB_FOLIOS) {
-		bp->b_folios = bp->b_folio_array;
-	} else {
-		bp->b_folios = kzalloc(sizeof(struct folio *) * bp->b_folio_count,
-					gfp_mask);
-		if (!bp->b_folios)
-			return -ENOMEM;
-	}
-	bp->b_flags |= _XBF_FOLIOS;
+
+	/* Optimistically attempt a single high order folio allocation. */
+	if (xfs_buf_alloc_folio(bp, gfp_mask))
+		return 0;
+
+	/* We are done if an order-0 allocation has already failed. */
+	if (bp->b_folio_count == 1)
+		return -ENOMEM;
 
 	/*
-	 * Bulk filling of pages can take multiple calls. Not filling the entire
-	 * array is not an allocation failure, so don't back off if we get at
-	 * least one extra page.
+	 * XXX(dgc): I think dquot reclaim is the only place we can get
+	 * to this function from memory reclaim context now. If we fix
+	 * that like we've fixed inode reclaim to avoid writeback from
+	 * reclaim, this nofs wrapping can go away.
 	 */
-	for (;;) {
-		long	last = filled;
-
-		filled = alloc_pages_bulk_array(gfp_mask, bp->b_folio_count,
-						(struct page **)bp->b_folios);
-		if (filled == bp->b_folio_count) {
-			XFS_STATS_INC(bp->b_mount, xb_page_found);
-			break;
-		}
-
-		if (filled != last)
-			continue;
-
-		if (flags & XBF_READ_AHEAD) {
-			xfs_buf_free_folios(bp);
-			return -ENOMEM;
-		}
-
-		XFS_STATS_INC(bp->b_mount, xb_page_retries);
-		memalloc_retry_wait(gfp_mask);
-	}
-
-	if (bp->b_folio_count == 1) {
-		/* A single folio buffer is always mappable */
-		bp->b_addr = folio_address(bp->b_folios[0]);
-	} else {
-		int retried = 0;
-		unsigned nofs_flag;
-
-		/*
-		 * vm_map_ram() will allocate auxiliary structures (e.g.
-		 * pagetables) with GFP_KERNEL, yet we often under a scoped nofs
-		 * context here. Mixing GFP_KERNEL with GFP_NOFS allocations
-		 * from the same call site that can be run from both above and
-		 * below memory reclaim causes lockdep false positives. Hence we
-		 * always need to force this allocation to nofs context because
-		 * we can't pass __GFP_NOLOCKDEP down to auxillary structures to
-		 * prevent false positive lockdep reports.
-		 *
-		 * XXX(dgc): I think dquot reclaim is the only place we can get
-		 * to this function from memory reclaim context now. If we fix
-		 * that like we've fixed inode reclaim to avoid writeback from
-		 * reclaim, this nofs wrapping can go away.
-		 */
-		nofs_flag = memalloc_nofs_save();
-		do {
-			bp->b_addr = vm_map_ram((struct page **)bp->b_folios,
-					bp->b_folio_count, -1);
-			if (bp->b_addr)
-				break;
-			vm_unmap_aliases();
-		} while (retried++ <= 1);
-		memalloc_nofs_restore(nofs_flag);
-
-		if (!bp->b_addr) {
-			xfs_warn_ratelimited(bp->b_mount,
-				"%s: failed to map %u folios", __func__,
-				bp->b_folio_count);
-			xfs_buf_free_folios(bp);
-			return -ENOMEM;
-		}
+	nofs_flag = memalloc_nofs_save();
+	bp->b_addr = __vmalloc(BBTOB(bp->b_length), gfp_mask);
+	memalloc_nofs_restore(nofs_flag);
+
+	if (!bp->b_addr) {
+		xfs_warn_ratelimited(bp->b_mount,
+			"%s: failed to allocate %u folios", __func__,
+			bp->b_folio_count);
+		return -ENOMEM;
 	}
 
 	return 0;

diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 4d515407713b..68c24947ca1a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -190,8 +190,6 @@ struct xfs_buf {
 	struct xfs_buf_log_item	*b_log_item;
 	struct list_head	b_li_list;	/* Log items list head */
 	struct xfs_trans	*b_transp;
-	struct folio		**b_folios;	/* array of folio pointers */
-	struct folio		*b_folio_array[XB_FOLIOS]; /* inline folios */
 	struct xfs_buf_map	*b_maps;	/* compound buffer map */
 	struct xfs_buf_map	__b_map;	/* inline compound buffer map */
 	int			b_map_count;
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 26734c64c10e..336e7c8effb7 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -169,8 +169,6 @@ xmbuf_map_folio(
 	unlock_page(page);
 
 	bp->b_addr = page_address(page);
-	bp->b_folios = bp->b_folio_array;
-	bp->b_folios[0] = folio;
 	bp->b_folio_count = 1;
 	return 0;
 }
@@ -180,15 +178,10 @@
 void
 xmbuf_unmap_folio(
 	struct xfs_buf		*bp)
 {
-	struct folio		*folio = bp->b_folios[0];
-
 	ASSERT(xfs_buftarg_is_mem(bp->b_target));
 
-	folio_put(folio);
-
+	folio_put(kmem_to_folio(bp->b_addr));
 	bp->b_addr = NULL;
-	bp->b_folios[0] = NULL;
-	bp->b_folios = NULL;
 	bp->b_folio_count = 0;
 }
-- 
2.43.0
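
As a rough userspace analogy for the allocation strategy xfs_buf_alloc_folios()
ends up with here (optimistically try one physically contiguous high-order
allocation, then fall back to a region that is only virtually contiguous),
consider the sketch below. It is not kernel code and not part of the patch:
mmap() with MAP_HUGETLB stands in for folio_alloc() with a non-zero order,
plain anonymous mmap() stands in for __vmalloc(), and buf_alloc() is an
invented name for the example.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/*
 * Two-tier allocation: ask for one physically contiguous region first and
 * accept failure, then fall back to a virtually contiguous mapping backed
 * by whatever pages are available.
 */
static void *buf_alloc(size_t len)
{
	void *p;

	/* Optimistic: a single physically contiguous region (may fail). */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p != MAP_FAILED)
		return p;

	/* Fallback: virtually contiguous, physically discontiguous. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	return p == MAP_FAILED ? NULL : p;
}

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* one 2MiB huge page worth */
	void *p = buf_alloc(len);

	if (!p)
		return 1;
	/* Either way the caller sees one flat, addressable buffer. */
	memset(p, 0, len);
	printf("mapped %zu bytes at %p\n", len, p);
	munmap(p, len);
	return 0;
}

Either path hands the caller a single flat address range, which is the
property the buffer cache code relies on; the fallback only gives up
physical contiguity, just as __vmalloc() does relative to folio_alloc().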