On Mon, 2025-02-17 at 20:31 +0800, Yunsheng Lin wrote:
> As mentioned in [1], it seems odd to check NULL elements in
> the middle of page bulk allocating, and it seems caller can
> do a better job of bulk allocating pages into a whole array
> sequentially without checking NULL elements first before
> doing the page bulk allocation.
> 
> Remove the above checking also enable the caller to not
> zero the array before calling the page bulk allocating API,
> which has about 1~2 ns performance improvement for the test
> case of time_bench_page_pool03_slow() for page_pool in a
> x86 vm system, this reduces some performance impact of
> fixing the DMA API misuse problem in [2], performance
> improves from 87.886 ns to 86.429 ns.
> 
> 1. https://lore.kernel.org/all/bd8c2f5c-464d-44ab-b607-390a87ea4cd5@xxxxxxxxxx/
> 2. https://lore.kernel.org/all/20250212092552.1779679-1-linyunsheng@xxxxxxxxxx/
> CC: Jesper Dangaard Brouer <hawk@xxxxxxxxxx>
> CC: Luiz Capitulino <luizcap@xxxxxxxxxx>
> CC: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
> ---
>  drivers/vfio/pci/virtio/migrate.c |  2 --
>  fs/btrfs/extent_io.c              |  8 +++++---
>  fs/erofs/zutil.c                  | 12 ++++++------
>  fs/xfs/xfs_buf.c                  |  9 +++++----
>  mm/page_alloc.c                   | 32 +++++---------------------------
>  net/core/page_pool.c              |  3 ---
>  net/sunrpc/svc_xprt.c             |  9 +++++----
>  7 files changed, 26 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c
> index ba92bb4e9af9..9f003a237dec 100644
> --- a/drivers/vfio/pci/virtio/migrate.c
> +++ b/drivers/vfio/pci/virtio/migrate.c
> @@ -91,8 +91,6 @@ static int virtiovf_add_migration_pages(struct virtiovf_data_buffer *buf,
>  		if (ret)
>  			goto err_append;
>  		buf->allocated_length += filled * PAGE_SIZE;
> -		/* clean input for another bulk allocation */
> -		memset(page_list, 0, filled * sizeof(*page_list));
>  		to_fill = min_t(unsigned int, to_alloc,
>  				PAGE_SIZE / sizeof(*page_list));
>  	} while (to_alloc > 0);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index b2fae67f8fa3..d0466d795cca 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -626,10 +626,12 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
>  	unsigned int allocated;
> 
>  	for (allocated = 0; allocated < nr_pages;) {
> -		unsigned int last = allocated;
> +		unsigned int new_allocated;
> 
> -		allocated = alloc_pages_bulk(gfp, nr_pages, page_array);
> -		if (unlikely(allocated == last)) {
> +		new_allocated = alloc_pages_bulk(gfp, nr_pages - allocated,
> +						 page_array + allocated);
> +		allocated += new_allocated;
> +		if (unlikely(!new_allocated)) {
>  			/* No progress, fail and do cleanup. */
>  			for (int i = 0; i < allocated; i++) {
>  				__free_page(page_array[i]);
> diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
> index 55ff2ab5128e..1c50b5e27371 100644
> --- a/fs/erofs/zutil.c
> +++ b/fs/erofs/zutil.c
> @@ -85,13 +85,13 @@ int z_erofs_gbuf_growsize(unsigned int nrpages)
> 
>  		for (j = 0; j < gbuf->nrpages; ++j)
>  			tmp_pages[j] = gbuf->pages[j];
> -		do {
> -			last = j;
> -			j = alloc_pages_bulk(GFP_KERNEL, nrpages,
> -					     tmp_pages);
> -			if (last == j)
> +
> +		for (last = j; last < nrpages; last += j) {
> +			j = alloc_pages_bulk(GFP_KERNEL, nrpages - last,
> +					     tmp_pages + last);
> +			if (!j)
>  				goto out;
> -		} while (j != nrpages);
> +		}
> 
>  		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
>  		if (!ptr)
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 15bb790359f8..9e1ce0ab9c35 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -377,16 +377,17 @@ xfs_buf_alloc_pages(
>  	 * least one extra page.
>  	 */
>  	for (;;) {
> -		long	last = filled;
> +		long	alloc;
> 
> -		filled = alloc_pages_bulk(gfp_mask, bp->b_page_count,
> -					  bp->b_pages);
> +		alloc = alloc_pages_bulk(gfp_mask, bp->b_page_count - filled,
> +					  bp->b_pages + filled);
> +		filled += alloc;
>  		if (filled == bp->b_page_count) {
>  			XFS_STATS_INC(bp->b_mount, xb_page_found);
>  			break;
>  		}
> 
> -		if (filled != last)
> +		if (alloc)
>  			continue;
> 
>  		if (flags & XBF_READ_AHEAD) {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 579789600a3c..e0309532b6c4 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4541,9 +4541,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>   * This is a batched version of the page allocator that attempts to
>   * allocate nr_pages quickly. Pages are added to the page_array.
>   *
> - * Note that only NULL elements are populated with pages and nr_pages
> - * is the maximum number of pages that will be stored in the array.
> - *
>   * Returns the number of pages in the array.
>   */
>  unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
> @@ -4559,29 +4556,18 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  	struct alloc_context ac;
>  	gfp_t alloc_gfp;
>  	unsigned int alloc_flags = ALLOC_WMARK_LOW;
> -	int nr_populated = 0, nr_account = 0;
> -
> -	/*
> -	 * Skip populated array elements to determine if any pages need
> -	 * to be allocated before disabling IRQs.
> -	 */
> -	while (nr_populated < nr_pages && page_array[nr_populated])
> -		nr_populated++;
> +	int nr_populated = 0;
> 
>  	/* No pages requested? */
>  	if (unlikely(nr_pages <= 0))
>  		goto out;
> 
> -	/* Already populated array? */
> -	if (unlikely(nr_pages - nr_populated == 0))
> -		goto out;
> -
>  	/* Bulk allocator does not support memcg accounting. */
>  	if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT))
>  		goto failed;
> 
>  	/* Use the single page allocator for one page. */
> -	if (nr_pages - nr_populated == 1)
> +	if (nr_pages == 1)
>  		goto failed;
> 
>  #ifdef CONFIG_PAGE_OWNER
> @@ -4653,24 +4639,16 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  	/* Attempt the batch allocation */
>  	pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
>  	while (nr_populated < nr_pages) {
> -
> -		/* Skip existing pages */
> -		if (page_array[nr_populated]) {
> -			nr_populated++;
> -			continue;
> -		}
> -
>  		page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
>  					 pcp, pcp_list);
>  		if (unlikely(!page)) {
>  			/* Try and allocate at least one page */
> -			if (!nr_account) {
> +			if (!nr_populated) {
>  				pcp_spin_unlock(pcp);
>  				goto failed_irq;
>  			}
>  			break;
>  		}
> -		nr_account++;
> 
>  		prep_new_page(page, 0, gfp, 0);
>  		set_page_refcounted(page);
> @@ -4680,8 +4658,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  	pcp_spin_unlock(pcp);
>  	pcp_trylock_finish(UP_flags);
> 
> -	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
> -	zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
> +	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_populated);
> +	zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_populated);
> 
>  out:
>  	return nr_populated;
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index f5e908c9e7ad..ae9e8c78e4bb 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -536,9 +536,6 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
>  	if (unlikely(pool->alloc.count > 0))
>  		return pool->alloc.cache[--pool->alloc.count];
> 
> -	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */
> -	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
> -
>  	nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk,
>  					 (struct page **)pool->alloc.cache);
>  	if (unlikely(!nr_pages))
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index ae25405d8bd2..6321a4d2f2be 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -663,9 +663,10 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
>  		pages = RPCSVC_MAXPAGES;
>  	}
> 
> -	for (filled = 0; filled < pages; filled = ret) {
> -		ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages);
> -		if (ret > filled)
> +	for (filled = 0; filled < pages; filled += ret) {
> +		ret = alloc_pages_bulk(GFP_KERNEL, pages - filled,
> +				       rqstp->rq_pages + filled);
> +		if (ret)
>  			/* Made progress, don't sleep yet */
>  			continue;
> 
> @@ -674,7 +675,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
>  			set_current_state(TASK_RUNNING);
>  			return false;
>  		}
> -		trace_svc_alloc_arg_err(pages, ret);
> +		trace_svc_alloc_arg_err(pages, filled);
>  		memalloc_retry_wait(GFP_KERNEL);
>  	}
>  	rqstp->rq_page_end = &rqstp->rq_pages[pages];

I agree that the API is cumbersome and weird as it is today.

For the sunrpc parts:

Acked-by: Jeff Layton <jlayton@xxxxxxxxxx>
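
For anyone skimming the diff rather than the callers: after this change every call
site ends up with the same shape of loop, handing alloc_pages_bulk() only the
still-empty tail of the array and retrying until the array is full, instead of
pre-zeroing it and letting the allocator skip non-NULL slots. A minimal sketch of
that pattern (the helper name, GFP choice, and error handling below are
illustrative, not code from the patch):

	#include <linux/gfp.h>
	#include <linux/mm.h>

	static int fill_page_array(gfp_t gfp, unsigned int nr_pages,
				   struct page **pages)
	{
		unsigned int filled = 0;

		while (filled < nr_pages) {
			unsigned int got;

			/* Ask only for the slots that are still unfilled. */
			got = alloc_pages_bulk(gfp, nr_pages - filled,
					       pages + filled);
			if (!got) {
				/* No progress at all: unwind and report failure. */
				while (filled)
					__free_page(pages[--filled]);
				return -ENOMEM;
			}
			filled += got;
		}

		return 0;
	}

The callers in the patch differ only in what they do on the no-progress branch
(fail outright, sleep and retry, or fall back to single-page allocation).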