Hi Mina, Apologies for not participating in the party earlier. On Fri, 8 Dec 2023 at 02:52, Mina Almasry <almasrymina@xxxxxxxxxx> wrote: > > From: Jakub Kicinski <kuba@xxxxxxxxxx> > > The page providers which try to reuse the same pages will > need to hold onto the ref, even if page gets released from > the pool - as in releasing the page from the pp just transfers > the "ownership" reference from pp to the provider, and provider > will wait for other references to be gone before feeding this > page back into the pool. > > Signed-off-by: Jakub Kicinski <kuba@xxxxxxxxxx> > Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx> > > --- > > This is implemented by Jakub in his RFC: > https://lore.kernel.org/netdev/f8270765-a27b-6ccf-33ea-cda097168d79@xxxxxxxxxx/T/ > > I take no credit for the idea or implementation; I only added minor > edits to make this workable with device memory TCP, and removed some > hacky test code. This is a critical dependency of device memory TCP > and thus I'm pulling it into this series to make it revewable and > mergable. > > RFC v3 -> v1 > - Removed unusued mem_provider. (Yunsheng). > - Replaced memory_provider & mp_priv with netdev_rx_queue (Jakub). > > --- > include/net/page_pool/types.h | 12 ++++++++++ > net/core/page_pool.c | 43 +++++++++++++++++++++++++++++++---- > 2 files changed, 50 insertions(+), 5 deletions(-) > > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h > index ac286ea8ce2d..0e9fa79a5ef1 100644 > --- a/include/net/page_pool/types.h > +++ b/include/net/page_pool/types.h > @@ -51,6 +51,7 @@ struct pp_alloc_cache { > * @dev: device, for DMA pre-mapping purposes > * @netdev: netdev this pool will serve (leave as NULL if none or multiple) > * @napi: NAPI which is the sole consumer of pages, otherwise NULL > + * @queue: struct netdev_rx_queue this page_pool is being created for. > * @dma_dir: DMA mapping direction > * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV > * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV > @@ -63,6 +64,7 @@ struct page_pool_params { > int nid; > struct device *dev; > struct napi_struct *napi; > + struct netdev_rx_queue *queue; > enum dma_data_direction dma_dir; > unsigned int max_len; > unsigned int offset; > @@ -125,6 +127,13 @@ struct page_pool_stats { > }; > #endif > > +struct memory_provider_ops { > + int (*init)(struct page_pool *pool); > + void (*destroy)(struct page_pool *pool); > + struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp); > + bool (*release_page)(struct page_pool *pool, struct page *page); > +}; > + > struct page_pool { > struct page_pool_params_fast p; > > @@ -174,6 +183,9 @@ struct page_pool { > */ > struct ptr_ring ring; > > + void *mp_priv; > + const struct memory_provider_ops *mp_ops; > + > #ifdef CONFIG_PAGE_POOL_STATS > /* recycle stats are per-cpu to avoid locking */ > struct page_pool_recycle_stats __percpu *recycle_stats; > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > index ca1b3b65c9b5..f5c84d2a4510 100644 > --- a/net/core/page_pool.c > +++ b/net/core/page_pool.c > @@ -25,6 +25,8 @@ > > #include "page_pool_priv.h" > > +static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); We could add the existing page pool mechanisms as another 'provider', but I assume this is coded like this for performance reasons (IOW skip the expensive ptr call for the default case?) > + > #define DEFER_TIME (msecs_to_jiffies(1000)) > #define DEFER_WARN_INTERVAL (60 * HZ) > > @@ -174,6 +176,7 @@ static int page_pool_init(struct page_pool *pool, > const struct page_pool_params *params) > { > unsigned int ring_qsize = 1024; /* Default */ > + int err; > > memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); > memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); > @@ -234,10 +237,25 @@ static int page_pool_init(struct page_pool *pool, > /* Driver calling page_pool_create() also call page_pool_destroy() */ > refcount_set(&pool->user_cnt, 1); > > + if (pool->mp_ops) { > + err = pool->mp_ops->init(pool); > + if (err) { > + pr_warn("%s() mem-provider init failed %d\n", > + __func__, err); > + goto free_ptr_ring; > + } > + > + static_branch_inc(&page_pool_mem_providers); > + } > + > if (pool->p.flags & PP_FLAG_DMA_MAP) > get_device(pool->p.dev); > > return 0; > + > +free_ptr_ring: > + ptr_ring_cleanup(&pool->ring, NULL); > + return err; > } > > static void page_pool_uninit(struct page_pool *pool) > @@ -519,7 +537,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) > return page; > > /* Slow-path: cache empty, do real allocation */ > - page = __page_pool_alloc_pages_slow(pool, gfp); > + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) Why do we need && pool->mp_ops? On the init function, we only bump page_pool_mem_providers if the ops are there > + page = pool->mp_ops->alloc_pages(pool, gfp); > + else > + page = __page_pool_alloc_pages_slow(pool, gfp); > return page; > } > EXPORT_SYMBOL(page_pool_alloc_pages); > @@ -576,10 +597,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) > void page_pool_return_page(struct page_pool *pool, struct page *page) > { > int count; > + bool put; > > - __page_pool_release_page_dma(pool, page); > - > - page_pool_clear_pp_info(page); > + put = true; > + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) ditto > + put = pool->mp_ops->release_page(pool, page); > + else > + __page_pool_release_page_dma(pool, page); > > /* This may be the last page returned, releasing the pool, so > * it is not safe to reference pool afterwards. > @@ -587,7 +611,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page) > count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); > trace_page_pool_state_release(pool, page, count); > > - put_page(page); > + if (put) { > + page_pool_clear_pp_info(page); > + put_page(page); > + } > /* An optimization would be to call __free_pages(page, pool->p.order) > * knowing page is not part of page-cache (thus avoiding a > * __page_cache_release() call). > @@ -857,6 +884,12 @@ static void __page_pool_destroy(struct page_pool *pool) > > page_pool_unlist(pool); > page_pool_uninit(pool); > + > + if (pool->mp_ops) { Same here. Using a mix of pool->mp_ops and page_pool_mem_providers will work, but since we always check the ptr on init, can't we simply rely on page_pool_mem_providers for the rest of the code? Thanks /Ilias > + pool->mp_ops->destroy(pool); > + static_branch_dec(&page_pool_mem_providers); > + } > + > kfree(pool); > } > > -- > 2.43.0.472.g3155946c3a-goog >