Implement a memory provider that allocates dmabuf devmem in the form of net_iov. The provider receives a reference to the struct netdev_dmabuf_binding via the pool->mp_priv pointer. The driver needs to set this pointer for the provider in the net_iov. The provider obtains a reference on the netdev_dmabuf_binding which guarantees the binding and the underlying mapping remains alive until the provider is destroyed. Usage of PP_FLAG_DMA_MAP is required for this memory provide such that the page_pool can provide the driver with the dma-addrs of the devmem. Support for PP_FLAG_DMA_SYNC_DEV is omitted for simplicity & p.order != 0. Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx> Signed-off-by: Kaiyuan Zhang <kaiyuanz@xxxxxxxxxx> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx> --- v8: - Use skb_frag_size instead of frag->bv_len to fix patch-by-patch build error v6: - refactor new memory provider functions into net/core/devmem.c (Pavel) v2: - Disable devmem for p.order != 0 v1: - static_branch check in page_is_page_pool_iov() (Willem & Paolo). - PP_DEVMEM -> PP_IOV (David). - Require PP_FLAG_DMA_MAP (Jakub). --- include/net/netmem.h | 15 ++++++ include/net/page_pool/helpers.h | 22 +++++++++ include/net/page_pool/types.h | 2 + net/core/devmem.c | 83 +++++++++++++++++++++++++++++++++ net/core/page_pool.c | 38 +++++++-------- 5 files changed, 138 insertions(+), 22 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 74eeaa34883e..34aa1c80c1ca 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -126,6 +126,21 @@ static inline struct page *netmem_to_page(netmem_ref netmem) return (__force struct page *)netmem; } +static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return (struct net_iov *)((__force unsigned long)netmem & + ~NET_IOV); + + DEBUG_NET_WARN_ON_ONCE(true); + return NULL; +} + +static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) +{ + return (__force netmem_ref)((unsigned long)niov | NET_IOV); +} + static inline netmem_ref page_to_netmem(struct page *page) { return (__force netmem_ref)page; diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index c6a55eddefae..eb736506c3ce 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -453,4 +453,26 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) page_pool_update_nid(pool, new_nid); } +static inline void page_pool_set_pp_info(struct page_pool *pool, + netmem_ref netmem) +{ + netmem_set_pp(netmem, pool); + netmem_or_pp_magic(netmem, PP_SIGNATURE); + + /* Ensuring all pages have been split into one fragment initially: + * page_pool_set_pp_info() is only called once for every page when it + * is allocated from the page allocator and page_pool_fragment_page() + * is dirtying the same cache line as the page->pp_magic above, so + * the overhead is negligible. + */ + page_pool_fragment_netmem(netmem, 1); + if (pool->has_init_callback) + pool->slow.init_callback(netmem, pool->slow.init_arg); +} + +static inline void page_pool_clear_pp_info(netmem_ref netmem) +{ + netmem_clear_pp_magic(netmem); + netmem_set_pp(netmem, NULL); +} #endif /* _NET_PAGE_POOL_HELPERS_H */ diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index f04af1613f59..5b58c9e185a4 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -141,6 +141,8 @@ struct pp_memory_provider_params { void *mp_priv; }; +extern const struct memory_provider_ops dmabuf_devmem_ops; + struct page_pool { struct page_pool_params_fast p; diff --git a/net/core/devmem.c b/net/core/devmem.c index c25ede5f6fb9..01337de7d6a4 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -204,6 +204,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, * the driver may read this config while it's creating its * rx-queues. * WRITE_ONCE() here to match the READ_ONCE() in the driver. */ + WRITE_ONCE(rxq->mp_params.mp_ops, &dmabuf_devmem_ops); WRITE_ONCE(rxq->mp_params.mp_priv, binding); err = net_devmem_restart_rx_queue(dev, rxq_idx); @@ -340,3 +341,85 @@ int net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, return err; } #endif + +/*** "Dmabuf devmem memory provider" ***/ + +static int mp_dmabuf_devmem_init(struct page_pool *pool) +{ + struct net_devmem_dmabuf_binding *binding = pool->mp_priv; + + if (!binding) + return -EINVAL; + + if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + return -EOPNOTSUPP; + + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + return -EOPNOTSUPP; + + if (pool->p.order != 0) + return -E2BIG; + + net_devmem_dmabuf_binding_get(binding); + return 0; +} + +static netmem_ref mp_dmabuf_devmem_alloc_pages(struct page_pool *pool, + gfp_t gfp) +{ + struct net_devmem_dmabuf_binding *binding = pool->mp_priv; + netmem_ref netmem; + struct net_iov *niov; + dma_addr_t dma_addr; + + niov = net_devmem_alloc_dmabuf(binding); + if (!niov) + return 0; + + dma_addr = net_iov_dma_addr(niov); + + netmem = net_iov_to_netmem(niov); + + page_pool_set_pp_info(pool, netmem); + + if (page_pool_set_dma_addr_netmem(netmem, dma_addr)) + goto err_free; + + pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); + return netmem; + +err_free: + net_devmem_free_dmabuf(niov); + return 0; +} + +static void mp_dmabuf_devmem_destroy(struct page_pool *pool) +{ + struct net_devmem_dmabuf_binding *binding = pool->mp_priv; + + net_devmem_dmabuf_binding_put(binding); +} + +static bool mp_dmabuf_devmem_release_page(struct page_pool *pool, + netmem_ref netmem) +{ + WARN_ON_ONCE(!netmem_is_net_iov(netmem)); + WARN_ON_ONCE(atomic_long_read(netmem_get_pp_ref_count_ref(netmem)) != + 1); + + page_pool_clear_pp_info(netmem); + + net_devmem_free_dmabuf(netmem_to_net_iov(netmem)); + + /* We don't want the page pool put_page()ing our net_iovs. */ + return false; +} + +const struct memory_provider_ops dmabuf_devmem_ops = { + .init = mp_dmabuf_devmem_init, + .destroy = mp_dmabuf_devmem_destroy, + .alloc_pages = mp_dmabuf_devmem_alloc_pages, + .release_page = mp_dmabuf_devmem_release_page, +}; +EXPORT_SYMBOL(dmabuf_devmem_ops); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index c7bffd08218b..a0544b680e8a 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -12,6 +12,7 @@ #include <net/page_pool/helpers.h> #include <net/xdp.h> +#include <net/netdev_rx_queue.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> @@ -20,12 +21,15 @@ #include <linux/poison.h> #include <linux/ethtool.h> #include <linux/netdevice.h> +#include <linux/genalloc.h> +#include <net/devmem.h> #include <trace/events/page_pool.h> #include "page_pool_priv.h" DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); +EXPORT_SYMBOL(page_pool_mem_providers); #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) @@ -178,7 +182,9 @@ static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params, int cpuid) { + const struct memory_provider_ops *mp_ops = NULL; unsigned int ring_qsize = 1024; /* Default */ + void *mp_priv = NULL; int err; memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); @@ -251,6 +257,16 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); + if (pool->p.queue) { + mp_ops = READ_ONCE(pool->p.queue->mp_params.mp_ops); + mp_priv = READ_ONCE(pool->p.queue->mp_params.mp_priv); + } + + if (mp_ops && mp_priv) { + pool->mp_ops = mp_ops; + pool->mp_priv = mp_priv; + } + if (pool->mp_ops) { err = pool->mp_ops->init(pool); if (err) { @@ -444,28 +460,6 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) return false; } -static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) -{ - netmem_set_pp(netmem, pool); - netmem_or_pp_magic(netmem, PP_SIGNATURE); - - /* Ensuring all pages have been split into one fragment initially: - * page_pool_set_pp_info() is only called once for every page when it - * is allocated from the page allocator and page_pool_fragment_page() - * is dirtying the same cache line as the page->pp_magic above, so - * the overhead is negligible. - */ - page_pool_fragment_netmem(netmem, 1); - if (pool->has_init_callback) - pool->slow.init_callback(netmem, pool->slow.init_arg); -} - -static void page_pool_clear_pp_info(netmem_ref netmem) -{ - netmem_clear_pp_magic(netmem); - netmem_set_pp(netmem, NULL); -} - static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { -- 2.44.0.478.gd926399ef9-goog