On Sun, 2023-11-05 at 18:44 -0800, Mina Almasry wrote:
> Overload the LSB of struct page* to indicate that it's a
> page_pool_iov.
>
> Refactor mm calls on struct page* into helpers, and add page_pool_iov
> handling on those helpers. Modify callers of these mm APIs with calls
> to these helpers instead.
>
> In areas where struct page* is dereferenced, add a check for special
> handling of page_pool_iov.
>
> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
>
> ---
>  include/net/page_pool/helpers.h | 74 ++++++++++++++++++++++++++++++++-
>  net/core/page_pool.c            | 63 ++++++++++++++++++++--------
>  2 files changed, 118 insertions(+), 19 deletions(-)
>
> diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
> index b93243c2a640..08f1a2cc70d2 100644
> --- a/include/net/page_pool/helpers.h
> +++ b/include/net/page_pool/helpers.h
> @@ -151,6 +151,64 @@ static inline struct page_pool_iov *page_to_page_pool_iov(struct page *page)
>  	return NULL;
>  }
>
> +static inline int page_pool_page_ref_count(struct page *page)
> +{
> +	if (page_is_page_pool_iov(page))
> +		return page_pool_iov_refcount(page_to_page_pool_iov(page));
> +
> +	return page_ref_count(page);
> +}
> +
> +static inline void page_pool_page_get_many(struct page *page,
> +					   unsigned int count)
> +{
> +	if (page_is_page_pool_iov(page))
> +		return page_pool_iov_get_many(page_to_page_pool_iov(page),
> +					      count);
> +
> +	return page_ref_add(page, count);
> +}
> +
> +static inline void page_pool_page_put_many(struct page *page,
> +					   unsigned int count)
> +{
> +	if (page_is_page_pool_iov(page))
> +		return page_pool_iov_put_many(page_to_page_pool_iov(page),
> +					      count);
> +
> +	if (count > 1)
> +		page_ref_sub(page, count - 1);
> +
> +	put_page(page);
> +}
> +
> +static inline bool page_pool_page_is_pfmemalloc(struct page *page)
> +{
> +	if (page_is_page_pool_iov(page))
> +		return false;
> +
> +	return page_is_pfmemalloc(page);
> +}
> +
> +static inline bool page_pool_page_is_pref_nid(struct page *page, int pref_nid)
> +{
> +	/* Assume page_pool_iov are on the preferred node without actually
> +	 * checking...
> +	 *
> +	 * This check is only used to check for recycling memory in the page
> +	 * pool's fast paths. Currently the only implementation of page_pool_iov
> +	 * is dmabuf device memory. It's a deliberate decision by the user to
> +	 * bind a certain dmabuf to a certain netdev, and the netdev rx queue
> +	 * would not be able to reallocate memory from another dmabuf that
> +	 * exists on the preferred node, so, this check doesn't make much sense
> +	 * in this case. Assume all page_pool_iovs can be recycled for now.
> +	 */
> +	if (page_is_page_pool_iov(page))
> +		return true;
> +
> +	return page_to_nid(page) == pref_nid;
> +}
> +
>  /**
>   * page_pool_dev_alloc_pages() - allocate a page.
>   * @pool: pool from which to allocate
> @@ -301,6 +359,9 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
>  {
>  	long ret;
>
> +	if (page_is_page_pool_iov(page))
> +		return -EINVAL;
> +
>  	/* If nr == pp_frag_count then we have cleared all remaining
>  	 * references to the page:
>  	 * 1. 'n == 1': no need to actually overwrite it.
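FWIW, my understanding of the LSB overload mentioned in the changelog
is roughly the following sketch; the names below are from my memory of
the earlier patches in this series, so they may not match exactly:

	#define PP_IOV 0x01UL

	static inline bool page_is_page_pool_iov(const struct page *page)
	{
		/* Real struct page pointers are at least word-aligned, so
		 * the low bit is free to tag page_pool_iov pointers.
		 */
		return (unsigned long)page & PP_IOV;
	}

	static inline struct page *page_pool_iov_to_page(struct page_pool_iov *ppiov)
	{
		/* Hide a page_pool_iov pointer behind a tagged page pointer. */
		return (struct page *)((unsigned long)ppiov | PP_IOV);
	}

	static inline struct page_pool_iov *page_to_page_pool_iov(struct page *page)
	{
		if (page_is_page_pool_iov(page))
			return (struct page_pool_iov *)((unsigned long)page & ~PP_IOV);

		return NULL;
	}

That tag is what all the helpers above check before dereferencing any
struct page fields.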
> @@ -431,7 +492,12 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
>   */
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -	dma_addr_t ret = page->dma_addr;
> +	dma_addr_t ret;
> +
> +	if (page_is_page_pool_iov(page))
> +		return page_pool_iov_dma_addr(page_to_page_pool_iov(page));

Should the above conditional be guarded by the page_pool_mem_providers
static key? This looks like fast-path code. Same question for the
refcount helper above.

Minor nit: possibly cache the result of 'page_is_page_pool_iov(page)'
in a local variable to make the code more readable.

> +
> +	ret = page->dma_addr;
>
>  	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
>  		ret <<= PAGE_SHIFT;
> @@ -441,6 +507,12 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>
>  static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>  {
> +	/* page_pool_iovs are mapped and their dma-addr can't be modified. */
> +	if (page_is_page_pool_iov(page)) {
> +		DEBUG_NET_WARN_ON_ONCE(true);
> +		return false;
> +	}

Quickly skimming over the page_pool code, it looks like
page_pool_set_dma_addr() usage is guarded by the PP_FLAG_DMA_MAP page
pool flag. Could the device mem provider enforce that flag being
cleared on the page pool?

> +
>  	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
>  		page->dma_addr = addr >> PAGE_SHIFT;
>
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 138ddea0b28f..d211996d423b 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -317,7 +317,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
>  		if (unlikely(!page))
>  			break;
>
> -		if (likely(page_to_nid(page) == pref_nid)) {
> +		if (likely(page_pool_page_is_pref_nid(page, pref_nid))) {
>  			pool->alloc.cache[pool->alloc.count++] = page;
>  		} else {
>  			/* NUMA mismatch;
> @@ -362,7 +362,15 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
>  					  struct page *page,
>  					  unsigned int dma_sync_size)
>  {
> -	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
> +	dma_addr_t dma_addr;
> +
> +	/* page_pool_iov memory provider do not support PP_FLAG_DMA_SYNC_DEV */
> +	if (page_is_page_pool_iov(page)) {
> +		DEBUG_NET_WARN_ON_ONCE(true);
> +		return;
> +	}

Similar to the above point, mutatis mutandis.

> +
> +	dma_addr = page_pool_get_dma_addr(page);
>
>  	dma_sync_size = min(dma_sync_size, pool->p.max_len);
>  	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
> @@ -374,6 +382,12 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
>  {
>  	dma_addr_t dma;
>
> +	if (page_is_page_pool_iov(page)) {
> +		/* page_pool_iovs are already mapped */
> +		DEBUG_NET_WARN_ON_ONCE(true);
> +		return true;
> +	}

Ditto.

Cheers,

Paolo
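P.S. To make the static key question above concrete, I was thinking of
something along these lines for the dma_addr helper - an untested
sketch, assuming the page_pool_mem_providers static key defined earlier
in the series is made visible to the header:

	static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
	{
		dma_addr_t ret;

		/* Pools without a memory provider keep this branch
		 * patched out and pay (almost) nothing for it in the
		 * fast path.
		 */
		if (static_branch_unlikely(&page_pool_mem_providers) &&
		    page_is_page_pool_iov(page))
			return page_pool_iov_dma_addr(page_to_page_pool_iov(page));

		ret = page->dma_addr;

		if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
			ret <<= PAGE_SHIFT;

		return ret;
	}

The same pattern would apply to page_pool_page_ref_count() and the
other helpers touched by this patch.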