Re: [PATCH net-next v22 10/14] mm: page_frag: introduce prepare/probe/commit API

Alexander Duyck <alexander.duyck@xxxxxxxxx> · Fri, 18 Oct 2024 11:03:03 -0700

On Fri, Oct 18, 2024 at 4:00 AM Yunsheng Lin <linyunsheng@xxxxxxxxxx> wrote:
>
> There are many use cases that need a minimum amount of memory
> in order to make forward progress, but that are more performant
> if more memory is available, or that need to probe the cache
> info in order to use any available memory for frag coalescing.
>
> Currently skb_page_frag_refill() API is used to solve the
> above use cases, but caller needs to know about the internal
> detail and access the data field of 'struct page_frag' to
> meet the requirement of the above use cases and its
> implementation is similar to the one in mm subsystem.
>
> To unify those two page_frag implementations, introduce a
> prepare API to ensure the minimum memory is satisfied and return
> how much memory is actually available to the caller, and a
> probe API to report the currently available memory to the caller
> without doing cache refilling. The caller needs to either call
> the commit API to report how much memory it actually uses, or
> not do so if deciding to not use any memory.
>
> CC: Alexander Duyck <alexander.duyck@xxxxxxxxx>
> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
> ---
>  include/linux/page_frag_cache.h | 130 ++++++++++++++++++++++++++++++++
>  mm/page_frag_cache.c            |  21 ++++++
>  2 files changed, 151 insertions(+)
>
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index feed99d0cddb..1c0c11250b66 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -46,6 +46,10 @@ void *__page_frag_cache_prepare(struct page_frag_cache *nc, unsigned int fragsz,
>  unsigned int __page_frag_cache_commit_noref(struct page_frag_cache *nc,
>                                             struct page_frag *pfrag,
>                                             unsigned int used_sz);
> +void *__page_frag_alloc_refill_probe_align(struct page_frag_cache *nc,
> +                                          unsigned int fragsz,
> +                                          struct page_frag *pfrag,
> +                                          unsigned int align_mask);
>
>  static inline unsigned int __page_frag_cache_commit(struct page_frag_cache *nc,
>                                                     struct page_frag *pfrag,
> @@ -88,6 +92,132 @@ static inline void *page_frag_alloc(struct page_frag_cache *nc,
>         return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
>  }
>
> +static inline bool __page_frag_refill_align(struct page_frag_cache *nc,
> +                                           unsigned int fragsz,
> +                                           struct page_frag *pfrag,
> +                                           gfp_t gfp_mask,
> +                                           unsigned int align_mask)
> +{
> +       if (unlikely(!__page_frag_cache_prepare(nc, fragsz, pfrag, gfp_mask,
> +                                               align_mask)))
> +               return false;
> +
> +       __page_frag_cache_commit(nc, pfrag, fragsz);
> +       return true;
> +}
> +
> +static inline bool page_frag_refill_align(struct page_frag_cache *nc,
> +                                         unsigned int fragsz,
> +                                         struct page_frag *pfrag,
> +                                         gfp_t gfp_mask, unsigned int align)
> +{
> +       WARN_ON_ONCE(!is_power_of_2(align));
> +       return __page_frag_refill_align(nc, fragsz, pfrag, gfp_mask, -align);
> +}
> +
> +static inline bool page_frag_refill(struct page_frag_cache *nc,
> +                                   unsigned int fragsz,
> +                                   struct page_frag *pfrag, gfp_t gfp_mask)
> +{
> +       return __page_frag_refill_align(nc, fragsz, pfrag, gfp_mask, ~0u);
> +}
> +
> +static inline bool __page_frag_refill_prepare_align(struct page_frag_cache *nc,
> +                                                   unsigned int fragsz,
> +                                                   struct page_frag *pfrag,
> +                                                   gfp_t gfp_mask,
> +                                                   unsigned int align_mask)
> +{
> +       return !!__page_frag_cache_prepare(nc, fragsz, pfrag, gfp_mask,
> +                                          align_mask);
> +}
> +
> +static inline bool page_frag_refill_prepare_align(struct page_frag_cache *nc,
> +                                                 unsigned int fragsz,
> +                                                 struct page_frag *pfrag,
> +                                                 gfp_t gfp_mask,
> +                                                 unsigned int align)
> +{
> +       WARN_ON_ONCE(!is_power_of_2(align));
> +       return __page_frag_refill_prepare_align(nc, fragsz, pfrag, gfp_mask,
> +                                               -align);
> +}
> +
> +static inline bool page_frag_refill_prepare(struct page_frag_cache *nc,
> +                                           unsigned int fragsz,
> +                                           struct page_frag *pfrag,
> +                                           gfp_t gfp_mask)
> +{
> +       return __page_frag_refill_prepare_align(nc, fragsz, pfrag, gfp_mask,
> +                                               ~0u);
> +}
> +
> +static inline void *__page_frag_alloc_refill_prepare_align(struct page_frag_cache *nc,
> +                                                          unsigned int fragsz,
> +                                                          struct page_frag *pfrag,
> +                                                          gfp_t gfp_mask,
> +                                                          unsigned int align_mask)
> +{
> +       return __page_frag_cache_prepare(nc, fragsz, pfrag, gfp_mask, align_mask);
> +}
> +
> +static inline void *page_frag_alloc_refill_prepare_align(struct page_frag_cache *nc,
> +                                                        unsigned int fragsz,
> +                                                        struct page_frag *pfrag,
> +                                                        gfp_t gfp_mask,
> +                                                        unsigned int align)
> +{
> +       WARN_ON_ONCE(!is_power_of_2(align));
> +       return __page_frag_alloc_refill_prepare_align(nc, fragsz, pfrag,
> +                                                     gfp_mask, -align);
> +}
> +
> +static inline void *page_frag_alloc_refill_prepare(struct page_frag_cache *nc,
> +                                                  unsigned int fragsz,
> +                                                  struct page_frag *pfrag,
> +                                                  gfp_t gfp_mask)
> +{
> +       return __page_frag_alloc_refill_prepare_align(nc, fragsz, pfrag,
> +                                                     gfp_mask, ~0u);
> +}
> +
> +static inline void *page_frag_alloc_refill_probe(struct page_frag_cache *nc,
> +                                                unsigned int fragsz,
> +                                                struct page_frag *pfrag)
> +{
> +       return __page_frag_alloc_refill_probe_align(nc, fragsz, pfrag, ~0u);
> +}
> +
> +static inline bool page_frag_refill_probe(struct page_frag_cache *nc,
> +                                         unsigned int fragsz,
> +                                         struct page_frag *pfrag)
> +{
> +       return !!page_frag_alloc_refill_probe(nc, fragsz, pfrag);
> +}
> +
> +static inline void page_frag_commit(struct page_frag_cache *nc,
> +                                   struct page_frag *pfrag,
> +                                   unsigned int used_sz)
> +{
> +       __page_frag_cache_commit(nc, pfrag, used_sz);
> +}
> +
> +static inline void page_frag_commit_noref(struct page_frag_cache *nc,
> +                                         struct page_frag *pfrag,
> +                                         unsigned int used_sz)
> +{
> +       __page_frag_cache_commit_noref(nc, pfrag, used_sz);
> +}
> +

Not a huge fan of introducing a ton of new API calls and then having
to have them all applied at once in the follow-on patches. Ideally the
functions and the header documentation for them would be introduced in
the same patch, along with examples of how they would be used.

I really think we should break these up as some are used in one case,
and others in another and it is a pain to have a pile of abstractions
that are all using these functions in different ways.

> +static inline void page_frag_alloc_abort(struct page_frag_cache *nc,
> +                                        unsigned int fragsz)
> +{
> +       VM_BUG_ON(fragsz > nc->offset);
> +
> +       nc->pagecnt_bias++;
> +       nc->offset -= fragsz;
> +}
> +

We should probably have the same checks here that you had on the earlier
commit. We should not be allowing blind changes. If we are using the
commit or abort interfaces, we should be validating a page frag with
them to confirm that the request to modify this is legitimate.

>  void page_frag_free(void *addr);
>
>  #endif
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> index f55d34cf7d43..5ea4b663ab8e 100644
> --- a/mm/page_frag_cache.c
> +++ b/mm/page_frag_cache.c
> @@ -112,6 +112,27 @@ unsigned int __page_frag_cache_commit_noref(struct page_frag_cache *nc,
>  }
>  EXPORT_SYMBOL(__page_frag_cache_commit_noref);
>
> +void *__page_frag_alloc_refill_probe_align(struct page_frag_cache *nc,
> +                                          unsigned int fragsz,
> +                                          struct page_frag *pfrag,
> +                                          unsigned int align_mask)
> +{
> +       unsigned long encoded_page = nc->encoded_page;
> +       unsigned int size, offset;
> +
> +       size = PAGE_SIZE << encoded_page_decode_order(encoded_page);
> +       offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
> +       if (unlikely(!encoded_page || offset + fragsz > size))
> +               return NULL;
> +
> +       pfrag->page = encoded_page_decode_page(encoded_page);
> +       pfrag->size = size - offset;
> +       pfrag->offset = offset;
> +
> +       return encoded_page_decode_virt(encoded_page) + offset;
> +}
> +EXPORT_SYMBOL(__page_frag_alloc_refill_probe_align);
> +

If I am not mistaken, this would be the equivalent of allocating a size
0 fragment, right? The only difference is that you are copying out the
"remaining" size, but we could get that from the offset if we knew the
size, couldn't we? Would it maybe make sense to look at limiting this
to PAGE_SIZE instead of passing the size of the actual fragment?

>  void *__page_frag_cache_prepare(struct page_frag_cache *nc, unsigned int fragsz,
>                                 struct page_frag *pfrag, gfp_t gfp_mask,
>                                 unsigned int align_mask)
> --
> 2.33.0
>