On Wed, Nov 27, 2024 at 2:53 PM Kanchana P Sridhar <kanchana.p.sridhar@xxxxxxxxx> wrote:
>
> Modified zswap_store() to store the folio in batches of
> SWAP_CRYPTO_BATCH_SIZE pages. Accordingly, refactored zswap_store_page()
> into zswap_store_pages() that processes a range of pages in the folio.
> zswap_store_pages() is a vectorized version of zswap_store_page().
>
> For now, zswap_store_pages() will sequentially compress these pages with
> zswap_compress().
>
> These changes are follow-up to code review comments received for [1], and
> are intended to set up zswap_store() for batching with Intel IAA.
>
> [1]: https://patchwork.kernel.org/project/linux-mm/patch/20241123070127.332773-11-kanchana.p.sridhar@xxxxxxxxx/
>
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@xxxxxxxxx>
> ---
>  include/linux/zswap.h |   1 +
>  mm/zswap.c            | 154 ++++++++++++++++++++++++------------------
>  2 files changed, 88 insertions(+), 67 deletions(-)
>
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index d961ead91bf1..05a81e750744 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -7,6 +7,7 @@
>
>  struct lruvec;
>
> +#define SWAP_CRYPTO_BATCH_SIZE 8UL
>  extern atomic_long_t zswap_stored_pages;
>
>  #ifdef CONFIG_ZSWAP
> diff --git a/mm/zswap.c b/mm/zswap.c
> index f6316b66fb23..b09d1023e775 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1409,78 +1409,96 @@ static void shrink_worker(struct work_struct *w)
>  * main API
>  **********************************/
>
> -static ssize_t zswap_store_page(struct page *page,
> -				struct obj_cgroup *objcg,
> -				struct zswap_pool *pool)
> +/*
> + * Store multiple pages in @folio, starting from the page at index @si up to
> + * and including the page at index @ei.
> + */
> +static ssize_t zswap_store_pages(struct folio *folio,
> +				 long si,
> +				 long ei,
> +				 struct obj_cgroup *objcg,
> +				 struct zswap_pool *pool)
>  {
> -	swp_entry_t page_swpentry = page_swap_entry(page);
> +	struct page *page;
> +	swp_entry_t page_swpentry;
>  	struct zswap_entry *entry, *old;
> +	size_t compressed_bytes = 0;
> +	u8 nr_pages = ei - si + 1;
> +	u8 i;
> +
> +	for (i = 0; i < nr_pages; ++i) {
> +		page = folio_page(folio, si + i);
> +		page_swpentry = page_swap_entry(page);
> +
> +		/* allocate entry */
> +		entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> +		if (!entry) {
> +			zswap_reject_kmemcache_fail++;
> +			return -EINVAL;
> +		}

I think this patch is buggy on its own: for example, if an allocation
fails partway through the loop above, we return without cleaning up the
allocations made in the earlier iterations.

I think this is fixed in patch 2, but we cannot introduce bugs in
between patches. As I mentioned, I also don't think the helpers in
patch 2 really help. Please combine the changes and keep them in the
main series (unless you have a reason not to).
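Purely to illustrate the concern (this is only a sketch, and the helper
name is made up), the batch path would need something along these lines
to unwind the entries it has already published before returning an
error partway through:

/*
 * Illustrative only -- hypothetical helper. On failure at batch index
 * i, erase and free the entries already published for pages
 * [si, si + i), mirroring the per-folio cleanup zswap_store() does in
 * its error path today.
 */
static void zswap_store_pages_cleanup(struct folio *folio, long si, u8 i)
{
	struct zswap_entry *entry;
	swp_entry_t swp;

	while (i--) {
		swp = page_swap_entry(folio_page(folio, si + i));
		entry = xa_erase(swap_zswap_tree(swp), swp_offset(swp));
		if (entry)
			zswap_entry_free(entry);
	}
}

Whether that unwinding lives in zswap_store_pages() itself or stays in
zswap_store()'s existing error path is secondary; the point is that a
failure in the middle of a batch must not leave partially-stored pages
behind between patches 1 and 2.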
>
> -	/* allocate entry */
> -	entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> -	if (!entry) {
> -		zswap_reject_kmemcache_fail++;
> -		return -EINVAL;
> -	}
> -
> -	if (!zswap_compress(page, entry, pool))
> -		goto compress_failed;
> +		if (!zswap_compress(page, entry, pool))
> +			goto compress_failed;
>
> -	old = xa_store(swap_zswap_tree(page_swpentry),
> -		       swp_offset(page_swpentry),
> -		       entry, GFP_KERNEL);
> -	if (xa_is_err(old)) {
> -		int err = xa_err(old);
> +		old = xa_store(swap_zswap_tree(page_swpentry),
> +			       swp_offset(page_swpentry),
> +			       entry, GFP_KERNEL);
> +		if (xa_is_err(old)) {
> +			int err = xa_err(old);
>
> -		WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> -		zswap_reject_alloc_fail++;
> -		goto store_failed;
> -	}
> +			WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> +			zswap_reject_alloc_fail++;
> +			goto store_failed;
> +		}
>
> -	/*
> -	 * We may have had an existing entry that became stale when
> -	 * the folio was redirtied and now the new version is being
> -	 * swapped out. Get rid of the old.
> -	 */
> -	if (old)
> -		zswap_entry_free(old);
> +		/*
> +		 * We may have had an existing entry that became stale when
> +		 * the folio was redirtied and now the new version is being
> +		 * swapped out. Get rid of the old.
> +		 */
> +		if (old)
> +			zswap_entry_free(old);
>
> -	/*
> -	 * The entry is successfully compressed and stored in the tree, there is
> -	 * no further possibility of failure. Grab refs to the pool and objcg.
> -	 * These refs will be dropped by zswap_entry_free() when the entry is
> -	 * removed from the tree.
> -	 */
> -	zswap_pool_get(pool);
> -	if (objcg)
> -		obj_cgroup_get(objcg);
> +		/*
> +		 * The entry is successfully compressed and stored in the tree, there is
> +		 * no further possibility of failure. Grab refs to the pool and objcg.
> +		 * These refs will be dropped by zswap_entry_free() when the entry is
> +		 * removed from the tree.
> +		 */
> +		zswap_pool_get(pool);
> +		if (objcg)
> +			obj_cgroup_get(objcg);
>
> -	/*
> -	 * We finish initializing the entry while it's already in xarray.
> -	 * This is safe because:
> -	 *
> -	 * 1. Concurrent stores and invalidations are excluded by folio lock.
> -	 *
> -	 * 2. Writeback is excluded by the entry not being on the LRU yet.
> -	 *    The publishing order matters to prevent writeback from seeing
> -	 *    an incoherent entry.
> -	 */
> -	entry->pool = pool;
> -	entry->swpentry = page_swpentry;
> -	entry->objcg = objcg;
> -	entry->referenced = true;
> -	if (entry->length) {
> -		INIT_LIST_HEAD(&entry->lru);
> -		zswap_lru_add(&zswap_list_lru, entry);
> -	}
> +		/*
> +		 * We finish initializing the entry while it's already in xarray.
> +		 * This is safe because:
> +		 *
> +		 * 1. Concurrent stores and invalidations are excluded by folio lock.
> +		 *
> +		 * 2. Writeback is excluded by the entry not being on the LRU yet.
> +		 *    The publishing order matters to prevent writeback from seeing
> +		 *    an incoherent entry.
> +		 */
> +		entry->pool = pool;
> +		entry->swpentry = page_swpentry;
> +		entry->objcg = objcg;
> +		entry->referenced = true;
> +		if (entry->length) {
> +			INIT_LIST_HEAD(&entry->lru);
> +			zswap_lru_add(&zswap_list_lru, entry);
> +		}
>
> -	return entry->length;
> +		compressed_bytes += entry->length;
> +		continue;
>
>  store_failed:
> -	zpool_free(pool->zpool, entry->handle);
> +		zpool_free(pool->zpool, entry->handle);
>  compress_failed:
> -	zswap_entry_cache_free(entry);
> -	return -EINVAL;
> +		zswap_entry_cache_free(entry);
> +		return -EINVAL;
> +	}
> +
> +	return compressed_bytes;
>  }
>
>  bool zswap_store(struct folio *folio)
> @@ -1492,7 +1510,7 @@ bool zswap_store(struct folio *folio)
>  	struct zswap_pool *pool;
>  	size_t compressed_bytes = 0;
>  	bool ret = false;
> -	long index;
> +	long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;
>
>  	VM_WARN_ON_ONCE(!folio_test_locked(folio));
>  	VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1526,11 +1544,13 @@ bool zswap_store(struct folio *folio)
>  		mem_cgroup_put(memcg);
>  	}
>
> -	for (index = 0; index < nr_pages; ++index) {
> -		struct page *page = folio_page(folio, index);
> +	/* Store the folio in batches of SWAP_CRYPTO_BATCH_SIZE pages. */
> +	for (si = 0, ei = min(si + incr - 1, nr_pages - 1);
> +	     ((si < nr_pages) && (ei < nr_pages));
> +	     si = ei + 1, ei = min(si + incr - 1, nr_pages - 1)) {
>  		ssize_t bytes;
>
> -		bytes = zswap_store_page(page, objcg, pool);
> +		bytes = zswap_store_pages(folio, si, ei, objcg, pool);
>  		if (bytes < 0)
>  			goto put_pool;
>  		compressed_bytes += bytes;
> @@ -1565,9 +1585,9 @@ bool zswap_store(struct folio *folio)
>  		struct zswap_entry *entry;
>  		struct xarray *tree;
>
> -		for (index = 0; index < nr_pages; ++index) {
> -			tree = swap_zswap_tree(swp_entry(type, offset + index));
> -			entry = xa_erase(tree, offset + index);
> +		for (si = 0; si < nr_pages; ++si) {
> +			tree = swap_zswap_tree(swp_entry(type, offset + si));
> +			entry = xa_erase(tree, offset + si);
>  			if (entry)
>  				zswap_entry_free(entry);
>  		}
> --
> 2.27.0
>
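As a side note on the new loop bounds in zswap_store(): here is a small
standalone sketch (plain userspace C, with nr_pages = 20 picked
arbitrarily) of the [si, ei] windows the new loop walks, just to make
the batching arithmetic easy to eyeball:

/* Standalone sketch (not kernel code) of the batch windows produced by
 * the new zswap_store() loop; nr_pages = 20 is an arbitrary example.
 */
#include <stdio.h>

#define SWAP_CRYPTO_BATCH_SIZE 8UL

static long min_long(long a, long b)
{
	return a < b ? a : b;
}

int main(void)
{
	long nr_pages = 20;
	long si, ei, incr = SWAP_CRYPTO_BATCH_SIZE;

	for (si = 0, ei = min_long(si + incr - 1, nr_pages - 1);
	     si < nr_pages && ei < nr_pages;
	     si = ei + 1, ei = min_long(si + incr - 1, nr_pages - 1))
		printf("batch: pages [%ld, %ld]\n", si, ei);

	return 0;
}

It prints [0, 7], [8, 15], [16, 19]: full SWAP_CRYPTO_BATCH_SIZE
windows plus a short tail, which matches the behavior described in the
changelog.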