On Fri, Dec 20, 2024 at 10:31 PM Kanchana P Sridhar
<kanchana.p.sridhar@xxxxxxxxx> wrote:
>
> This patch introduces zswap_store_folio() that implements all the computes
> done earlier in zswap_store_page() for a single-page, for all the pages in
> a folio. This allows us to move the loop over the folio's pages from
> zswap_store() to zswap_store_folio().
>
> A distinct zswap_compress_folio() is also added, that simply calls
> zswap_compress() for each page in the folio it is called with.

The git diff looks funky; it may make things clearer to introduce
zswap_compress_folio() in a separate patch.

>
> zswap_store_folio() starts by allocating all zswap entries required to
> store the folio. Next, it calls zswap_compress_folio() and finally, adds
> the entries to the xarray and LRU.
>
> The error handling and cleanup required for all failure scenarios that can
> occur while storing a folio in zswap is now consolidated to a
> "store_folio_failed" label in zswap_store_folio().
>
> These changes facilitate developing support for compress batching in
> zswap_store_folio().
>
> Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@xxxxxxxxx>
> ---
>  mm/zswap.c | 183 +++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 116 insertions(+), 67 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 99cd78891fd0..1be0f1807bfc 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1467,77 +1467,129 @@ static void shrink_worker(struct work_struct *w)
>   * main API
>   **********************************/
>
> -static ssize_t zswap_store_page(struct page *page,
> -                                struct obj_cgroup *objcg,
> -                                struct zswap_pool *pool)
> +static bool zswap_compress_folio(struct folio *folio,
> +                                 struct zswap_entry *entries[],
> +                                 struct zswap_pool *pool)
>  {
> -       swp_entry_t page_swpentry = page_swap_entry(page);
> -       struct zswap_entry *entry, *old;
> +       long index, nr_pages = folio_nr_pages(folio);
>
> -       /* allocate entry */
> -       entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
> -       if (!entry) {
> -               zswap_reject_kmemcache_fail++;
> -               return -EINVAL;
> +       for (index = 0; index < nr_pages; ++index) {
> +               struct page *page = folio_page(folio, index);
> +
> +               if (!zswap_compress(page, entries[index], pool))
> +                       return false;
>         }
>
> -       if (!zswap_compress(page, entry, pool))
> -               goto compress_failed;
> +       return true;
> +}
>
> -       old = xa_store(swap_zswap_tree(page_swpentry),
> -                      swp_offset(page_swpentry),
> -                      entry, GFP_KERNEL);
> -       if (xa_is_err(old)) {
> -               int err = xa_err(old);
> +/*
> + * Store all pages in a folio.
> + *
> + * The error handling from all failure points is consolidated to the
> + * "store_folio_failed" label, based on the initialization of the zswap entries'
> + * handles to ERR_PTR(-EINVAL) at allocation time, and the fact that the
> + * entry's handle is subsequently modified only upon a successful zpool_malloc()
> + * after the page is compressed.
> + */
> +static ssize_t zswap_store_folio(struct folio *folio,
> +                                 struct obj_cgroup *objcg,
> +                                 struct zswap_pool *pool)
> +{
> +       long index, nr_pages = folio_nr_pages(folio);
> +       struct zswap_entry **entries = NULL;
> +       int node_id = folio_nid(folio);
> +       size_t compressed_bytes = 0;
>
> -               WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> -               zswap_reject_alloc_fail++;
> -               goto store_failed;
> +       entries = kmalloc(nr_pages * sizeof(*entries), GFP_KERNEL);

We can probably use kcalloc() here.
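i.e. something like this (untested); the zeroing isn't strictly needed,
but we get the overflow check on the multiplication for free. If the
zeroing is unwanted, kmalloc_array() gives the same check:

        entries = kcalloc(nr_pages, sizeof(*entries), GFP_KERNEL);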
> +       if (!entries)
> +               return -ENOMEM;
> +
> +       /* allocate entries */

This comment can be dropped.

> +       for (index = 0; index < nr_pages; ++index) {
> +               entries[index] = zswap_entry_cache_alloc(GFP_KERNEL, node_id);
> +
> +               if (!entries[index]) {
> +                       zswap_reject_kmemcache_fail++;
> +                       nr_pages = index;
> +                       goto store_folio_failed;
> +               }
> +
> +               entries[index]->handle = (unsigned long)ERR_PTR(-EINVAL);
>         }
>
> -       /*
> -        * We may have had an existing entry that became stale when
> -        * the folio was redirtied and now the new version is being
> -        * swapped out. Get rid of the old.
> -        */
> -       if (old)
> -               zswap_entry_free(old);
> +       if (!zswap_compress_folio(folio, entries, pool))
> +               goto store_folio_failed;
>
> -       /*
> -        * The entry is successfully compressed and stored in the tree, there is
> -        * no further possibility of failure. Grab refs to the pool and objcg.
> -        * These refs will be dropped by zswap_entry_free() when the entry is
> -        * removed from the tree.
> -        */
> -       zswap_pool_get(pool);
> -       if (objcg)
> -               obj_cgroup_get(objcg);
> +       for (index = 0; index < nr_pages; ++index) {
> +               swp_entry_t page_swpentry = page_swap_entry(folio_page(folio, index));
> +               struct zswap_entry *old, *entry = entries[index];
> +
> +               old = xa_store(swap_zswap_tree(page_swpentry),
> +                              swp_offset(page_swpentry),
> +                              entry, GFP_KERNEL);
> +               if (xa_is_err(old)) {
> +                       int err = xa_err(old);
> +
> +                       WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> +                       zswap_reject_alloc_fail++;
> +                       goto store_folio_failed;
> +               }
>
> -       /*
> -        * We finish initializing the entry while it's already in xarray.
> -        * This is safe because:
> -        *
> -        * 1. Concurrent stores and invalidations are excluded by folio lock.
> -        *
> -        * 2. Writeback is excluded by the entry not being on the LRU yet.
> -        *    The publishing order matters to prevent writeback from seeing
> -        *    an incoherent entry.
> -        */
> -       entry->pool = pool;
> -       entry->swpentry = page_swpentry;
> -       entry->objcg = objcg;
> -       entry->referenced = true;
> -       if (entry->length) {
> -               INIT_LIST_HEAD(&entry->lru);
> -               zswap_lru_add(&zswap_list_lru, entry);
> +               /*
> +                * We may have had an existing entry that became stale when
> +                * the folio was redirtied and now the new version is being
> +                * swapped out. Get rid of the old.
> +                */
> +               if (old)
> +                       zswap_entry_free(old);
> +
> +               /*
> +                * The entry is successfully compressed and stored in the tree, there is
> +                * no further possibility of failure. Grab refs to the pool and objcg.
> +                * These refs will be dropped by zswap_entry_free() when the entry is
> +                * removed from the tree.
> +                */
> +               zswap_pool_get(pool);
> +               if (objcg)
> +                       obj_cgroup_get(objcg);
> +
> +               /*
> +                * We finish initializing the entry while it's already in xarray.
> +                * This is safe because:
> +                *
> +                * 1. Concurrent stores and invalidations are excluded by folio lock.
> +                *
> +                * 2. Writeback is excluded by the entry not being on the LRU yet.
> +                *    The publishing order matters to prevent writeback from seeing
> +                *    an incoherent entry.
> +                */
> +               entry->pool = pool;
> +               entry->swpentry = page_swpentry;
> +               entry->objcg = objcg;
> +               entry->referenced = true;
> +               if (entry->length) {
> +                       INIT_LIST_HEAD(&entry->lru);
> +                       zswap_lru_add(&zswap_list_lru, entry);
> +               }
> +
> +               compressed_bytes += entry->length;
>         }
>
> -       return entry->length;
> +       kfree(entries);
> +
> +       return compressed_bytes;
> +
> +store_folio_failed:
> +       for (index = 0; index < nr_pages; ++index) {
> +               if (!IS_ERR_VALUE(entries[index]->handle))
> +                       zpool_free(pool->zpool, entries[index]->handle);
> +
> +               zswap_entry_cache_free(entries[index]);
> +       }

If there is a failure in xa_store() halfway through the entries, this
loop will free all the compressed objects and entries. But some of the
entries are already in the xarray, and zswap_store() will try to free
them again. This seems like a bug, or did I miss something here?
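If it is a bug, one way out might be to remember how far the xa_store()
loop got and only tear down the entries that were never published, since
zswap_store()'s failure path already erases and frees whatever it finds
in the tree. A rough sketch (untested; store_fail_idx would be a new
local initialized to 0, and set to the failing index just before the
goto in the xa_store() loop):

store_folio_failed:
        /*
         * Sketch only: entries in [0, store_fail_idx) were already
         * published to the xarray, grabbed pool/objcg refs, and will
         * be erased and freed by zswap_store()'s error path, so skip
         * them here and only clean up the unpublished entries.
         */
        for (index = store_fail_idx; index < nr_pages; ++index) {
                if (!IS_ERR_VALUE(entries[index]->handle))
                        zpool_free(pool->zpool, entries[index]->handle);

                zswap_entry_cache_free(entries[index]);
        }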
> +
> +       kfree(entries);
>
> -store_failed:
> -       zpool_free(pool->zpool, entry->handle);
> -compress_failed:
> -       zswap_entry_cache_free(entry);
>         return -EINVAL;
>  }
>
> @@ -1549,8 +1601,8 @@ bool zswap_store(struct folio *folio)
>         struct mem_cgroup *memcg = NULL;
>         struct zswap_pool *pool;
>         size_t compressed_bytes = 0;
> +       ssize_t bytes;
>         bool ret = false;
> -       long index;
>
>         VM_WARN_ON_ONCE(!folio_test_locked(folio));
>         VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
> @@ -1584,15 +1636,11 @@ bool zswap_store(struct folio *folio)
>                 mem_cgroup_put(memcg);
>         }
>
> -       for (index = 0; index < nr_pages; ++index) {
> -               struct page *page = folio_page(folio, index);
> -               ssize_t bytes;
> +       bytes = zswap_store_folio(folio, objcg, pool);
> +       if (bytes < 0)
> +               goto put_pool;
>
> -               bytes = zswap_store_page(page, objcg, pool);
> -               if (bytes < 0)
> -                       goto put_pool;
> -               compressed_bytes += bytes;
> -       }
> +       compressed_bytes = bytes;

What's the point of having both compressed_bytes and bytes now?

>
>         if (objcg) {
>                 obj_cgroup_charge_zswap(objcg, compressed_bytes);
> @@ -1622,6 +1670,7 @@ bool zswap_store(struct folio *folio)
>                 pgoff_t offset = swp_offset(swp);
>                 struct zswap_entry *entry;
>                 struct xarray *tree;
> +               long index;
>
>                 for (index = 0; index < nr_pages; ++index) {
>                         tree = swap_zswap_tree(swp_entry(type, offset + index));
> --
> 2.27.0
>
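To elaborate on the compressed_bytes/bytes point above: since
zswap_store_folio() now returns the accumulated total itself, one of the
two locals can go away, e.g. keeping only the signed one (untested):

        bytes = zswap_store_folio(folio, objcg, pool);
        if (bytes < 0)
                goto put_pool;

        if (objcg) {
                obj_cgroup_charge_zswap(objcg, bytes);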