On Thu, Mar 28, 2024 at 1:31 AM Chengming Zhou <chengming.zhou@xxxxxxxxx> wrote:
>
> On 2024/3/26 07:50, Yosry Ahmed wrote:
> > zswap_entry_free() performs four types of cleanups before freeing a
> > zswap_entry:
> > - Deletes the entry from the LRU.
> > - Frees compressed memory.
> > - Puts the pool reference.
> > - Uncharges the compressed memory and puts the objcg.
> >
> > zswap_entry_free() always expects a fully initialized entry. Allow
> > zswap_entry_free() to handle partially initialized entries by making it
> > possible to identify what cleanups are needed as follows:
> > - Allocate entries with __GFP_ZERO and initialize zswap_entry.lru when
> >   the entry is allocated. Pointers are NULL and length is zero upon
> >   initialization.
> > - Use zswap_entry.length to identify if there is compressed memory to
> >   free. This is possible now that zero-filled pages are handled
> >   separately, so a length of zero means we did not successfully compress
> >   the page.
> > - Only initialize entry->objcg after the memory is charged in
> >   zswap_store().
> >
> > With this in place, use zswap_entry_free() in the failure path of
> > zswap_store() to clean up partially initialized entries. This simplifies
> > the cleanup code in zswap_store(). While we are at it, rename the
> > remaining cleanup labels to more meaningful names.
>
> Not sure if it's worthwhile to clean up the fail path when the normal path
> gets a little more verbose.

I was on the fence about this, so I thought I would just send it and see
what others think.

On one hand, it makes the initialization more robust because the
zswap_entry is always in a clean, identifiable state; on the other hand,
it adds churn to the normal path, as you noticed. Also, after removing
the handling of zero-length entries from the failure path, the cleanup
code isn't that bad without this patch anyway.

So if no one else thinks this is useful, I will drop the patch in the
next version. Thanks!
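
For reference, the invariant this relies on boils down to the following
(just a restatement of the zswap_entry_free() hunk quoted below, with the
reasoning spelled out as comments and the final freeing of the entry
elided, not a new change):

  static void zswap_entry_free(struct zswap_entry *entry)
  {
          /* entry->lru is initialized at allocation and only becomes
           * non-empty once zswap_lru_add() has run. */
          if (!list_empty(&entry->lru))
                  zswap_lru_del(&zswap_list_lru, entry);

          /* entry->length stays zero until zswap_compress() succeeds. */
          if (entry->length)
                  zpool_free(zswap_find_zpool(entry), entry->handle);

          /* entry->pool is NULL until zswap_pool_current_get() succeeds. */
          if (entry->pool)
                  zswap_pool_put(entry->pool);

          /* entry->objcg is only set after obj_cgroup_charge_zswap(). */
          if (entry->objcg) {
                  obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
                  obj_cgroup_put(entry->objcg);
          }

          /* ... the entry itself is then freed as before ... */
  }

With that, any failure in zswap_store() after the allocation can just go
to a single free_entry label.
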
>
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
> > ---
> >  mm/zswap.c | 62 ++++++++++++++++++++++++++----------------------------
> >  1 file changed, 30 insertions(+), 32 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index 9357328d940af..c50f9df230ca3 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -774,12 +774,13 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> >  **********************************/
> >  static struct kmem_cache *zswap_entry_cache;
> >
> > -static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> > +static struct zswap_entry *zswap_entry_cache_alloc(int nid)
> >  {
> >          struct zswap_entry *entry;
> > -        entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> > -        if (!entry)
> > -                return NULL;
> > +        entry = kmem_cache_alloc_node(zswap_entry_cache,
> > +                                      GFP_KERNEL | __GFP_ZERO, nid);
> > +        if (entry)
> > +                INIT_LIST_HEAD(&entry->lru);
> >          return entry;
> >  }
> >
> > @@ -795,9 +796,12 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> >
> >  static void zswap_entry_free(struct zswap_entry *entry)
> >  {
> > -        zswap_lru_del(&zswap_list_lru, entry);
> > -        zpool_free(zswap_find_zpool(entry), entry->handle);
> > -        zswap_pool_put(entry->pool);
> > +        if (!list_empty(&entry->lru))
> > +                zswap_lru_del(&zswap_list_lru, entry);
> > +        if (entry->length)
> > +                zpool_free(zswap_find_zpool(entry), entry->handle);
> > +        if (entry->pool)
> > +                zswap_pool_put(entry->pool);
> >          if (entry->objcg) {
> >                  obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
> >                  obj_cgroup_put(entry->objcg);
> > @@ -1447,7 +1451,7 @@ bool zswap_store(struct folio *folio)
> >                  return false;
> >
> >          if (!zswap_enabled)
> > -                goto check_old;
> > +                goto erase_old;
> >
> >          /* Check cgroup limits */
> >          objcg = get_obj_cgroup_from_folio(folio);
> > @@ -1455,54 +1459,52 @@
> >                  memcg = get_mem_cgroup_from_objcg(objcg);
> >                  if (shrink_memcg(memcg)) {
> >                          mem_cgroup_put(memcg);
> > -                        goto reject;
> > +                        goto put_objcg;
> >                  }
> >                  mem_cgroup_put(memcg);
> >          }
> >
> >          if (zswap_is_folio_zero_filled(folio)) {
> >                  if (zswap_store_zero_filled(tree, offset, objcg))
> > -                        goto reject;
> > +                        goto put_objcg;
> >                  goto stored;
> >          }
> >
> >          if (!zswap_non_zero_filled_pages_enabled)
> > -                goto reject;
> > +                goto put_objcg;
> >
> >          if (!zswap_check_limit())
> > -                goto reject;
> > +                goto put_objcg;
> >
> > -        entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
> > +        entry = zswap_entry_cache_alloc(folio_nid(folio));
> >          if (!entry) {
> >                  zswap_reject_kmemcache_fail++;
> > -                goto reject;
> > +                goto put_objcg;
> >          }
> >
> > -        /* if entry is successfully added, it keeps the reference */
> >          entry->pool = zswap_pool_current_get();
> >          if (!entry->pool)
> > -                goto freepage;
> > +                goto free_entry;
> >
> >          if (objcg) {
> >                  memcg = get_mem_cgroup_from_objcg(objcg);
> >                  if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
> >                          mem_cgroup_put(memcg);
> > -                        goto put_pool;
> > +                        goto free_entry;
> >                  }
> >                  mem_cgroup_put(memcg);
> >          }
> >
> >          if (!zswap_compress(folio, entry))
> > -                goto put_pool;
> > -
> > -        entry->swpentry = swp;
> > -        entry->objcg = objcg;
> > +                goto free_entry;
> >
> >          if (zswap_tree_store(tree, offset, entry))
> > -                goto store_failed;
> > +                goto free_entry;
> >
> > -        if (objcg)
> > +        if (objcg) {
> >                  obj_cgroup_charge_zswap(objcg, entry->length);
> > +                entry->objcg = objcg;
> > +        }
> >
> >          /*
> >           * We finish initializing the entry while it's already in xarray.
> > @@ -1514,7 +1516,7 @@ bool zswap_store(struct folio *folio)
> >           * The publishing order matters to prevent writeback from seeing
> >           * an incoherent entry.
> >           */
> > -        INIT_LIST_HEAD(&entry->lru);
> > +        entry->swpentry = swp;
> >          zswap_lru_add(&zswap_list_lru, entry);
> >
> >  stored:
> > @@ -1525,17 +1527,13 @@ bool zswap_store(struct folio *folio)
> >
> >          return true;
> >
> > -store_failed:
> > -        zpool_free(zswap_find_zpool(entry), entry->handle);
> > -put_pool:
> > -        zswap_pool_put(entry->pool);
> > -freepage:
> > -        zswap_entry_cache_free(entry);
> > -reject:
> > +free_entry:
> > +        zswap_entry_free(entry);
> > +put_objcg:
> >          obj_cgroup_put(objcg);
> >          if (zswap_pool_reached_full)
> >                  queue_work(shrink_wq, &zswap_shrink_work);
> > -check_old:
> > +erase_old:
> >          /*
> >           * If the zswap store fails or zswap is disabled, we must invalidate the
> >           * possibly stale entry which was previously stored at this offset.