Re: [RFC PATCH 1/7] mm: zswap: add pool shrinking mechanism

On Mon, Jun 5, 2023 at 5:29 PM Johannes Weiner <hannes@xxxxxxxxxxx> wrote:
>
> Hi Domenico,
>
> On Mon, Jun 05, 2023 at 10:54:13AM +0200, Domenico Cerasuolo wrote:
> > @@ -364,6 +369,9 @@ static void zswap_free_entry(struct zswap_entry *entry)
> >       if (!entry->length)
> >               atomic_dec(&zswap_same_filled_pages);
> >       else {
> > +             spin_lock(&entry->pool->lock);
> > +             list_del_init(&entry->lru);
> > +             spin_unlock(&entry->pool->lock);
>
> This should be list_del(), as the entry is freed right after this and
> the list isn't reused anymore.
>
> The slab memory is recycled, but the allocation site (commented on
> below) doesn't need the list initialized.
>
> However, I think it also needs to check !zpool_evictable(). If
> alloc/store doesn't do the list_add(), this would be a double delete.
>

Thanks Johannes, I'm addressing all of your comments on the series in a V2.
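
For the free path I'm leaning towards something like this before the
zpool_free() (untested sketch): a plain list_del() guarded by the same
zpool_evictable() check used at the store site, so entries from
evictable backends, which were never added to the LRU, don't see a
double delete:

	/* zpool_evictable will be removed once all 3 backends have migrated */
	if (!zpool_evictable(entry->pool->zpool)) {
		spin_lock(&entry->pool->lock);
		list_del(&entry->lru);
		spin_unlock(&entry->pool->lock);
	}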

> > @@ -584,14 +592,65 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> >       return NULL;
> >  }
> >
> > +static int zswap_shrink(struct zswap_pool *pool)
> > +{
> > +     struct zswap_entry *lru_entry, *tree_entry = NULL;
> > +     struct zswap_header *zhdr;
> > +     struct zswap_tree *tree;
> > +     swp_entry_t swpentry;
> > +     int ret;
> > +
> > +     /* get a reclaimable entry from LRU */
> > +     spin_lock(&pool->lock);
> > +     if (list_empty(&pool->lru)) {
> > +             spin_unlock(&pool->lock);
> > +             return -EINVAL;
> > +     }
> > +     lru_entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> > +     list_del_init(&lru_entry->lru);
> > +     zhdr = zpool_map_handle(pool->zpool, lru_entry->handle, ZPOOL_MM_RO);
> > +     tree = zswap_trees[swp_type(zhdr->swpentry)];
> > +     zpool_unmap_handle(pool->zpool, lru_entry->handle);
> > +     swpentry = zhdr->swpentry;
> > +     spin_unlock(&pool->lock);
>
> Once the pool lock is dropped, the lru_entry might get freed. But the
> swpentry is copied to the stack, and lru_entry isn't deref'd again
> until the entry is verified to still be alive in the tree.
>
> This could use a comment.
>
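
Agreed, I'll add something along these lines above the tree lookup:

	/*
	 * Once the pool lock is dropped, lru_entry might be freed. The
	 * swpentry copied to the stack is all we use from here on, and
	 * lru_entry is only dereferenced again after the lookup below
	 * confirms it is still alive in the tree.
	 */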
> > +     /* hold a reference from tree so it won't be freed during writeback */
> > +     spin_lock(&tree->lock);
> > +     tree_entry = zswap_entry_find_get(&tree->rbroot, swp_offset(swpentry));
> > +     if (tree_entry != lru_entry) {
> > +             if (tree_entry)
> > +                     zswap_entry_put(tree, tree_entry);
> > +             spin_unlock(&tree->lock);
> > +             return -EAGAIN;
> > +     }
> > +     spin_unlock(&tree->lock);
>
> It's pretty outrageous how much simpler this is compared to the
> <backend>_reclaim_page() functions! The backends have to jump through
> a lot of hoops to serialize against freeing, whereas zswap can simply
> hold a reference. This is clearly a much better design.
>
> > +     ret = zswap_writeback_entry(pool->zpool, lru_entry->handle);
> > +
> > +     spin_lock(&tree->lock);
> > +     if (ret) {
> > +             spin_lock(&pool->lock);
> > +             list_move(&lru_entry->lru, &pool->lru);
> > +             spin_unlock(&pool->lock);
> > +     }
> > +     zswap_entry_put(tree, tree_entry);
> > +     spin_unlock(&tree->lock);
> > +
> > +     return ret ? -EAGAIN : 0;
> > +}
> > +
> >  static void shrink_worker(struct work_struct *w)
> >  {
> >       struct zswap_pool *pool = container_of(w, typeof(*pool),
> >                                               shrink_work);
> >       int ret, failures = 0;
> >
> > +     /* zpool_evictable will be removed once all 3 backends have migrated*/
>
> Missing space between text and */
>
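
Ack, will fix:

	/* zpool_evictable will be removed once all 3 backends have migrated */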
> >       do {
> > -             ret = zpool_shrink(pool->zpool, 1, NULL);
> > +             if (zpool_evictable(pool->zpool))
> > +                     ret = zpool_shrink(pool->zpool, 1, NULL);
> > +             else
> > +                     ret = zswap_shrink(pool);
> >               if (ret) {
> >                       zswap_reject_reclaim_fail++;
> >                       if (ret != -EAGAIN)
> > @@ -655,6 +714,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >        */
> >       kref_init(&pool->kref);
> >       INIT_LIST_HEAD(&pool->list);
> > +     INIT_LIST_HEAD(&pool->lru);
> > +     spin_lock_init(&pool->lock);
> >       INIT_WORK(&pool->shrink_work, shrink_worker);
> >
> >       zswap_pool_debug("created", pool);
> > @@ -1270,7 +1331,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >       }
> >
> >       /* store */
> > -     hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
> > +     hlen = sizeof(zhdr);
> >       gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> >       if (zpool_malloc_support_movable(entry->pool->zpool))
> >               gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> > @@ -1313,6 +1374,13 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
> >                       zswap_entry_put(tree, dupentry);
> >               }
> >       } while (ret == -EEXIST);
> > +     INIT_LIST_HEAD(&entry->lru);
>
> The list_add() below initializes the entry, so this shouldn't be
> needed.
>
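
Right, list_add() sets up the node's links itself, so V2 will drop the
INIT_LIST_HEAD() and keep only the guarded add (sketch):

	/* zpool_evictable will be removed once all 3 backends have migrated */
	if (entry->length && !zpool_evictable(entry->pool->zpool)) {
		spin_lock(&entry->pool->lock);
		list_add(&entry->lru, &entry->pool->lru);
		spin_unlock(&entry->pool->lock);
	}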
> > +     /* zpool_evictable will be removed once all 3 backends have migrated*/
> > +     if (entry->length && !zpool_evictable(entry->pool->zpool)) {
> > +             spin_lock(&entry->pool->lock);
> > +             list_add(&entry->lru, &entry->pool->lru);
> > +             spin_unlock(&entry->pool->lock);
> > +     }
> >       spin_unlock(&tree->lock);
> >
> >       /* update stats */
