Re: [PATCH] mm/zswap: reverse zswap_entry tree/refcount relationship

Hello Dan,

On Sat, Nov 23, 2013 at 6:10 AM, Dan Streetman <ddstreet@xxxxxxxx> wrote:
> Currently, zswap_entry_put removes the entry from its tree if
> the resulting refcount is 0.  Several places in code put an
> entry's initial reference, but they also must remove the entry
> from its tree first, which makes the tree removal in zswap_entry_put
> redundant.
>
> I believe this has the refcount model backwards - the initial
> refcount reference shouldn't be managed by multiple different places
> in code, and the put function shouldn't be removing the entry
> from the tree.  I think the correct model is for the tree to be
> the owner of the initial entry reference.  This way, the only time
> any code needs to put the entry is if it's also done a get previously.
> The various places in code that remove the entry from the tree simply
> do that, and the zswap_rb_erase function does the put of the initial
> reference.
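>
> With this model, for example, the invalidate path in the patch
> below changes from
>
>         zswap_rb_erase(&tree->rbroot, entry);
>         zswap_entry_put(tree, entry);
>
> to simply
>
>         zswap_rb_erase(tree, entry);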
>
> This patch moves the initial referencing completely into the tree
> functions - zswap_rb_insert gets the entry, while zswap_rb_erase
> puts the entry.  The zswap_entry_get/put functions are still available
> for any code that needs to use an entry outside of the tree lock.
> Also, the zswap_entry_find_get function is renamed to zswap_rb_search_get
> since its behavior and return value are closer to zswap_rb_search
> than to zswap_entry_get.  All code that previously removed the entry from
> the tree and put it now only removes the entry from the tree.
>
> The comment headers for most of the tree insert/search/erase functions
> and the get/put functions are updated to clarify if the tree lock
> needs to be held as well as when the caller needs to get/put an
> entry (i.e. iff the caller is using the entry outside the tree lock).

I do not like this patch idea; it breaks the purity of the
zswap_rb_xxx() functions.  I think zswap_rb_xxx() should focus only
on rbtree operations.  As a minimal sketch, that means keeping the
current mainline shape:
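
        static void zswap_rb_erase(struct rb_root *root,
                                struct zswap_entry *entry)
        {
                if (!RB_EMPTY_NODE(&entry->rbnode)) {
                        rb_erase(&entry->rbnode, root);
                        RB_CLEAR_NODE(&entry->rbnode);
                }
                /* no refcount side effects; the entry's lifetime
                 * is the caller's concern, not the tree's
                 */
        }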

The current code might be redundant, but its logic is clear, so I do
not think it needs to change.  Each call site handles the entry's
lifetime explicitly, e.g. in zswap_frontswap_invalidate_page():
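
        /* remove from rbtree */
        zswap_rb_erase(&tree->rbroot, entry);

        /* drop the initial reference from entry creation */
        zswap_entry_put(tree, entry);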

If I am missing something, please let me know.

Regards,

> Signed-off-by: Dan Streetman <ddstreet@xxxxxxxx>
> ---
>
> This patch requires the writethrough patch to have been applied, but
> the idea itself doesn't depend on the writethrough patch.
>
>  mm/zswap.c | 130 ++++++++++++++++++++++++++++++++++---------------------------
>  1 file changed, 72 insertions(+), 58 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index fc35a7a..8c27eb2 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -215,7 +215,7 @@ static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
>         entry = kmem_cache_alloc(zswap_entry_cache, gfp);
>         if (!entry)
>                 return NULL;
> -       entry->refcount = 1;
> +       entry->refcount = 0;
>         RB_CLEAR_NODE(&entry->rbnode);
>         return entry;
>  }
> @@ -228,9 +228,51 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
>  /*********************************
>  * rbtree functions
>  **********************************/
> -static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
> +
> +/*
> + * Carries out the common pattern of freeing an entry's zbud allocation,
> + * freeing the entry itself, and decrementing the number of stored pages.
> + */
> +static void zswap_free_entry(struct zswap_tree *tree,
> +                       struct zswap_entry *entry)
> +{
> +       zbud_free(tree->pool, entry->handle);
> +       zswap_entry_cache_free(entry);
> +       atomic_dec(&zswap_stored_pages);
> +       zswap_pool_pages = zbud_get_pool_size(tree->pool);
> +}
> +
> +/* caller must hold the tree lock
> + * this must be used if the entry will be used outside
> + * the tree lock
> + */
> +static void zswap_entry_get(struct zswap_entry *entry)
> +{
> +       entry->refcount++;
> +}
> +
> +/* caller must hold the tree lock
> + * free the entry if nobody references it
> + */
> +static void zswap_entry_put(struct zswap_tree *tree,
> +                       struct zswap_entry *entry)
> +{
> +       int refcount = --entry->refcount;
> +
> +       BUG_ON(refcount < 0);
> +       if (refcount == 0)
> +               zswap_free_entry(tree, entry);
> +}
> +
> +/* caller must hold the tree lock
> + * This will find the entry for the offset, and return it
> + * If no entry is found, NULL is returned
> + * If the entry will be used outside the tree lock,
> + * then zswap_rb_search_get should be used instead
> + */
> +static struct zswap_entry *zswap_rb_search(struct zswap_tree *tree, pgoff_t offset)
>  {
> -       struct rb_node *node = root->rb_node;
> +       struct rb_node *node = tree->rbroot.rb_node;
>         struct zswap_entry *entry;
>
>         while (node) {
> @@ -246,13 +288,14 @@ static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
>  }
>
>  /*
> + * caller must hold the tree lock
>   * In the case that a entry with the same offset is found, a pointer to
>   * the existing entry is stored in dupentry and the function returns -EEXIST
>   */
> -static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
> +static int zswap_rb_insert(struct zswap_tree *tree, struct zswap_entry *entry,
>                         struct zswap_entry **dupentry)
>  {
> -       struct rb_node **link = &root->rb_node, *parent = NULL;
> +       struct rb_node **link = &tree->rbroot.rb_node, *parent = NULL;
>         struct zswap_entry *myentry;
>
>         while (*link) {
> @@ -267,60 +310,38 @@ static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
>                         return -EEXIST;
>                 }
>         }
> +       zswap_entry_get(entry);
>         rb_link_node(&entry->rbnode, parent, link);
> -       rb_insert_color(&entry->rbnode, root);
> +       rb_insert_color(&entry->rbnode, &tree->rbroot);
>         return 0;
>  }
>
> -static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
> +
> +/* caller must hold the tree lock
> + * after calling, the entry may have been freed,
> + * and so should no longer be used
> + */
> +static void zswap_rb_erase(struct zswap_tree *tree, struct zswap_entry *entry)
>  {
>         if (!RB_EMPTY_NODE(&entry->rbnode)) {
> -               rb_erase(&entry->rbnode, root);
> +               rb_erase(&entry->rbnode, &tree->rbroot);
>                 RB_CLEAR_NODE(&entry->rbnode);
> +               zswap_entry_put(tree, entry);
>         }
>  }
>
> -/*
> - * Carries out the common pattern of freeing and entry's zsmalloc allocation,
> - * freeing the entry itself, and decrementing the number of stored pages.
> - */
> -static void zswap_free_entry(struct zswap_tree *tree,
> -                       struct zswap_entry *entry)
> -{
> -       zbud_free(tree->pool, entry->handle);
> -       zswap_entry_cache_free(entry);
> -       atomic_dec(&zswap_stored_pages);
> -       zswap_pool_pages = zbud_get_pool_size(tree->pool);
> -}
> -
> -/* caller must hold the tree lock */
> -static void zswap_entry_get(struct zswap_entry *entry)
> -{
> -       entry->refcount++;
> -}
> -
>  /* caller must hold the tree lock
> -* remove from the tree and free it, if nobody reference the entry
> -*/
> -static void zswap_entry_put(struct zswap_tree *tree,
> -                       struct zswap_entry *entry)
> -{
> -       int refcount = --entry->refcount;
> -
> -       BUG_ON(refcount < 0);
> -       if (refcount == 0) {
> -               zswap_rb_erase(&tree->rbroot, entry);
> -               zswap_free_entry(tree, entry);
> -       }
> -}
> -
> -/* caller must hold the tree lock */
> -static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
> + * this is the same as zswap_rb_search but also gets
> + * the entry before returning it (if found).  This
> + * (or zswap_entry_get) must be used if the entry will be
> + * used outside the tree lock
> + */
> +static struct zswap_entry *zswap_rb_search_get(struct zswap_tree *tree,
>                                 pgoff_t offset)
>  {
>         struct zswap_entry *entry = NULL;
>
> -       entry = zswap_rb_search(root, offset);
> +       entry = zswap_rb_search(tree, offset);
>         if (entry)
>                 zswap_entry_get(entry);
>
> @@ -435,7 +456,7 @@ static int zswap_evict_entry(struct zbud_pool *pool, unsigned long handle)
>
>         /* find zswap entry */
>         spin_lock(&tree->lock);
> -       entry = zswap_rb_search(&tree->rbroot, offset);
> +       entry = zswap_rb_search(tree, offset);
>         if (!entry) {
>                 /* entry was invalidated */
>                 spin_unlock(&tree->lock);
> @@ -444,10 +465,7 @@ static int zswap_evict_entry(struct zbud_pool *pool, unsigned long handle)
>         BUG_ON(offset != entry->offset);
>
>         /* remove from rbtree */
> -       zswap_rb_erase(&tree->rbroot, entry);
> -
> -       /* drop initial reference */
> -       zswap_entry_put(tree, entry);
> +       zswap_rb_erase(tree, entry);
>
>         zswap_evicted_pages++;
>
> @@ -532,12 +550,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>         /* map */
>         spin_lock(&tree->lock);
>         do {
> -               ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
> +               ret = zswap_rb_insert(tree, entry, &dupentry);
>                 if (ret == -EEXIST) {
>                         zswap_duplicate_entry++;
>                         /* remove from rbtree */
> -                       zswap_rb_erase(&tree->rbroot, dupentry);
> -                       zswap_entry_put(tree, dupentry);
> +                       zswap_rb_erase(tree, dupentry);
>                 }
>         } while (ret == -EEXIST);
>         spin_unlock(&tree->lock);
> @@ -570,7 +587,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>
>         /* find */
>         spin_lock(&tree->lock);
> -       entry = zswap_entry_find_get(&tree->rbroot, offset);
> +       entry = zswap_rb_search_get(tree, offset);
>         if (!entry) {
>                 /* entry was evicted */
>                 spin_unlock(&tree->lock);
> @@ -604,7 +621,7 @@ static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
>
>         /* find */
>         spin_lock(&tree->lock);
> -       entry = zswap_rb_search(&tree->rbroot, offset);
> +       entry = zswap_rb_search(tree, offset);
>         if (!entry) {
>                 /* entry was evicted */
>                 spin_unlock(&tree->lock);
> @@ -612,10 +629,7 @@ static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
>         }
>
>         /* remove from rbtree */
> -       zswap_rb_erase(&tree->rbroot, entry);
> -
> -       /* drop the initial reference from entry creation */
> -       zswap_entry_put(tree, entry);
> +       zswap_rb_erase(tree, entry);
>
>         spin_unlock(&tree->lock);
>  }
> --
> 1.8.3.1
>
