On 22/07/12 12:54PM, Jan Kara wrote: > Add function mb_cache_entry_delete_or_get() to delete mbcache entry if > it is unused and also add a function to wait for entry to become unused > - mb_cache_entry_wait_unused(). We do not share code between the two > deleting function as one of them will go away soon. > > CC: stable@xxxxxxxxxxxxxxx > Fixes: 82939d7999df ("ext4: convert to mbcache2") > Signed-off-by: Jan Kara <jack@xxxxxxx> > --- > fs/mbcache.c | 66 +++++++++++++++++++++++++++++++++++++++-- > include/linux/mbcache.h | 10 ++++++- > 2 files changed, 73 insertions(+), 3 deletions(-) > > diff --git a/fs/mbcache.c b/fs/mbcache.c > index cfc28129fb6f..2010bc80a3f2 100644 > --- a/fs/mbcache.c > +++ b/fs/mbcache.c > @@ -11,7 +11,7 @@ > /* > * Mbcache is a simple key-value store. Keys need not be unique, however > * key-value pairs are expected to be unique (we use this fact in > - * mb_cache_entry_delete()). > + * mb_cache_entry_delete_or_get()). > * > * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. > * Ext4 also uses it for deduplication of xattr values stored in inodes. > @@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry) > } > EXPORT_SYMBOL(__mb_cache_entry_free); > > +/* > + * mb_cache_entry_wait_unused - wait to be the last user of the entry > + * > + * @entry - entry to work on > + * > + * Wait to be the last user of the entry. > + */ > +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) > +{ > + wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3); It's not very intuitive why we check for refcnt <= 3. A short comment at the top of this function would be helpful. IIUC, it is because by default, when anyone creates an entry, we start with a refcnt of 2 (in mb_cache_entry_create()). - Now when the user of the entry wants to delete it, it will try to call mb_cache_entry_delete_or_get(). 
If during this function call it sees that the refcnt is elevated above 2, that means there is another user of this entry currently active, and hence we should wait before we remove this entry from the cache. So it will take an extra reference and return the entry. - The caller will then call mb_cache_entry_wait_unused() to wait for the refcnt to drop to <= 3, so that the entry can be deleted. Quick question - is the design now that ext4_evict_ea_inode() will wait indefinitely until the other user of this mb_cache entry releases its reference? And that will not happen until - either the shrinker removes this entry from the cache, during which we check whether the refcnt is <= 3 and then issue a wakeup event - or the user removes/deletes the xattr entry. Is the above understanding correct? -ritesh > +} > +EXPORT_SYMBOL(mb_cache_entry_wait_unused); > + > static struct mb_cache_entry *__entry_find(struct mb_cache *cache, > struct mb_cache_entry *entry, > u32 key) > @@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, > } > EXPORT_SYMBOL(mb_cache_entry_get); > > -/* mb_cache_entry_delete - remove a cache entry > +/* mb_cache_entry_delete - try to remove a cache entry > * @cache - cache we work with > * @key - key > * @value - value > @@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) > } > EXPORT_SYMBOL(mb_cache_entry_delete); > > +/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users > + * @cache - cache we work with > + * @key - key > + * @value - value > + * > + * Remove entry from cache @cache with key @key and value @value. The removal > + * happens only if the entry is unused. The function returns NULL in case the > + * entry was successfully removed or there's no entry in cache. Otherwise the > + * function grabs reference of the entry that we failed to delete because it > + * still has users and return it. 
> + */ > +struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, > + u32 key, u64 value) > +{ > + struct hlist_bl_node *node; > + struct hlist_bl_head *head; > + struct mb_cache_entry *entry; > + > + head = mb_cache_entry_head(cache, key); > + hlist_bl_lock(head); > + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { > + if (entry->e_key == key && entry->e_value == value) { > + if (atomic_read(&entry->e_refcnt) > 2) { > + atomic_inc(&entry->e_refcnt); > + hlist_bl_unlock(head); > + return entry; > + } > + /* We keep hash list reference to keep entry alive */ > + hlist_bl_del_init(&entry->e_hash_list); > + hlist_bl_unlock(head); > + spin_lock(&cache->c_list_lock); > + if (!list_empty(&entry->e_list)) { > + list_del_init(&entry->e_list); > + if (!WARN_ONCE(cache->c_entry_count == 0, > + "mbcache: attempt to decrement c_entry_count past zero")) > + cache->c_entry_count--; > + atomic_dec(&entry->e_refcnt); > + } > + spin_unlock(&cache->c_list_lock); > + mb_cache_entry_put(cache, entry); > + return NULL; > + } > + } > + hlist_bl_unlock(head); > + > + return NULL; > +} > +EXPORT_SYMBOL(mb_cache_entry_delete_or_get); > + > /* mb_cache_entry_touch - cache entry got used > * @cache - cache the entry belongs to > * @entry - entry that got used > diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h > index 20f1e3ff6013..8eca7f25c432 100644 > --- a/include/linux/mbcache.h > +++ b/include/linux/mbcache.h > @@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache); > int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, > u64 value, bool reusable); > void __mb_cache_entry_free(struct mb_cache_entry *entry); > +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); > static inline int mb_cache_entry_put(struct mb_cache *cache, > struct mb_cache_entry *entry) > { > - if (!atomic_dec_and_test(&entry->e_refcnt)) > + unsigned int cnt = atomic_dec_return(&entry->e_refcnt); > + > + if (cnt > 0) { > + if (cnt <= 3) 
> + wake_up_var(&entry->e_refcnt); > return 0; > + } > __mb_cache_entry_free(entry); > return 1; > } > > +struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, > + u32 key, u64 value); > void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); > struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, > u64 value); > -- > 2.35.3 >