On 5/8/20 8:23 AM, Konstantin Khlebnikov wrote:
For disk filesystems, the result of every negative lookup is cached, and the
contents of directories are usually cached too. Production of negative
dentries therefore isn't limited by disk speed, and it's really easy to
generate millions of them if the system has enough memory. Negative dentries
are linked into the siblings list along with normal positive dentries, but
some operations walk the dcache tree looking only for positive dentries; the
most important is fsnotify/inotify.
This patch moves negative dentries to the end of the list at final dput() and
marks them with a flag which tells walkers that all following dentries are
negative too. The reverse operation is required before instantiating a
negative dentry.
Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
---
fs/dcache.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++--
include/linux/dcache.h | 6 +++++
2 files changed, 66 insertions(+), 3 deletions(-)
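As a quick illustration of the "millions of negative dentries" point in the
changelog (not part of the patch; a hypothetical userspace sketch, with
made-up file names and loop count): each stat() of a distinct nonexistent
name fails with ENOENT but leaves a negative dentry behind in the dcache.

	/*
	 * Illustrative only: populate the dcache with negative dentries
	 * by looking up names that do not exist.
	 */
	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		struct stat st;
		char name[64];
		long i;

		for (i = 0; i < 10000000; i++) {
			snprintf(name, sizeof(name), "/tmp/no-such-file-%ld", i);
			stat(name, &st);	/* ENOENT; negative dentry cached */
		}
		return 0;
	}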
diff --git a/fs/dcache.c b/fs/dcache.c
index 386f97eaf2ff..743255773cc7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -632,6 +632,48 @@ static inline struct dentry *lock_parent(struct dentry *dentry)
return __lock_parent(dentry);
}
+/*
+ * Move a cached negative dentry to the tail of parent->d_subdirs.
+ * This lets walkers skip them altogether at first sight.
+ * Must be called at final dput() of a negative dentry.
+ */
+static void sweep_negative(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	if (!d_is_tail_negative(dentry)) {
+		parent = lock_parent(dentry);
+		if (!parent)
+			return;
+
+		if (!d_count(dentry) && d_is_negative(dentry) &&
+		    !d_is_tail_negative(dentry)) {
+			dentry->d_flags |= DCACHE_TAIL_NEGATIVE;
+			list_move_tail(&dentry->d_child, &parent->d_subdirs);
+		}
+
+		spin_unlock(&parent->d_lock);
+	}
+}
+
+/*
+ * Undo sweep_negative() and move to the head of parent->d_subdirs.
+ * Must be called before converting a negative dentry into a positive one.
+ */
+static void recycle_negative(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	spin_lock(&dentry->d_lock);
+	parent = lock_parent(dentry);
+	if (parent) {
+		list_move(&dentry->d_child, &parent->d_subdirs);
+		spin_unlock(&parent->d_lock);
+	}
+	dentry->d_flags &= ~DCACHE_TAIL_NEGATIVE;
+	spin_unlock(&dentry->d_lock);
+}
+
The names sweep_negative and recycle_negative are not very descriptive of
what the functions are doing (especially the latter one). I am not good at
naming, but some kind of naming scheme that clearly shows one is the
opposite of the other would be better; see the sketch below.
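For instance, something along these lines (hypothetical names, same bodies
as in the patch) would make the pairing obvious:

	/* Move an unused negative dentry to the tail of parent->d_subdirs. */
	static void d_move_negative_to_tail(struct dentry *dentry);

	/* Undo d_move_negative_to_tail(): move back to the head of the list. */
	static void d_move_negative_to_head(struct dentry *dentry);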
static inline bool retain_dentry(struct dentry *dentry)
{
	WARN_ON(d_in_lookup(dentry));
@@ -703,6 +745,8 @@ static struct dentry *dentry_kill(struct dentry *dentry)
		spin_unlock(&inode->i_lock);
	if (parent)
		spin_unlock(&parent->d_lock);
+	if (d_is_negative(dentry))
+		sweep_negative(dentry);
We can potentially save an unneeded lock/unlock pair by moving this up
before the "if (parent)" check and passing in a flag to indicate whether
the parent lock is already held; a rough sketch follows.
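A minimal sketch of that idea, assuming sweep_negative() grows a
parent_locked parameter (illustrative only, not tested):

	/*
	 * Hypothetical variant: the caller says whether parent->d_lock is
	 * already held, so dentry_kill() can reuse the lock it has taken
	 * instead of dropping it and calling lock_parent() again.
	 */
	static void sweep_negative(struct dentry *dentry, bool parent_locked)
	{
		struct dentry *parent = dentry->d_parent;

		if (d_is_tail_negative(dentry))
			return;

		if (!parent_locked) {
			parent = lock_parent(dentry);
			if (!parent)
				return;
		}

		if (!d_count(dentry) && d_is_negative(dentry) &&
		    !d_is_tail_negative(dentry)) {
			dentry->d_flags |= DCACHE_TAIL_NEGATIVE;
			list_move_tail(&dentry->d_child, &parent->d_subdirs);
		}

		if (!parent_locked)
			spin_unlock(&parent->d_lock);
	}

The call site in dentry_kill() would then become:

	if (d_is_negative(dentry))
		sweep_negative(dentry, parent != NULL);
	if (parent)
		spin_unlock(&parent->d_lock);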
	spin_unlock(&dentry->d_lock);
	return NULL;
}
@@ -718,7 +762,7 @@ static struct dentry *dentry_kill(struct dentry *dentry)
static inline bool fast_dput(struct dentry *dentry)
{
	int ret;
-	unsigned int d_flags;
+	unsigned int d_flags, required;
	/*
	 * If we have a d_op->d_delete() operation, we should not
@@ -766,6 +810,8 @@ static inline bool fast_dput(struct dentry *dentry)
	 * a 'delete' op, and it's referenced and already on
	 * the LRU list.
	 *
+	 * A cached negative dentry must be swept to the tail.
+	 *
	 * NOTE! Since we aren't locked, these values are
	 * not "stable". However, it is sufficient that at
	 * some point after we dropped the reference the
@@ -777,10 +823,15 @@ static inline bool fast_dput(struct dentry *dentry)
	 */
	smp_rmb();
	d_flags = READ_ONCE(dentry->d_flags);
-	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
+
+	required = DCACHE_REFERENCED | DCACHE_LRU_LIST |
+		   (d_flags_negative(d_flags) ? DCACHE_TAIL_NEGATIVE : 0);
+
+	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST |
+		   DCACHE_DISCONNECTED | DCACHE_TAIL_NEGATIVE;

	/* Nothing to do? Dropping the reference was all we needed? */
-	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
+	if (d_flags == required && !d_unhashed(dentry))
		return true;

	/*
@@ -852,6 +903,8 @@ void dput(struct dentry *dentry)
		rcu_read_unlock();

		if (likely(retain_dentry(dentry))) {
+			if (d_is_negative(dentry))
+				sweep_negative(dentry);
			spin_unlock(&dentry->d_lock);
			return;
		}
@@ -1951,6 +2004,8 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
	if (inode) {
+		if (d_is_tail_negative(entry))
+			recycle_negative(entry);
Would it be better to call recycle_negative() inside __d_instantiate() under
the d_lock? That reduces the number of lock/unlock pairs you need to do and
eliminates the need to change d_instantiate_new(). A rough sketch is below.
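Something like this, assuming a lock-held helper (__recycle_negative() is a
made-up name here; the original recycle_negative() takes d_lock itself, so
it cannot be called there directly):

	/* Hypothetical variant: caller already holds dentry->d_lock. */
	static void __recycle_negative(struct dentry *dentry)
	{
		struct dentry *parent = lock_parent(dentry);

		if (parent) {
			list_move(&dentry->d_child, &parent->d_subdirs);
			spin_unlock(&parent->d_lock);
		}
		dentry->d_flags &= ~DCACHE_TAIL_NEGATIVE;
	}

	static void __d_instantiate(struct dentry *dentry, struct inode *inode)
	{
		...
		spin_lock(&dentry->d_lock);
		if (d_is_tail_negative(dentry))
			__recycle_negative(dentry);
		...
		spin_unlock(&dentry->d_lock);
	}

(Note that lock_parent() may still drop and retake dentry->d_lock, which
would need a closer look.)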
Cheers,
Longman