When the "dentry-dir-max" sysctl parameter is set, it will enable the checking of dentry count in the parent directory when a negative dentry is being retained. If the count exceeds the given limit, it will schedule a work function to scan the children of that parent directory to find negative dentries to be reclaimed. Positive dentries will not be touched. Signed-off-by: Waiman Long <longman@xxxxxxxxxx> --- fs/dcache.c | 207 +++++++++++++++++++++++++++++++++++++++++ include/linux/dcache.h | 2 + 2 files changed, 209 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index 8f3ac31a582b..01c6d7277244 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -133,6 +133,11 @@ static DEFINE_PER_CPU(long, nr_dentry_negative); int dcache_dentry_dir_max_sysctl __read_mostly; EXPORT_SYMBOL_GPL(dcache_dentry_dir_max_sysctl); +static LLIST_HEAD(negative_reclaim_list); +static void negative_reclaim_check(struct dentry *parent); +static void negative_reclaim_workfn(struct work_struct *work); +static DECLARE_WORK(negative_reclaim_work, negative_reclaim_workfn); + #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) /* @@ -869,7 +874,20 @@ void dput(struct dentry *dentry) rcu_read_unlock(); if (likely(retain_dentry(dentry))) { + struct dentry *neg_parent = NULL; + + if (d_is_negative(dentry)) { + neg_parent = dentry->d_parent; + rcu_read_lock(); + } spin_unlock(&dentry->d_lock); + + /* + * Negative dentry reclaim check is only done when + * a negative dentry is being put into a LRU list. + */ + if (neg_parent) + negative_reclaim_check(neg_parent); return; } @@ -1261,6 +1279,195 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); +/* + * Return true if reclaiming negative dentry can happen. + */ +static inline bool can_reclaim_dentry(unsigned int flags) +{ + return !(flags & (DCACHE_SHRINK_LIST | DCACHE_GENOCIDE | + DCACHE_DENTRY_KILLED)); +} + +struct reclaim_dentry +{ + struct llist_node reclaim_node; + struct dentry *parent_dir; +}; + +/* + * Reclaim excess negative dentries in a directory + */ +static void reclaim_negative_dentry(struct dentry *parent, + struct list_head *dispose) +{ + struct dentry *child; + int limit = READ_ONCE(dcache_dentry_dir_max_sysctl); + int cnt; + + lockdep_assert_held(&parent->d_lock); + + cnt = parent->d_nchildren; + + /* + * Compute # of negative dentries to be reclaimed + * An extra 1/8 of dcache_dentry_dir_max_sysctl is added. + */ + if (cnt <= limit) + return; + cnt -= limit; + cnt += (limit >> 3); + + /* + * The d_subdirs is treated like a kind of LRU where + * non-negative dentries are skipped. Negative dentries + * with DCACHE_REFERENCED bit set are also skipped but + * with DCACHE_REFERENCED cleared. + */ + list_for_each_entry(child, &parent->d_subdirs, d_child) { + /* + * This check is racy and so it may not be accurate. + */ + if (!d_is_negative(child)) + continue; + + if (!spin_trylock(&child->d_lock)) + continue; + + /* + * Only reclaim zero-refcnt negative dentries in the + * LRU, but not in shrink list. 
+		 */
+		if (can_reclaim_dentry(child->d_flags) &&
+		    d_is_negative(child) && d_in_lru(child) &&
+		    !child->d_lockref.count) {
+			if (child->d_flags & DCACHE_REFERENCED) {
+				child->d_flags &= ~DCACHE_REFERENCED;
+			} else {
+				cnt--;
+				d_lru_del(child);
+				d_shrink_add(child, dispose);
+			}
+		}
+		spin_unlock(&child->d_lock);
+		if (!cnt) {
+			child = list_next_entry(child, d_child);
+			break;
+		}
+	}
+
+	if (&child->d_child != &parent->d_subdirs) {
+		/*
+		 * Split out the children from the head up to just
+		 * before the current entry into a separate list and
+		 * then splice it to the end of the child list so
+		 * that the unscanned entries will be in the front.
+		 * This simulates LRU behavior.
+		 */
+		struct list_head list;
+
+		list_cut_before(&list, &parent->d_subdirs,
+				&child->d_child);
+		list_splice_tail(&list, &parent->d_subdirs);
+	}
+}
+
+/*
+ * Excess negative dentry reclaim work function.
+ */
+static void negative_reclaim_workfn(struct work_struct *work)
+{
+	struct llist_node *nodes, *next;
+	struct dentry *parent;
+	struct reclaim_dentry *dentry_node;
+
+	/*
+	 * Collect excess negative dentries in a dispose list and
+	 * shrink them.
+	 */
+	for (nodes = llist_del_all(&negative_reclaim_list);
+	     nodes; nodes = next) {
+		LIST_HEAD(dispose);
+
+		next = llist_next(nodes);
+		dentry_node = container_of(nodes, struct reclaim_dentry,
+					   reclaim_node);
+		parent = dentry_node->parent_dir;
+		spin_lock(&parent->d_lock);
+
+		if (d_is_dir(parent) &&
+		    can_reclaim_dentry(parent->d_flags) &&
+		    (parent->d_flags & DCACHE_RECLAIMING))
+			reclaim_negative_dentry(parent, &dispose);
+
+		if (!list_empty(&dispose)) {
+			spin_unlock(&parent->d_lock);
+			shrink_dentry_list(&dispose);
+			spin_lock(&parent->d_lock);
+		}
+
+		parent->d_flags &= ~DCACHE_RECLAIMING;
+		spin_unlock(&parent->d_lock);
+		dput(parent);
+		kfree(dentry_node);
+		cond_resched();
+	}
+}
+
+/*
+ * Check the parent to see if it has too many negative dentries and, if
+ * so, queue it up for the negative dentry reclaim work function.
+ */
+static void negative_reclaim_check(struct dentry *parent)
+	__releases(rcu)
+{
+	int limit = dcache_dentry_dir_max_sysctl;
+	struct reclaim_dentry *dentry_node;
+
+	if (!limit)
+		goto rcu_unlock_out;
+
+	/*
+	 * These checks are racy before spin_lock().
+	 */
+	if (!can_reclaim_dentry(parent->d_flags) ||
+	    (parent->d_flags & DCACHE_RECLAIMING) ||
+	    (READ_ONCE(parent->d_nchildren) <= limit))
+		goto rcu_unlock_out;
+
+	spin_lock(&parent->d_lock);
+	if (!can_reclaim_dentry(parent->d_flags) ||
+	    (parent->d_flags & DCACHE_RECLAIMING) ||
+	    (READ_ONCE(parent->d_nchildren) <= limit))
+		goto unlock_out;
+
+	if (!d_is_dir(parent))
+		goto unlock_out;
+
+	dentry_node = kzalloc(sizeof(*dentry_node), GFP_NOWAIT);
+	if (!dentry_node)
+		goto unlock_out;
+
+	rcu_read_unlock();
+	__dget_dlock(parent);
+	dentry_node->parent_dir = parent;
+	parent->d_flags |= DCACHE_RECLAIMING;
+	spin_unlock(&parent->d_lock);
+
+	/*
+	 * Only call schedule_work() if negative_reclaim_list was
+	 * previously empty. Otherwise, schedule_work() has already
+	 * been called but the work function hasn't retrieved the
+	 * list yet.
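+	 * In that case, the node just added will still be picked up
+	 * by the pending llist_del_all() in the work function.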
+	 */
+	if (llist_add(&dentry_node->reclaim_node, &negative_reclaim_list))
+		schedule_work(&negative_reclaim_work);
+	return;
+
+unlock_out:
+	spin_unlock(&parent->d_lock);
+rcu_unlock_out:
+	rcu_read_unlock();
+	return;
+}
+
 /**
  * enum d_walk_ret - action to take during tree walk
  * @D_WALK_CONTINUE:	continue walk
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e9e66eb50d1a..f14d72738903 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -13,6 +13,7 @@
 #include <linux/lockref.h>
 #include <linux/stringhash.h>
 #include <linux/wait.h>
+#include <linux/llist.h>
 
 struct path;
 struct vfsmount;
@@ -214,6 +215,7 @@ struct dentry_operations {
 #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */
 #define DCACHE_ENCRYPTED_NAME		0x02000000 /* Encrypted name (dir key was unavailable) */
 #define DCACHE_OP_REAL			0x04000000
+#define DCACHE_RECLAIMING		0x08000000 /* Reclaiming negative dentries */
 
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
 #define DCACHE_DENTRY_CURSOR		0x20000000
-- 
2.18.1
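
P.S. For context: this patch only declares dcache_dentry_dir_max_sysctl;
the knob itself is presumably registered by an earlier patch in the
series. The usual wiring would look roughly like the sketch below. This
is my assumption, not taken from this diff; the table name, placement,
and handler are illustrative only.

static struct ctl_table fs_dcache_table[] = {
	{
		.procname	= "dentry-dir-max",
		.data		= &dcache_dentry_dir_max_sysctl,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};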
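
For anyone who wants to exercise this path, here is a minimal userspace
sketch (mine, not part of the patch) that floods one directory with
negative dentries via failed stat() calls. The /proc/sys/fs/dentry-dir-max
path below is an assumption derived from the sysctl name in the
changelog; adjust it to wherever the series actually registers the knob.
Run it after something like "echo 1000 > /proc/sys/fs/dentry-dir-max",
then watch the nr_negative field of /proc/sys/fs/dentry-state level off
as the work function kicks in.

/*
 * negdent.c - hypothetical stress sketch, not part of this patch.
 * Each stat() on a nonexistent name fails with ENOENT but leaves a
 * negative dentry behind in the parent directory (/tmp here), which
 * should eventually trip the dentry-dir-max reclaim check in dput().
 */
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;
	char name[64];
	int i;

	for (i = 0; i < 100000; i++) {
		snprintf(name, sizeof(name), "/tmp/no-such-file-%d", i);
		stat(name, &st);
	}
	return 0;
}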