The negative dentry pruning is done on a specific super_block set in the ndblk.prune_sb variable. If the super_block is also being unmounted concurrently, the content of the super_block may no longer be valid. To protect against such a race condition, a new lock is added to the ndblk structure to synchronize the negative dentry pruning and umount operations. This is a regular spinlock, rather than a raw spinlock, as the pruning operation can be quite time-consuming. Signed-off-by: Waiman Long <longman@xxxxxxxxxx> --- fs/dcache.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 3482972..360185e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -141,11 +141,13 @@ struct dentry_stat_t dentry_stat = { static long neg_dentry_nfree_init __read_mostly; /* Free pool initial value */ static struct { raw_spinlock_t nfree_lock; + spinlock_t prune_lock; /* Lock for protecting pruning */ long nfree; /* Negative dentry free pool */ struct super_block *prune_sb; /* Super_block for pruning */ int neg_count, prune_count; /* Pruning counts */ } ndblk ____cacheline_aligned_in_smp; +static void clear_prune_sb_for_umount(struct super_block *sb); static void prune_negative_dentry(struct work_struct *work); static DECLARE_DELAYED_WORK(prune_neg_dentry_work, prune_negative_dentry); @@ -1355,6 +1357,7 @@ void shrink_dcache_sb(struct super_block *sb) { long freed; + clear_prune_sb_for_umount(sb); do { LIST_HEAD(dispose); @@ -1385,7 +1388,8 @@ static enum lru_status dentry_negative_lru_isolate(struct list_head *item, * list. 
*/ if ((ndblk.neg_count >= NEG_PRUNING_SIZE) || - (ndblk.prune_count >= NEG_PRUNING_SIZE)) { + (ndblk.prune_count >= NEG_PRUNING_SIZE) || + !READ_ONCE(ndblk.prune_sb)) { ndblk.prune_count = 0; return LRU_STOP; } @@ -1441,15 +1445,24 @@ static void prune_negative_dentry(struct work_struct *work) { int freed; long nfree; - struct super_block *sb = READ_ONCE(ndblk.prune_sb); + struct super_block *sb; LIST_HEAD(dispose); - if (!sb) + /* + * The prune_lock is used to protect negative dentry pruning from + * racing with concurrent umount operation. + */ + spin_lock(&ndblk.prune_lock); + sb = READ_ONCE(ndblk.prune_sb); + if (!sb) { + spin_unlock(&ndblk.prune_lock); return; + } ndblk.neg_count = ndblk.prune_count = 0; freed = list_lru_walk(&sb->s_dentry_lru, dentry_negative_lru_isolate, &dispose, NEG_DENTRY_BATCH); + spin_unlock(&ndblk.prune_lock); if (freed) shrink_dentry_list(&dispose); @@ -1472,6 +1485,27 @@ static void prune_negative_dentry(struct work_struct *work) WRITE_ONCE(ndblk.prune_sb, NULL); } +/* + * This is called before an umount to clear ndblk.prune_sb if it + * matches the given super_block. + */ +static void clear_prune_sb_for_umount(struct super_block *sb) +{ + if (likely(READ_ONCE(ndblk.prune_sb) != sb)) + return; + WRITE_ONCE(ndblk.prune_sb, NULL); + /* + * Need to wait until an ongoing pruning operation, if present, + * is completed. + * + * Clearing ndblk.prune_sb will hasten the completion of pruning. + * In the unlikely event that ndblk.prune_sb is set to another + * super_block, the waiting will last the complete pruning operation + * which shouldn't be that long either. 
+ */ + spin_unlock_wait(&ndblk.prune_lock); +} + /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1794,6 +1828,7 @@ void shrink_dcache_for_umount(struct super_block *sb) WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); + clear_prune_sb_for_umount(sb); dentry = sb->s_root; sb->s_root = NULL; do_one_tree(dentry); @@ -3896,6 +3931,7 @@ static void __init neg_dentry_init(void) unsigned long cnt; raw_spin_lock_init(&ndblk.nfree_lock); + spin_lock_init(&ndblk.prune_lock); /* 20% in global pool & 80% in percpu free */ ndblk.nfree = neg_dentry_nfree_init -- 1.8.3.1