From: Dave Chinner <dchinner@xxxxxxxxxx>

Convert the VFS internal superblock inode iterators that cannot use
referenced inodes to the new super_iter_inodes_unsafe() iterator.

Dquot reference removal and inode eviction need this special handling
because they must operate on inodes without taking new references to
them.

The nr_blockdev_pages() statistics code needs it as well: it is called
from si_meminfo() and so can be run from contexts where arbitrary
blocking is not allowed or desirable.

New cases using this iterator need careful consideration.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 block/bdev.c     | 24 +++++++++++----
 fs/inode.c       | 79 ++++++++++++++++++++++++++----------------------
 fs/quota/dquot.c | 72 ++++++++++++++++++++++++-------------------
 3 files changed, 102 insertions(+), 73 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 33f9c4605e3a..b5a362156ca1 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -472,16 +472,28 @@ void bdev_drop(struct block_device *bdev)
 	iput(BD_INODE(bdev));
 }
 
+static int bdev_pages_count(struct inode *inode, void *data)
+{
+	long *pages = data;
+
+	*pages += inode->i_mapping->nrpages;
+	return INO_ITER_DONE;
+}
+
 long nr_blockdev_pages(void)
 {
-	struct inode *inode;
 	long ret = 0;
 
-	spin_lock(&blockdev_superblock->s_inode_list_lock);
-	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
-		ret += inode->i_mapping->nrpages;
-	spin_unlock(&blockdev_superblock->s_inode_list_lock);
-
+	/*
+	 * We can be called from contexts where blocking is not
+	 * desirable. The count is advisory at best, and we only
+	 * need to access the inode mapping. Hence as long as we
+	 * have an inode existence guarantee, we can safely count
+	 * the cached pages on each inode without needing reference
+	 * counted inodes.
+	 */
+	super_iter_inodes_unsafe(blockdev_superblock,
+			bdev_pages_count, &ret);
 	return ret;
 }
 
diff --git a/fs/inode.c b/fs/inode.c
index 0a53d8c34203..3f335f78c5b2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -761,8 +761,11 @@ static void evict(struct inode *inode)
  * Dispose-list gets a local list with local inodes in it, so it doesn't
  * need to worry about list corruption and SMP locks.
  */
-static void dispose_list(struct list_head *head)
+static bool dispose_list(struct list_head *head)
 {
+	if (list_empty(head))
+		return false;
+
 	while (!list_empty(head)) {
 		struct inode *inode;
 
@@ -772,6 +775,7 @@ static void dispose_list(struct list_head *head)
 		evict(inode);
 		cond_resched();
 	}
+	return true;
 }
 
 /**
@@ -783,47 +787,50 @@ static void dispose_list(struct list_head *head)
  * so any inode reaching zero refcount during or after that call will
  * be immediately evicted.
  */
+static int evict_inode_fn(struct inode *inode, void *data)
+{
+	struct list_head *dispose = data;
+
+	spin_lock(&inode->i_lock);
+	if (atomic_read(&inode->i_count) ||
+	    (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))) {
+		spin_unlock(&inode->i_lock);
+		return INO_ITER_DONE;
+	}
+
+	inode->i_state |= I_FREEING;
+	inode_lru_list_del(inode);
+	spin_unlock(&inode->i_lock);
+	list_add(&inode->i_lru, dispose);
+
+	/*
+	 * If we've run long enough to need rescheduling, abort the
+	 * iteration so we can return to evict_inodes() and dispose of the
+	 * inodes before collecting more inodes to evict.
+	 */
+	if (need_resched())
+		return INO_ITER_ABORT;
+	return INO_ITER_DONE;
+}
+
 void evict_inodes(struct super_block *sb)
 {
-	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-again:
-	spin_lock(&sb->s_inode_list_lock);
-	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
-		if (atomic_read(&inode->i_count))
-			continue;
-
-		spin_lock(&inode->i_lock);
-		if (atomic_read(&inode->i_count)) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
-		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
-
-		inode->i_state |= I_FREEING;
-		inode_lru_list_del(inode);
-		spin_unlock(&inode->i_lock);
-		list_add(&inode->i_lru, &dispose);
-
+	do {
 		/*
-		 * We can have a ton of inodes to evict at unmount time given
-		 * enough memory, check to see if we need to go to sleep for a
-		 * bit so we don't livelock.
+		 * We do not want to take references to inodes whilst iterating
+		 * because we are trying to evict unreferenced inodes from
+		 * the cache. Hence we need to use the unsafe iteration
+		 * mechanism and do all the required inode validity checks in
+		 * evict_inode_fn() to safely queue unreferenced inodes for
+		 * eviction.
+		 *
+		 * We repeat the iteration until it doesn't find any more
+		 * inodes to dispose of.
 		 */
-		if (need_resched()) {
-			spin_unlock(&sb->s_inode_list_lock);
-			cond_resched();
-			dispose_list(&dispose);
-			goto again;
-		}
-	}
-	spin_unlock(&sb->s_inode_list_lock);
-
-	dispose_list(&dispose);
+		super_iter_inodes_unsafe(sb, evict_inode_fn, &dispose);
+	} while (dispose_list(&dispose));
 }
 EXPORT_SYMBOL_GPL(evict_inodes);
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b40410cd39af..ea0bd807fed7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1075,41 +1075,51 @@ static int add_dquot_ref(struct super_block *sb, int type)
 	return err;
 }
 
+struct dquot_ref_data {
+	int	type;
+	int	reserved;
+};
+
+static int remove_dquot_ref_fn(struct inode *inode, void *data)
+{
+	struct dquot_ref_data *ref = data;
+
+	spin_lock(&dq_data_lock);
+	if (!IS_NOQUOTA(inode)) {
+		struct dquot __rcu **dquots = i_dquot(inode);
+		struct dquot *dquot = srcu_dereference_check(
+				dquots[ref->type], &dquot_srcu,
+				lockdep_is_held(&dq_data_lock));
+
+#ifdef CONFIG_QUOTA_DEBUG
+		if (unlikely(inode_get_rsv_space(inode) > 0))
+			ref->reserved++;
+#endif
+		rcu_assign_pointer(dquots[ref->type], NULL);
+		if (dquot)
+			dqput(dquot);
+	}
+	spin_unlock(&dq_data_lock);
+	return INO_ITER_DONE;
+}
+
 static void remove_dquot_ref(struct super_block *sb, int type)
 {
-	struct inode *inode;
-#ifdef CONFIG_QUOTA_DEBUG
-	int reserved = 0;
-#endif
-
-	spin_lock(&sb->s_inode_list_lock);
-	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		/*
-		 * We have to scan also I_NEW inodes because they can already
-		 * have quota pointer initialized. Luckily, we need to touch
-		 * only quota pointers and these have separate locking
-		 * (dq_data_lock).
-		 */
-		spin_lock(&dq_data_lock);
-		if (!IS_NOQUOTA(inode)) {
-			struct dquot __rcu **dquots = i_dquot(inode);
-			struct dquot *dquot = srcu_dereference_check(
-				dquots[type], &dquot_srcu,
-				lockdep_is_held(&dq_data_lock));
+	struct dquot_ref_data ref = {
+		.type = type,
+	};
 
+	/*
+	 * We have to scan I_NEW inodes because they can already
+	 * have quota pointer initialized. Luckily, we need to touch
+	 * only quota pointers and these have separate locking
+	 * (dq_data_lock), so the existence guarantee that
+	 * super_iter_inodes_unsafe() provides for inodes passed to
+	 * remove_dquot_ref_fn() is sufficient for this operation.
+	 */
+	super_iter_inodes_unsafe(sb, remove_dquot_ref_fn, &ref);
 #ifdef CONFIG_QUOTA_DEBUG
-			if (unlikely(inode_get_rsv_space(inode) > 0))
-				reserved = 1;
-#endif
-			rcu_assign_pointer(dquots[type], NULL);
-			if (dquot)
-				dqput(dquot);
-		}
-		spin_unlock(&dq_data_lock);
-	}
-	spin_unlock(&sb->s_inode_list_lock);
-#ifdef CONFIG_QUOTA_DEBUG
-	if (reserved) {
+	if (ref.reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
 			" was disabled thus quota information is probably "
 			"inconsistent. Please run quotacheck(8).\n", sb->s_id);
-- 
2.45.2
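
For illustration only (not part of the patch): a minimal sketch of how a
new caller might use the iterator, following the calling convention the
hunks above rely on. The names count_cached_fn() and count_cached_inodes()
are hypothetical; the callback runs without holding an inode reference and
returns INO_ITER_DONE to continue or INO_ITER_ABORT to stop early.

/* Illustrative only: count inodes that have cached pagecache pages. */
static int count_cached_fn(struct inode *inode, void *data)
{
	unsigned long *count = data;

	/*
	 * No reference is held on @inode; only look at state that is
	 * safe to read under the iterator's existence guarantee.
	 */
	if (inode->i_mapping->nrpages)
		(*count)++;

	/* The count is advisory, so it is fine to stop early. */
	if (need_resched())
		return INO_ITER_ABORT;
	return INO_ITER_DONE;
}

static unsigned long count_cached_inodes(struct super_block *sb)
{
	unsigned long count = 0;

	super_iter_inodes_unsafe(sb, count_cached_fn, &count);
	return count;
}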