Delaying iput introduces a problem when it comes to unmounting. The problem lies in fs-specific put_super callbacks that will issue iput()s and assume that the iputs on their final inodes (for metadata inodes and the like) which causes problems with deferred iput() as the super_block may go away before the inodes are finalized. This patch goes and marks all inodes in a super_block whenever we see that the inodes are being invalidated with S_SYNCIPUT, which causes us to shortcircuit the deferred iput and perform a synchronous iput. Signed-off-by: Mike Waychison <mikew@xxxxxxxxxx> --- fs/block_dev.c | 2 +- fs/ext2/super.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/ops_fstype.c | 2 +- fs/inode.c | 19 ++++++++++++++----- fs/ntfs/super.c | 2 +- fs/smbfs/inode.c | 2 +- fs/super.c | 4 ++-- include/linux/fs.h | 3 ++- 9 files changed, 24 insertions(+), 14 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index b3c1eff..eb6517e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1400,7 +1400,7 @@ int __invalidate_device(struct block_device *bdev) * hold). */ shrink_dcache_sb(sb); - res = invalidate_inodes(sb); + res = invalidate_inodes(sb, 1); drop_super(sb); } invalidate_bdev(bdev); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index da8bdea..190dfc9 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1185,7 +1185,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) es = sbi->s_es; if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != (old_mount_opt & EXT2_MOUNT_XIP)) && - invalidate_inodes(sb)) + invalidate_inodes(sb, 0)) ext2_warning(sb, __func__, "busy inodes while remounting "\ "xip remain in cache (no functional problem)"); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6b983ae..4d95373 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1574,7 +1574,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) } down_write(&gfs2_umount_flush_sem); - invalidate_inodes(sdp->sd_vfs); + invalidate_inodes(sdp->sd_vfs, 1); up_write(&gfs2_umount_flush_sem); msleep(10); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index f91eebd..5099ca5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1205,7 +1205,7 @@ fail_locking: fail_lm: gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); - while (invalidate_inodes(sb)) + while (invalidate_inodes(sb, 1)) yield(); fail_sys: gfs2_sys_fs_del(sdp); diff --git a/fs/inode.c b/fs/inode.c index 56cf6e2..9aa574d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -335,7 +335,8 @@ static void dispose_list(struct list_head *head) /* * Invalidate all inodes for a device. */ -static int invalidate_list(struct list_head *head, struct list_head *dispose) +static int invalidate_list(struct list_head *head, struct list_head *dispose, + int umount) { struct list_head *next; int busy = 0, count = 0; @@ -358,6 +359,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) break; inode = list_entry(tmp, struct inode, i_sb_list); invalidate_inode_buffers(inode); + if (umount) + inode->i_flags |= S_SYNCIPUT; if (!atomic_read(&inode->i_count)) { list_move(&inode->i_list, dispose); inode->i_state |= I_FREEING; @@ -374,12 +377,13 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) /** * invalidate_inodes - discard the inodes on a device * @sb: superblock + * @umount: are we unmounting? * * Discard all of the inodes for a given superblock. If the discard * fails because there are busy inodes then a non zero value is returned. * If the discard is successful all the inodes have been discarded. */ -int invalidate_inodes(struct super_block * sb) +int invalidate_inodes(struct super_block *sb, int umount) { int busy; LIST_HEAD(throw_away); @@ -394,7 +398,7 @@ int invalidate_inodes(struct super_block * sb) spin_lock(&inode_lock); inotify_unmount_inodes(&sb->s_inodes); - busy = invalidate_list(&sb->s_inodes, &throw_away); + busy = invalidate_list(&sb->s_inodes, &throw_away, umount); spin_unlock(&inode_lock); dispose_list(&throw_away); @@ -1372,8 +1376,12 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state == I_CLEAR); - if (!atomic_add_unless(&inode->i_count, -1, 1)) - postpone_iput(inode); + if (!(inode->i_flags & S_SYNCIPUT)) { + if (!atomic_add_unless(&inode->i_count, -1, 1)) + postpone_iput(inode); + } else { + real_iput(inode); + } } } @@ -1663,6 +1671,7 @@ static void iput_drain_per_cpu_cb(struct work_struct *dummy) void iput_drain_all(void) { + /* TODO: error checking? */ schedule_on_each_cpu(iput_drain_per_cpu_cb); } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 4a46743..04220f3 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3051,7 +3051,7 @@ iput_tmp_ino_err_out_now: * method again... FIXME: Do we need to do this twice now because of * attribute inodes? I think not, so leave as is for now... (AIA) */ - if (invalidate_inodes(sb)) { + if (invalidate_inodes(sb, 1)) { ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " "driver bug."); /* Copied from fs/super.c. I just love this message. (-; */ diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fc27fbf..4def57a 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -229,7 +229,7 @@ smb_invalidate_inodes(struct smb_sb_info *server) { VERBOSE("\n"); shrink_dcache_sb(SB_of(server)); - invalidate_inodes(SB_of(server)); + invalidate_inodes(SB_of(server), 1); } /* diff --git a/fs/super.c b/fs/super.c index 534840f..244b6f5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -306,7 +306,7 @@ void generic_shutdown_super(struct super_block *sb) async_synchronize_full_special(&sb->s_async_list); /* bad name - it should be evict_inodes() */ - invalidate_inodes(sb); + invalidate_inodes(sb, 1); lock_kernel(); if (sop->write_super && sb->s_dirt) @@ -315,7 +315,7 @@ void generic_shutdown_super(struct super_block *sb) sop->put_super(sb); /* Forget any remaining inodes */ - if (invalidate_inodes(sb)) { + if (invalidate_inodes(sb, 1)) { printk("VFS: Busy inodes after unmount of %s. " "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); diff --git a/include/linux/fs.h b/include/linux/fs.h index 47eaa71..aa90620 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -161,6 +161,7 @@ struct inodes_stat_t { #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ +#define S_SYNCIPUT 1024 /* Filesystem going down, use sync iput() */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -1805,7 +1806,7 @@ extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); #endif -extern int invalidate_inodes(struct super_block *); +extern int invalidate_inodes(struct super_block *, int umount); unsigned long __invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end, bool be_atomic); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html