The I_WILL_FREE is currently set for inodes that we write out during umount after dropping their last reference. It is handled equally to I_FREEING in most places. The two execptions are: - writeback_single_inode skips all list manipulations for I_FREEING, but not for I_WILL_FREE. We don't care about which list an I_WILL_FREE inode is on, because we will remove it from the list a little bit later. - __mark_inode_dirty skips I_FREEING inodes but not I_WILL_FREE inodes. This only matters for filesystem that re-dirty the inode during writeback and then use the I_DIRTY flags inside ->evict_inode. The formers is done by XFS, but it uses it's internal state to flush the inode. I could not find any filesystem that looks at I_DIRTY inside ->evict_inode either. Besides cleaning up the code removing I_WILL_FREE will allow us to avoid one i_lock roundtrip once inode_lock is split and keep iput_final more logic. This includes removing the __remove_inode_hash call in iput_final, given that we never drop the protection from lookups now that I_FREEING is set earlier. Signed-off-by: Christoph Hellwig <hch@xxxxxx> Index: linux-2.6/fs/btrfs/inode.c =================================================================== --- linux-2.6.orig/fs/btrfs/inode.c 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/fs/btrfs/inode.c 2010-10-24 16:32:43.647253791 +0200 @@ -3862,8 +3862,7 @@ again: else if (inode->i_ino > entry->vfs_inode.i_ino) p = &parent->rb_right; else { - WARN_ON(!(entry->vfs_inode.i_state & - (I_WILL_FREE | I_FREEING))); + WARN_ON(!(entry->vfs_inode.i_state & I_FREEING)); rb_erase(parent, &root->inode_tree); RB_CLEAR_NODE(parent); spin_unlock(&root->inode_lock); Index: linux-2.6/fs/drop_caches.c =================================================================== --- linux-2.6.orig/fs/drop_caches.c 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/fs/drop_caches.c 2010-10-24 16:32:43.647253791 +0200 @@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct sup spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING | I_NEW)) continue; if (inode->i_mapping->nrpages == 0) continue; Index: linux-2.6/fs/gfs2/inode.c =================================================================== --- linux-2.6.orig/fs/gfs2/inode.c 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/fs/gfs2/inode.c 2010-10-24 16:32:43.650253651 +0200 @@ -84,7 +84,7 @@ static int iget_skip_test(struct inode * struct gfs2_skip_data *data = opaque; if (ip->i_no_addr == data->no_addr) { - if (inode->i_state & (I_FREEING|I_WILL_FREE)){ + if (inode->i_state & I_FREEING){ data->skipped = 1; return 0; } Index: linux-2.6/fs/fs-writeback.c =================================================================== --- linux-2.6.orig/fs/fs-writeback.c 2010-10-24 16:32:39.000000000 +0200 +++ linux-2.6/fs/fs-writeback.c 2010-10-24 16:32:43.654254629 +0200 @@ -301,8 +301,7 @@ static void inode_wait_for_writeback(str /* * Write out an inode's dirty pages. Called under inode_lock. Either the - * caller has ref on the inode (either via __iget or via syscall against an fd) - * or the inode has I_WILL_FREE set (via generic_forget_inode) + * caller has ref on the inode or the inode is beeing freed. * * If `wait' is set, wait on the writeout. * @@ -320,9 +319,7 @@ writeback_single_inode(struct inode *ino int ret; if (!atomic_read(&inode->i_count)) - WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); - else - WARN_ON(inode->i_state & I_WILL_FREE); + WARN_ON(!(inode->i_state & I_FREEING)); if (inode->i_state & I_SYNC) { /* @@ -492,7 +489,7 @@ static int writeback_sb_inodes(struct su * kind does not need peridic writeout yet, and for the latter * kind writeout is handled by the freer. */ - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + if (inode->i_state & (I_NEW | I_FREEING)) { requeue_io(inode); continue; } @@ -1043,7 +1040,7 @@ static void wait_sb_inodes(struct super_ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct address_space *mapping; - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING | I_NEW)) continue; mapping = inode->i_mapping; if (mapping->nrpages == 0) @@ -1154,7 +1151,7 @@ EXPORT_SYMBOL(sync_inodes_sb); * This function commits an inode to disk immediately if it is dirty. This is * primarily needed by knfsd. * - * The caller must either have a ref on the inode or must have set I_WILL_FREE. + * The caller must either have a ref on the inode or must have set I_FREEING. */ int write_inode_now(struct inode *inode, int sync) { Index: linux-2.6/fs/inode.c =================================================================== --- linux-2.6.orig/fs/inode.c 2010-10-24 16:32:20.000000000 +0200 +++ linux-2.6/fs/inode.c 2010-10-24 16:33:46.912003547 +0200 @@ -667,7 +667,7 @@ repeat: continue; if (!test(inode, data)) continue; - if (inode->i_state & (I_FREEING|I_WILL_FREE)) { + if (inode->i_state & I_FREEING) { __wait_on_freeing_inode(inode); goto repeat; } @@ -693,7 +693,7 @@ repeat: continue; if (inode->i_sb != sb) continue; - if (inode->i_state & (I_FREEING|I_WILL_FREE)) { + if (inode->i_state & I_FREEING) { __wait_on_freeing_inode(inode); goto repeat; } @@ -964,7 +964,7 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode_lock); - if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) + if (!(inode->i_state & I_FREEING)) __iget(inode); else /* @@ -1211,7 +1211,7 @@ int insert_inode_locked(struct inode *in continue; if (old->i_sb != sb) continue; - if (old->i_state & (I_FREEING|I_WILL_FREE)) + if (old->i_state & I_FREEING) continue; break; } @@ -1250,7 +1250,7 @@ int insert_inode_locked4(struct inode *i continue; if (!test(old, data)) continue; - if (old->i_state & (I_FREEING|I_WILL_FREE)) + if (old->i_state & I_FREEING) continue; break; } @@ -1305,6 +1305,8 @@ static void iput_final(struct inode *ino const struct super_operations *op = inode->i_sb->s_op; int drop; + WARN_ON(inode->i_state & I_NEW); + if (op && op->drop_inode) drop = op->drop_inode(inode); else @@ -1313,25 +1315,19 @@ static void iput_final(struct inode *ino if (!drop) { if (sb->s_flags & MS_ACTIVE) { inode->i_state |= I_REFERENCED; - if (!(inode->i_state & (I_DIRTY|I_SYNC))) { + if (!(inode->i_state & (I_DIRTY|I_SYNC))) inode_lru_list_add(inode); - } spin_unlock(&inode_lock); return; } - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_WILL_FREE; + inode->i_state |= I_FREEING; spin_unlock(&inode_lock); write_inode_now(inode, 1); spin_lock(&inode_lock); - WARN_ON(inode->i_state & I_NEW); - inode->i_state &= ~I_WILL_FREE; - __remove_inode_hash(inode); + } else { + inode->i_state |= I_FREEING; } - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - /* * Move the inode off the IO lists and LRU once I_FREEING is * set so that it won't get moved back on there if it is dirty. Index: linux-2.6/fs/notify/inode_mark.c =================================================================== --- linux-2.6.orig/fs/notify/inode_mark.c 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/fs/notify/inode_mark.c 2010-10-24 16:32:43.661255957 +0200 @@ -244,11 +244,11 @@ void fsnotify_unmount_inodes(struct list struct inode *need_iput_tmp; /* - * We cannot __iget() an inode in state I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. + * We cannot __iget() an inode in state I_FREEING or I_NEW, + * which is fine because by that point the inode cannot + * have any associated watches. */ - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING | I_NEW)) continue; /* @@ -272,7 +272,7 @@ void fsnotify_unmount_inodes(struct list /* In case the dropping of a reference would nuke next_i. */ if ((&next_i->i_sb_list != list) && atomic_read(&next_i->i_count) && - !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { + !(next_i->i_state & I_FREEING)) { __iget(next_i); need_iput = next_i; } Index: linux-2.6/fs/quota/dquot.c =================================================================== --- linux-2.6.orig/fs/quota/dquot.c 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/fs/quota/dquot.c 2010-10-24 16:32:43.665253722 +0200 @@ -898,7 +898,7 @@ static void add_dquot_ref(struct super_b spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) + if (inode->i_state & (I_FREEING | I_NEW)) continue; #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) Index: linux-2.6/fs/xfs/linux-2.6/xfs_iops.c =================================================================== --- linux-2.6.orig/fs/xfs/linux-2.6/xfs_iops.c 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/fs/xfs/linux-2.6/xfs_iops.c 2010-10-24 16:32:43.665253722 +0200 @@ -80,7 +80,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + if (!(inode->i_state & I_FREEING)) mark_inode_dirty_sync(inode); } @@ -90,7 +90,7 @@ xfs_mark_inode_dirty( { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + if (!(inode->i_state & I_FREEING)) mark_inode_dirty(inode); } Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h 2010-10-24 16:30:05.000000000 +0200 +++ linux-2.6/include/linux/fs.h 2010-10-24 16:32:43.673253652 +0200 @@ -1601,8 +1601,8 @@ struct super_operations { * I_DIRTY_DATASYNC and I_DIRTY_PAGES. * * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, - * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at - * various stages of removing an inode. + * until that flag is cleared. I_FREEING and I_CLEAR are set at various + * stages of removing an inode. * * Two bits are used for locking and completion notification, I_NEW and I_SYNC. * @@ -1613,46 +1613,39 @@ struct super_operations { * don't have to write inode on fdatasync() when only * mtime has changed in it. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. + * I_FREEING Set when inode is about to be freed but still has dirty + * pages or buffers attached or the inode itself is still + * dirty. + * I_CLEAR Added by end_writeback(). In this state the inode is + * clean and can be destroyed. Inode keeps I_FREEING. + * I_REFERENCED Inode was recently referenced on the LRU and got another + * pass before eviction. * I_NEW Serves as both a mutex and completion notification. * New inodes set I_NEW. If two processes both create * the same inode, one of them will release its inode and * wait for I_NEW to be released before returning. - * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can - * also cause waiting on I_NEW, without I_NEW actually - * being set. find_inode() uses this to prevent returning - * nearly-dead inodes. - * I_WILL_FREE Must be set when calling write_inode_now() if i_count - * is zero. I_FREEING must be set when I_WILL_FREE is - * cleared. - * I_FREEING Set when inode is about to be freed but still has dirty - * pages or buffers attached or the inode itself is still - * dirty. - * I_CLEAR Added by end_writeback(). In this state the inode is clean - * and can be destroyed. Inode keeps I_FREEING. - * - * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are - * prohibited for many purposes. iget() must wait for - * the inode to be completely released, then create it - * anew. Other functions will just ignore such inodes, - * if appropriate. I_NEW is used for waiting. - * + * Inodes in I_FREEING state can also cause waiting on + * I_NEW, without I_NEW actually being set. find_inode() + * uses this to prevent returning nearly-dead inodes. * I_SYNC Synchonized write of dirty inode data. The bits is * set during data writeback, and cleared with a wakeup * on the bit address once it is done. * - * Q: What is the difference between I_WILL_FREE and I_FREEING? + * Inodes that have I_FREEING set are prohibited for many purposes. + * iget*_locked() must wait for the inode to be completely released, then + * create it anew. Other functions will just ignore such inodes, if + * appropriate. I_NEW is used for waiting. */ #define I_DIRTY_SYNC (1 << 0) #define I_DIRTY_DATASYNC (1 << 1) #define I_DIRTY_PAGES (1 << 2) -#define __I_NEW 3 +#define I_FREEING (1 << 3) +#define I_CLEAR (1 << 4) +#define I_REFERENCED (1 << 5) +#define __I_NEW 6 #define I_NEW (1 << __I_NEW) -#define I_WILL_FREE (1 << 4) -#define I_FREEING (1 << 5) -#define I_CLEAR (1 << 6) #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) -#define I_REFERENCED (1 << 8) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html