The users of I_LOCK in fs/fs-writeback appear to be independent of all others. On top, sharing the same bit for __sync_single_inode() and ifind/ifind_fast caused deadlocks in both LogFS and NTFS. Introduce a new bit and use that for fs/fs-writeback.c users. --- fs/fs-writeback.c | 39 ++++++++++++++++++++++++--------------- include/linux/fs.h | 2 ++ include/linux/writeback.h | 7 +++++++ 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a4b142a..4958fae 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -99,11 +99,11 @@ void __mark_inode_dirty(struct inode *in inode->i_state |= flags; /* - * If the inode is locked, just update its dirty state. + * If the inode is being synced, just update its dirty state. * The unlocker will place the inode on the appropriate * superblock list, based upon its state. */ - if (inode->i_state & I_LOCK) + if (inode->i_state & I_SYNC) goto out; /* @@ -139,6 +139,15 @@ static int write_inode(struct inode *ino return 0; } +static void inode_sync_complete(struct inode *inode) +{ + /* + * Prevent speculative execution through spin_unlock(&inode_lock); + */ + smp_mb(); + wake_up_bit(&inode->i_state, __I_SYNC); +} + /* * Write a single inode's dirty pages and inode data out to disk. * If `wait' is set, wait on the writeout. @@ -158,11 +167,11 @@ __sync_single_inode(struct inode *inode, int wait = wbc->sync_mode == WB_SYNC_ALL; int ret; - BUG_ON(inode->i_state & I_LOCK); + BUG_ON(inode->i_state & I_SYNC); - /* Set I_LOCK, reset I_DIRTY */ + /* Set I_SYNC, reset I_DIRTY */ dirty = inode->i_state & I_DIRTY; - inode->i_state |= I_LOCK; + inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY; spin_unlock(&inode_lock); @@ -183,7 +192,7 @@ __sync_single_inode(struct inode *inode, } spin_lock(&inode_lock); - inode->i_state &= ~I_LOCK; + inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { if (!(inode->i_state & I_DIRTY) && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { @@ -231,7 +240,7 @@ __sync_single_inode(struct inode *inode, list_move(&inode->i_list, &inode_unused); } } - wake_up_inode(inode); + inode_sync_complete(inode); return ret; } @@ -250,7 +259,7 @@ __writeback_single_inode(struct inode *i else WARN_ON(inode->i_state & I_WILL_FREE); - if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { + if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { struct address_space *mapping = inode->i_mapping; int ret; @@ -269,16 +278,16 @@ __writeback_single_inode(struct inode *i /* * It's a data-integrity sync. We must wait. */ - if (inode->i_state & I_LOCK) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LOCK); + if (inode->i_state & I_SYNC) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); - wqh = bit_waitqueue(&inode->i_state, __I_LOCK); + wqh = bit_waitqueue(&inode->i_state, __I_SYNC); do { spin_unlock(&inode_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); spin_lock(&inode_lock); - } while (inode->i_state & I_LOCK); + } while (inode->i_state & I_SYNC); } return __sync_single_inode(inode, wbc); } @@ -311,7 +320,7 @@ __writeback_single_inode(struct inode *i * The inodes to be written are parked on sb->s_io. They are moved back onto * sb->s_dirty as they are selected for writing. This way, none can be missed * on the writer throttling path, and we get decent balancing between many - * throttled threads: we don't want them all piling up on __wait_on_inode. + * throttled threads: we don't want them all piling up on inode_sync_wait. */ static void sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) @@ -583,7 +592,7 @@ int write_inode_now(struct inode *inode, ret = __writeback_single_inode(inode, &wbc); spin_unlock(&inode_lock); if (sync) - wait_on_inode(inode); + inode_sync_wait(inode); return ret; } EXPORT_SYMBOL(write_inode_now); @@ -658,7 +667,7 @@ int generic_osync_inode(struct inode *in err = err2; } else - wait_on_inode(inode); + inode_sync_wait(inode); return err; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 86ec3f4..f9eb221 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1184,6 +1184,8 @@ #define I_FREEING 16 #define I_CLEAR 32 #define I_NEW 64 #define I_WILL_FREE 128 +#define __I_SYNC 8 +#define I_SYNC (1 << __I_SYNC) /* Currently being synced */ #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fc35e6b..3abc1d1 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -77,6 +77,13 @@ static inline void wait_on_inode(struct wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, TASK_UNINTERRUPTIBLE); } +static inline void inode_sync_wait(struct inode *inode) +{ + might_sleep(); + wait_on_bit(&inode->i_state, __I_SYNC, inode_wait, + TASK_UNINTERRUPTIBLE); +} + /* * mm/page-writeback.c -- 1.4.2.3 - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html