Hello, Tahsin. FYI, this is the preliminary patch I'm trying to test. I'm running the repro case w/o the patch but the bug hasn't triggered yet. Thanks. Index: work/include/linux/backing-dev.h =================================================================== --- work.orig/include/linux/backing-dev.h +++ work/include/linux/backing-dev.h @@ -362,6 +362,26 @@ static inline struct bdi_writeback *inod return inode->i_wb; } +static inline struct bdi_writeback * +unlocked_inode_to_wb_lock_list(struct inode *inode) +{ + struct bdi_writeback *wb; + + rcu_read_lock(); + + while (true) { + wb = inode->i_wb; + spin_lock(&wb->list_lock); + if (likely(wb == inode->i_wb)) + break; + spin_unlock(&wb->list_lock); + cpu_relax(); + } + + rcu_read_unlock(); + return wb; +} + /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode @@ -374,8 +394,7 @@ static inline struct bdi_writeback *inod * unlocked_inode_to_wb_end(). * * The caller must call unlocked_inode_to_wb_end() with *@lockdep - * afterwards and can't sleep during transaction. IRQ may or may not be - * disabled on return. + * afterwards and can't sleep during transaction. */ static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) @@ -388,14 +407,16 @@ unlocked_inode_to_wb_begin(struct inode */ *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; - if (unlikely(*lockedp)) - spin_lock_irq(&inode->i_mapping->tree_lock); - /* - * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. + * Protected by either !I_WB_SWITCH + rcu_read_lock(), * inode_to_wb() will bark. Deref directly. */ - return inode->i_wb; + if (likely(!*lockedp)) + return inode->i_wb; + + rcu_read_unlock(); + + return unlocked_inode_to_wb_lock_list(inode); } /** @@ -405,10 +426,10 @@ unlocked_inode_to_wb_begin(struct inode */ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) { - if (unlikely(locked)) - spin_unlock_irq(&inode->i_mapping->tree_lock); - - rcu_read_unlock(); + if (likely(!locked)) + rcu_read_unlock(); + else + spin_unlock(&inode->i_wb->list_lock); } #else /* CONFIG_CGROUP_WRITEBACK */ @@ -453,6 +474,15 @@ static inline struct bdi_writeback *inod } static inline struct bdi_writeback * +unlocked_inode_to_wb_lock_list(struct inode *inode) +{ + struct bdi_writeback *wb = inode_to_wb(inode); + + spin_lock(&wb->list_lock); + return wb; +} + +static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) { return inode_to_wb(inode); Index: work/fs/fs-writeback.c =================================================================== --- work.orig/fs/fs-writeback.c +++ work/fs/fs-writeback.c @@ -1309,10 +1309,10 @@ __writeback_single_inode(struct inode *i * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() * and does more profound writeback list handling in writeback_sb_inodes(). */ -static int -writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) +static int writeback_single_inode(struct inode *inode, + struct writeback_control *wbc) { + struct bdi_writeback *wb; int ret = 0; spin_lock(&inode->i_lock); @@ -1350,7 +1350,8 @@ writeback_single_inode(struct inode *ino ret = __writeback_single_inode(inode, wbc); wbc_detach_inode(wbc); - spin_lock(&wb->list_lock); + + wb = unlocked_inode_to_wb_lock_list(inode); spin_lock(&inode->i_lock); /* * If inode is clean, remove it from writeback lists. Otherwise don't @@ -1515,8 +1516,7 @@ static long writeback_sb_inodes(struct s cond_resched(); } - - spin_lock(&wb->list_lock); + wb = unlocked_inode_to_wb_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) wrote++; @@ -2310,7 +2310,6 @@ EXPORT_SYMBOL(sync_inodes_sb); */ int write_inode_now(struct inode *inode, int sync) { - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, @@ -2322,7 +2321,7 @@ int write_inode_now(struct inode *inode, wbc.nr_to_write = 0; might_sleep(); - return writeback_single_inode(inode, wb, &wbc); + return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(write_inode_now); @@ -2339,7 +2338,7 @@ EXPORT_SYMBOL(write_inode_now); */ int sync_inode(struct inode *inode, struct writeback_control *wbc) { - return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc); + return writeback_single_inode(inode, wbc); } EXPORT_SYMBOL(sync_inode); -- To unsubscribe from this list: send the line "unsubscribe cgroups" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html