Currently the flusher thread pins the superblock it is working on (via grab_super_passive()). However, this is unnecessary after commit 169ebd90131b "writeback: Avoid iput() from flusher thread". Before that commit we had to block umount so that it would not complain about busy inodes caused by the elevated i_count held by the flusher thread. After that commit we can let umount run, and it will block in evict_inodes() waiting for the flusher thread to be done with the inode (thus the flusher thread is also safe against the inode going away from under it). Removing the superblock pinning allows us to simplify the code quite a bit. Among other things, there's no need to sort the b_io list in move_expired_inodes() anymore. Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/fs-writeback.c | 105 ++++++--------------------------------- include/trace/events/writeback.h | 2 +- 2 files changed, 17 insertions(+), 90 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be568b7311d6..f85ee6795a28 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -255,11 +255,7 @@ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, struct wb_writeback_work *work) { - LIST_HEAD(tmp); - struct list_head *pos, *node; - struct super_block *sb = NULL; struct inode *inode; - int do_sb_sort = 0; int moved = 0; while (!list_empty(delaying_queue)) { @@ -267,31 +263,9 @@ static int move_expired_inodes(struct list_head *delaying_queue, if (work->older_than_this && inode_dirtied_after(inode, *work->older_than_this)) break; - list_move(&inode->i_wb_list, &tmp); + list_move(&inode->i_wb_list, dispatch_queue); moved++; - if (sb_is_blkdev_sb(inode->i_sb)) - continue; - if (sb && sb != inode->i_sb) - do_sb_sort = 1; - sb = inode->i_sb; - } - - /* just one sb in list, splice to dispatch_queue and we're done */ - if (!do_sb_sort) { - list_splice(&tmp, dispatch_queue); - goto out; - } - - /* Move inodes from one superblock together */ - while (!list_empty(&tmp)) { - sb = 
wb_inode(tmp.prev)->i_sb; - list_for_each_prev_safe(pos, node, &tmp) { - inode = wb_inode(pos); - if (inode->i_sb == sb) - list_move(&inode->i_wb_list, dispatch_queue); - } } -out: return moved; } @@ -500,7 +474,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * * This function is designed to be called for writing back one inode which * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() - * and does more profound writeback list handling in writeback_sb_inodes(). + * and does more profound writeback list handling in writeback_inodes(). */ static int writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, @@ -570,8 +544,8 @@ static long writeback_chunk_size(struct backing_dev_info *bdi, * The intended call sequence for WB_SYNC_ALL writeback is: * * wb_writeback() - * writeback_sb_inodes() <== called only once - * write_cache_pages() <== called once for each inode + * writeback_inodes() <== called only once + * write_cache_pages() <== called once for each inode * (quickly) tag currently dirty pages * (maybe slowly) sync all tagged pages */ @@ -589,13 +563,12 @@ static long writeback_chunk_size(struct backing_dev_info *bdi, } /* - * Write a portion of b_io inodes which belong to @sb. + * Write inodes in b_io list belonging to @work->sb (if set). * * Return the number of pages and/or inodes written. */ -static long writeback_sb_inodes(struct super_block *sb, - struct bdi_writeback *wb, - struct wb_writeback_work *work) +static long writeback_inodes(struct bdi_writeback *wb, + struct wb_writeback_work *work) { struct writeback_control wbc = { .sync_mode = work->sync_mode, @@ -614,23 +587,14 @@ static long writeback_sb_inodes(struct super_block *sb, while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); - if (inode->i_sb != sb) { - if (work->sb) { - /* - * We only want to write back data for this - * superblock, move all inodes not belonging - * to it back onto the dirty list. 
- */ - redirty_tail(inode, wb); - continue; - } - + if (work->sb && inode->i_sb != work->sb) { /* - * The inode belongs to a different superblock. - * Bounce back to the caller to unpin this and - * pin the next superblock. + * We only want to write back data for this + * superblock, move all inodes not belonging + * to it back onto the dirty list. */ - break; + redirty_tail(inode, wb); + continue; } /* @@ -656,7 +620,7 @@ static long writeback_sb_inodes(struct super_block *sb, */ spin_unlock(&inode->i_lock); requeue_io(inode, wb); - trace_writeback_sb_inodes_requeue(inode); + trace_writeback_inodes_requeue(inode); continue; } spin_unlock(&wb->list_lock); @@ -710,40 +674,6 @@ static long writeback_sb_inodes(struct super_block *sb, return wrote; } -static long __writeback_inodes_wb(struct bdi_writeback *wb, - struct wb_writeback_work *work) -{ - unsigned long start_time = jiffies; - long wrote = 0; - - while (!list_empty(&wb->b_io)) { - struct inode *inode = wb_inode(wb->b_io.prev); - struct super_block *sb = inode->i_sb; - - if (!grab_super_passive(sb)) { - /* - * grab_super_passive() may fail consistently due to - * s_umount being grabbed by someone else. Don't use - * requeue_io() to avoid busy retrying the inode/sb. 
- */ - redirty_tail(inode, wb); - continue; - } - wrote += writeback_sb_inodes(sb, wb, work); - drop_super(sb); - - /* refer to the same tests at the end of writeback_sb_inodes */ - if (wrote) { - if (time_is_before_jiffies(start_time + HZ / 10UL)) - break; - if (work->nr_pages <= 0) - break; - } - } - /* Leave any unwritten inodes on b_io */ - return wrote; -} - static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { @@ -757,7 +687,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work); - __writeback_inodes_wb(wb, &work); + writeback_inodes(wb, &work); spin_unlock(&wb->list_lock); return nr_pages - work.nr_pages; @@ -857,10 +787,7 @@ static long wb_writeback(struct bdi_writeback *wb, trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) queue_io(wb, work); - if (work->sb) - progress = writeback_sb_inodes(work->sb, wb, work); - else - progress = __writeback_inodes_wb(wb, work); + progress = writeback_inodes(wb, work); trace_writeback_written(wb->bdi, work); wb_update_bandwidth(wb, wb_start); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index cee02d65ab3f..9bf6f2da32d2 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -477,7 +477,7 @@ TRACE_EVENT(balance_dirty_pages, ) ); -TRACE_EVENT(writeback_sb_inodes_requeue, +TRACE_EVENT(writeback_inodes_requeue, TP_PROTO(struct inode *inode), TP_ARGS(inode), -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html