The start time in writeback_inodes_wb() is not very useful because it slips at each invocation time. Preferably one _constant_ time shall be used at the beginning to cover the whole sync() work. The newly dirtied inodes are now guarded at the queue_io() time instead of the b_io walk time. This is more natural: non-empty b_io/b_more_io means "more work pending". The timestamp is now grabbed at the sync work submission time, and may be further optimized to the initial sync() call time. CC: Jan Kara <jack@xxxxxxx> Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx> --- fs/fs-writeback.c | 16 ++++++---------- include/linux/writeback.h | 4 ++-- 2 files changed, 8 insertions(+), 12 deletions(-) --- linux-next.orig/fs/fs-writeback.c 2010-07-29 17:13:49.000000000 +0800 +++ linux-next/fs/fs-writeback.c 2010-07-29 17:13:58.000000000 +0800 @@ -228,6 +228,10 @@ static void move_expired_inodes(struct l struct inode *inode; int do_sb_sort = 0; + if (wbc->for_sync) { + expire_interval = 1; + older_than_this = wbc->sync_after; + } if (wbc->for_kupdate || wbc->for_background) { expire_interval = msecs_to_jiffies(dirty_expire_interval * 10); older_than_this = jiffies - expire_interval; @@ -507,12 +511,6 @@ static int writeback_sb_inodes(struct su requeue_io(inode); continue; } - /* - * Was this inode dirtied after sync_sb_inodes was called? - * This keeps sync from extra jobs and livelock. 
- */ - if (inode_dirtied_after(inode, wbc->wb_start)) - return 1; BUG_ON(inode->i_state & I_FREEING); __iget(inode); @@ -541,10 +539,9 @@ void writeback_inodes_wb(struct bdi_writ { int ret = 0; - wbc->wb_start = jiffies; /* livelock avoidance */ spin_lock(&inode_lock); - if (!(wbc->for_kupdate || wbc->for_background) || list_empty(&wb->b_io)) + if (list_empty(&wb->b_io)) queue_io(wb, wbc); while (!list_empty(&wb->b_io)) { @@ -571,9 +568,8 @@ static void __writeback_inodes_sb(struct { WARN_ON(!rwsem_is_locked(&sb->s_umount)); - wbc->wb_start = jiffies; /* livelock avoidance */ spin_lock(&inode_lock); - if (!(wbc->for_kupdate || wbc->for_background) || list_empty(&wb->b_io)) + if (list_empty(&wb->b_io)) queue_io(wb, wbc); writeback_sb_inodes(sb, wb, wbc, true); spin_unlock(&inode_lock); --- linux-next.orig/include/linux/writeback.h 2010-07-29 17:13:18.000000000 +0800 +++ linux-next/include/linux/writeback.h 2010-07-29 17:13:58.000000000 +0800 @@ -28,8 +28,8 @@ enum writeback_sync_modes { */ struct writeback_control { enum writeback_sync_modes sync_mode; - unsigned long wb_start; /* Time writeback_inodes_wb was - called. This is needed to avoid + unsigned long sync_after; /* Only sync inodes dirtied after this + timestamp. This is needed to avoid extra jobs and livelock */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxxx For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>