wakeup_flusher_threads(0) will queue work doing complete writeback for each flusher thread. Thus there is not much point in submitting additional work doing full inode WB_SYNC_NONE writeback from sync_filesystems().

So change sync to do:
  wakeup_flusher_threads(0);
  for each filesystem
    WB_SYNC_ALL inode writeback
    sync_fs(wait=0)
  submit dirty buffers from all block devices
  for each filesystem
    sync_fs(wait=1)
  synchronous writeout of all block devices

Note that this changes the ordering of sync_fs() calls and inode writeback. Previously we called sync_fs(wait=0) after WB_SYNC_NONE inode writeback and before WB_SYNC_ALL inode writeback. Now we call it after WB_SYNC_ALL inode writeback, because there is no point in calling it while the flusher threads woken by wakeup_flusher_threads(0) are still writing out data, and there is no easy way to find out when the work submitted by wakeup_flusher_threads() is finished.

Signed-off-by: Jan Kara <jack@xxxxxxx>
--- fs/sync.c | 55 +++++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 43 insertions(+), 12 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index f07f991..ca40cda 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -28,15 +28,16 @@ * speeds up the wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. 
*/ -static void __sync_filesystem(struct super_block *sb, int wait) +static void __sync_filesystem(struct super_block *sb, int emergency) { - if (wait) - sync_inodes_sb(sb); - else + /* In case of emergency sync we don't want to wait for locks and IO */ + if (unlikely(emergency)) writeback_inodes_sb(sb); + else + sync_inodes_sb(sb); if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, wait); + sb->s_op->sync_fs(sb, 0); } /* @@ -60,11 +61,23 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - __sync_filesystem(sb, 0); + /* Asynchronous pass of sync to speed things up */ + writeback_inodes_sb(sb); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 0); + if (ret) + return ret; + } ret = __sync_blockdev(sb->s_bdev, 0); if (ret < 0) return ret; - __sync_filesystem(sb, 1); + /* Synchronous pass of sync to guarantee data integrity */ + sync_inodes_sb(sb); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } return __sync_blockdev(sb->s_bdev, 1); } EXPORT_SYMBOL_GPL(sync_filesystem); @@ -79,9 +92,9 @@ static void sync_one_sb(struct super_block *sb, void *arg) * Sync all the data for all the filesystems (called by sys_sync() and * emergency sync) */ -static void sync_filesystems(int wait) +static void sync_filesystems(int emergency) { - iterate_supers(sync_one_sb, &wait); + iterate_supers(sync_one_sb, &emergency); } static void sync_all_bdevs(int wait) @@ -120,16 +133,34 @@ static void sync_all_bdevs(int wait) iput(old_inode); } +static void sb_sync_fs(struct super_block *sb, void *arg) +{ + /* Avoid read-only filesystems */ + if (sb->s_flags & MS_RDONLY) + return; + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); +} + /* * sync everything. Start out by waking pdflush, because that writes back * all queues in parallel. */ SYSCALL_DEFINE0(sync) { + /* Start flushing on all devices */ wakeup_flusher_threads(0); + /* + * Above call queued work doing complete writeout on each filesystem. 
+ * Now we queue work which guarantees data integrity of all inodes + * - not much should be left for it to write. The WB_SYNC_ALL inode + * writeback also guarantees that sync_fs() is called after inodes + * are written out and thus it can do meaningful work. + */ sync_filesystems(0); - sync_filesystems(1); sync_all_bdevs(0); + /* Call blocking ->sync_fs() for each filesystem */ + iterate_supers(sb_sync_fs, NULL); sync_all_bdevs(1); if (unlikely(laptop_mode)) laptop_sync_completion(); @@ -142,8 +173,8 @@ static void do_sync_work(struct work_struct *work) * Sync twice to reduce the possibility we skipped some inodes / pages * because they were temporarily locked */ - sync_filesystems(0); - sync_filesystems(0); + sync_filesystems(1); + sync_filesystems(1); printk("Emergency Sync complete\n"); kfree(work); } -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html