[PATCH v6 08/10] ext4: introduce selective flushing in fast commit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



With fast commits, if the entire commit is contained within a single
block and there isn't any data that needs a flush, we can avoid sending
expensive cache flush to disk. Single block metadata only fast commits
can be written using FUA to guarantee consistency.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx>
---
 fs/ext4/ext4.h        | 12 ++++++++++++
 fs/ext4/ext4_jbd2.h   | 20 ++++++++++++--------
 fs/ext4/fast_commit.c | 23 ++++++++++++++++++-----
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 196c513f82dd..3721daea2890 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1744,6 +1744,13 @@ struct ext4_sb_info {
 					 */
 	struct list_head s_fc_dentry_q[2];	/* directory entry updates */
 	unsigned int s_fc_bytes;
+
+	/*
+	 * This flag indicates whether a full flush is needed on
+	 * next fast commit.
+	 */
+	int fc_flush_required;
+
 	/*
 	 * Main fast commit lock. This lock protects accesses to the
 	 * following fields:
@@ -2905,6 +2912,11 @@ void ext4_fc_del(struct inode *inode);
 bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
 void ext4_fc_replay_cleanup(struct super_block *sb);
 int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
+static inline void ext4_fc_mark_needs_flush(struct super_block *sb)
+{
+	EXT4_SB(sb)->fc_flush_required = 1;
+}
+
 int __init ext4_fc_init_dentry_cache(void);
 void ext4_fc_destroy_dentry_cache(void);
 int ext4_fc_record_regions(struct super_block *sb, int ino,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 0c77697d5e90..e3a4f5c49b6e 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -420,19 +420,23 @@ static inline int ext4_journal_force_commit(journal_t *journal)
 static inline int ext4_jbd2_inode_add_write(handle_t *handle,
 		struct inode *inode, loff_t start_byte, loff_t length)
 {
-	if (ext4_handle_valid(handle))
-		return jbd2_journal_inode_ranged_write(handle,
-				EXT4_I(inode)->jinode, start_byte, length);
-	return 0;
+	if (!ext4_handle_valid(handle))
+		return 0;
+
+	ext4_fc_mark_needs_flush(inode->i_sb);
+	return jbd2_journal_inode_ranged_write(handle,
+			EXT4_I(inode)->jinode, start_byte, length);
 }
 
 static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
 		struct inode *inode, loff_t start_byte, loff_t length)
 {
-	if (ext4_handle_valid(handle))
-		return jbd2_journal_inode_ranged_wait(handle,
-				EXT4_I(inode)->jinode, start_byte, length);
-	return 0;
+	if (!ext4_handle_valid(handle))
+		return 0;
+
+	ext4_fc_mark_needs_flush(inode->i_sb);
+	return jbd2_journal_inode_ranged_wait(handle,
+			EXT4_I(inode)->jinode, start_byte, length);
 }
 
 static inline void ext4_update_inode_fsync_trans(handle_t *handle,
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 0b7064f8dfa5..35c89bee452c 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -638,11 +638,24 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
 static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
 {
 	blk_opf_t write_flags = REQ_SYNC;
-	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct buffer_head *bh = sbi->s_fc_bh;
+	int old = 1, new = 0;
+
+	if (!is_tail) {
+		/*
+		 * This commit has at least 1 non-tail block,
+		 * thus FLUSH is required.
+		 */
+		ext4_fc_mark_needs_flush(sb);
+	} else {
+		/* Use cmpxchg to ensure that no flush requrest is lost. */
+		if (cmpxchg(&sbi->fc_flush_required, old, new))
+			/* Old value was 1, so request a flush. */
+			write_flags |= REQ_PREFLUSH;
+		write_flags |= REQ_FUA;
+	}
 
-	/* Add REQ_FUA | REQ_PREFLUSH only its tail */
-	if (test_opt(sb, BARRIER) && is_tail)
-		write_flags |= REQ_FUA | REQ_PREFLUSH;
 	lock_buffer(bh);
 	set_buffer_dirty(bh);
 	set_buffer_uptodate(bh);
@@ -1090,7 +1103,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
 	 * If file system device is different from journal device, issue a cache
 	 * flush before we start writing fast commit blocks.
 	 */
-	if (journal->j_fs_dev != journal->j_dev)
+	if (sbi->fc_flush_required && journal->j_fs_dev != journal->j_dev)
 		blkdev_issue_flush(journal->j_fs_dev);
 
 	blk_start_plug(&plug);
-- 
2.45.1.288.g0e0cd299f1-goog





[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux