Re: [RFC 2/2] ext4: add async_checkpoint mount option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



hi,

Any thoughts on this patchset?
Our test results show that it also brings some performance improvement.

Regards,
Xiaoguang Wang
Now that jbd2 supports checkpointing transactions asynchronously and
proactively when free journal space drops below a user-specified
threshold, add a new mount option "async_checkpoint" that lets users
enable or disable this jbd2 feature.

Usage:
     # with default threshold 50%
     sudo mount -o async_checkpoint /dev/nvme0n1 mntpoint

     # user specifies a threshold 30%
     sudo mount -o async_checkpoint=30 /dev/nvme0n1 mntpoint

     # do a remount to enable this feature with default threshold 50%
     sudo mount -o remount,async_checkpoint /dev/nvme0n1

     # do a remount to enable this feature with threshold 30%
     sudo mount -o remount,async_checkpoint=30 /dev/nvme0n1

     # disable this feature
     sudo mount -o remount,noasync_checkpoint /dev/nvme0n1

I have used fs_mark to run performance tests:
fs_mark -d mntpoint/testdir/ -D 16 -t 32 -n 500000 -s 4096 -S $sync_mode -N 256 -k
Here sync_mode is one of 0, 1, 2, 3, 4, 5 and 6, and the transaction commit info comes
from /proc/fs/jbd2/nvme0n1-8/info.

Please also refer to fs_mark's README for what sync_mode means.

Test 1: sync_mode = 0
   without patch:
     Average Files/sec:      96898.0
     177 transactions (177 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      97727.0
     177 transactions (177 requested), each up to 65536 blocks
About 0.8% improvement, not significant.

Test 2: sync_mode = 1
   without patch:
     Average Files/sec:      46780.0
     1210422 transactions (1210422 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      49510.0
     1053905 transactions (1053905 requested), each up to 65536 blocks
About 5.8% improvement, and the number of transactions is reduced.

Test 3: sync_mode = 2
   without patch:
     Average Files/sec:      71072.0
     190 transactions (190 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      72464.0
     189 transactions (189 requested), each up to 65536 blocks
About 1.9% improvement.

Test 4: sync_mode = 3
   without patch:
     Average Files/sec:      61977.0
     282973 transactions (282973 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      70962.0
     88148 transactions (88148 requested), each up to 65536 blocks
About 14.4% improvement, which is much more noticeable, and the number of
transactions is greatly reduced.

Test 5: sync_mode = 4
   without patch:
     Average Files/sec:      69796.0
     190 transactions (190 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      70708.0
     189 transactions (189 requested), each up to 65536 blocks
About 1.3% improvement, not significant.

Test 6: sync_mode = 5
   without patch:
     Average Files/sec:      61523.0
     411394 transactions (411394 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      66785.0
     280367 transactions (280367 requested), each up to 65536 blocks
About 8.5% improvement, which is noticeable, and the number of
transactions is greatly reduced.

Test 7: sync_mode = 6
   without patch:
     Average Files/sec:      70129.0
     189 transactions (189 requested), each up to 65536 blocks
   with patch:
     Average Files/sec:      69194.0
     190 transactions (190 requested), each up to 65536 blocks
About 1.3% performance regression, which is not significant.

The above tests show that in most cases, async checkpoint
gives some performance improvement.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@xxxxxxxxxxxxxxxxx>
---
  fs/ext4/ext4.h  |  2 ++
  fs/ext4/super.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 73 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb6785..f53a64d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1123,6 +1123,7 @@ struct ext4_inode_info {
  #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
  #define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
+#define EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT	0x4000000 /* Journal Async Checkpoint */
  #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
  #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
  #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
@@ -1411,6 +1412,7 @@ struct ext4_sb_info {
  	struct mutex s_orphan_lock;
  	unsigned long s_ext4_flags;		/* Ext4 superblock flags */
  	unsigned long s_commit_interval;
+	unsigned int s_async_checkponit_thresh;
  	u32 s_max_batch_time;
  	u32 s_min_batch_time;
  	struct block_device *journal_bdev;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4079605..ae21338 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,6 +54,7 @@
  #include "acl.h"
  #include "mballoc.h"
  #include "fsmap.h"
+#include <linux/jbd2.h>
#define CREATE_TRACE_POINTS
  #include <trace/events/ext4.h>
@@ -1455,6 +1456,7 @@ enum {
  	Opt_dioread_nolock, Opt_dioread_lock,
  	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
  	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+	Opt_async_checkpoint, Opt_noasync_checkpoint,
  };
static const match_table_t tokens = {
@@ -1546,6 +1548,9 @@ enum {
  	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
  	{Opt_removed, "noreservation"}, /* mount option from ext2/3 */
  	{Opt_removed, "journal=%u"},	/* mount option from ext2/3 */
+	{Opt_async_checkpoint, "async_checkpoint=%u"},
+	{Opt_async_checkpoint, "async_checkpoint"},
+	{Opt_noasync_checkpoint, "noasync_checkpoint"},
  	{Opt_err, NULL},
  };
@@ -1751,6 +1756,9 @@ static int clear_qf_name(struct super_block *sb, int qtype)
  	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
  	{Opt_test_dummy_encryption, 0, MOPT_GTE0},
  	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+	{Opt_async_checkpoint, 0, MOPT_GTE0},
+	{Opt_noasync_checkpoint, EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT,
+		MOPT_CLEAR},
  	{Opt_err, 0, 0}
  };
@@ -2016,6 +2024,11 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
  		sbi->s_mount_opt |= m->mount_opt;
  	} else if (token == Opt_data_err_ignore) {
  		sbi->s_mount_opt &= ~m->mount_opt;
+	} else if (token == Opt_async_checkpoint) {
+		set_opt(sb, JOURNAL_ASYNC_CHECKPOINT);
+		if (!args->from)
+			arg = JBD2_DEFAULT_ASYCN_CHECKPOINT_THRESH;
+		sbi->s_async_checkponit_thresh = arg;
  	} else {
  		if (!args->from)
  			arg = 1;
@@ -2234,6 +2247,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
  		SEQ_OPTS_PUTS("data_err=abort");
  	if (DUMMY_ENCRYPTION_ENABLED(sbi))
  		SEQ_OPTS_PUTS("test_dummy_encryption");
+	if (test_opt(sb, JOURNAL_ASYNC_CHECKPOINT) && (nodefs ||
+	    (sbi->s_async_checkponit_thresh !=
+	    JBD2_DEFAULT_ASYCN_CHECKPOINT_THRESH)))
+		SEQ_OPTS_PRINT("async_checkpoint=%u",
+			       sbi->s_async_checkponit_thresh);
ext4_show_quota_options(seq, sb);
  	return 0;
@@ -4700,6 +4718,38 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
  	write_unlock(&journal->j_state_lock);
  }
+static int ext4_init_journal_async_checkpoint(struct super_block *sb,
+			journal_t *journal)
+{
+	struct workqueue_struct *wq;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	wq = alloc_workqueue("jbd2-checkpoint-wq",
+			     WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!wq) {
+		pr_err("%s: failed to create workqueue\n", __func__);
+		return -ENOMEM;
+	}
+	INIT_WORK(&journal->j_checkpoint_work, jbd2_log_do_checkpoint_async);
+
+	write_lock(&journal->j_state_lock);
+	journal->j_flags |= JBD2_ASYNC_CHECKPOINT;
+	journal->j_checkpoint_wq = wq;
+	journal->j_async_checkpoint_thresh =
+			sbi->s_async_checkponit_thresh;
+	journal->j_async_checkpoint_run = 0;
+	write_unlock(&journal->j_state_lock);
+	return 0;
+}
+
+static void ext4_destroy_journal_async_checkpoint(journal_t *journal)
+{
+	write_lock(&journal->j_state_lock);
+	journal->j_flags &= ~JBD2_ASYNC_CHECKPOINT;
+	write_unlock(&journal->j_state_lock);
+	jbd2_journal_destroy_async_checkpoint_wq(journal);
+}
+
  static struct inode *ext4_get_journal_inode(struct super_block *sb,
  					     unsigned int journal_inum)
  {
@@ -4737,6 +4787,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
  {
  	struct inode *journal_inode;
  	journal_t *journal;
+	int ret;
BUG_ON(!ext4_has_feature_journal(sb)); @@ -4752,6 +4803,11 @@ static journal_t *ext4_get_journal(struct super_block *sb,
  	}
  	journal->j_private = sb;
  	ext4_init_journal_params(sb, journal);
+	ret = ext4_init_journal_async_checkpoint(sb, journal);
+	if (ret) {
+		jbd2_journal_destroy(journal);
+		return NULL;
+	}
  	return journal;
  }
@@ -4767,6 +4823,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
  	unsigned long offset;
  	struct ext4_super_block *es;
  	struct block_device *bdev;
+	int ret;
BUG_ON(!ext4_has_feature_journal(sb)); @@ -4841,6 +4898,10 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
  	}
  	EXT4_SB(sb)->journal_bdev = bdev;
  	ext4_init_journal_params(sb, journal);
+	ret = ext4_init_journal_async_checkpoint(sb, journal);
+	if (ret)
+		goto out_journal;
+
  	return journal;
out_journal:
@@ -5471,6 +5532,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
  	}
  #endif
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT) &&
+	    !test_opt(sb, JOURNAL_ASYNC_CHECKPOINT))
+		ext4_destroy_journal_async_checkpoint(sbi->s_journal);
+	else if (!(old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT) &&
+	    test_opt(sb, JOURNAL_ASYNC_CHECKPOINT)) {
+		err = ext4_init_journal_async_checkpoint(sb, sbi->s_journal);
+		if (err)
+			goto restore_opts;
+	}
+
  	*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
  	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
  	kfree(orig_data);




[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux