Now that jbd2 supports checkpointing transactions asynchronously and
proactively once free journal space drops below a user-specified
threshold, add a new mount option "async_checkpoint" that lets users
enable or disable this jbd2 feature.
Usage:
# with default threshold 50%
sudo mount -o async_checkpoint /dev/nvme0n1 mntpoint
# user specifies a threshold 30%
sudo mount -o async_checkpoint=30 /dev/nvme0n1 mntpoint
# do a remount to enable this feature with default threshold 50%
sudo mount -o remount,async_checkpoint /dev/nvme0n1
# do a remount to enable this feature with threshold 30%
sudo mount -o remount,async_checkpoint=30 /dev/nvme0n1
# disable this feature
sudo mount -o remount,noasync_checkpoint /dev/nvme0n1
I ran performance tests with fs_mark:
fs_mark -d mntpoint/testdir/ -D 16 -t 32 -n 500000 -s 4096 -S $sync_mode -N 256 -k
where sync_mode takes the values 0, 1, 2, 3, 4, 5 and 6, and the transaction
commit statistics come from /proc/fs/jbd2/nvme0n1-8/info.
Please refer to fs_mark's README for the meaning of each sync_mode value.
Test 1: sync_mode = 0
without patch:
Average Files/sec: 96898.0
177 transactions (177 requested), each up to 65536 blocks
with patch:
Average Files/sec: 97727.0
177 transactions (177 requested), each up to 65536 blocks
About 0.8% improvement, not obvious.
Test 2: sync_mode = 1
without patch:
Average Files/sec: 46780.0
1210422 transactions (1210422 requested), each up to 65536 blocks
with patch:
Average Files/sec: 49510.0
1053905 transactions (1053905 requested), each up to 65536 blocks
About 5.8% improvement, and the number of transactions is reduced.
Test 3: sync_mode = 2
without patch:
Average Files/sec: 71072.0
190 transactions (190 requested), each up to 65536 blocks
with patch:
Average Files/sec: 72464.0
189 transactions (189 requested), each up to 65536 blocks
About 1.9% improvement.
Test 4: sync_mode = 3
without patch:
Average Files/sec: 61977.0
282973 transactions (282973 requested), each up to 65536 blocks
with patch:
Average Files/sec: 70962.0
88148 transactions (88148 requested), each up to 65536 blocks
About 14.4% improvement, which is much more noticeable, and the number of
transactions is greatly reduced.
Test 5: sync_mode = 4
without patch:
Average Files/sec: 69796.0
190 transactions (190 requested), each up to 65536 blocks
with patch:
Average Files/sec: 70708.0
189 transactions (189 requested), each up to 65536 blocks
About 1.3% improvement, not obvious.
Test 6: sync_mode = 5
without patch:
Average Files/sec: 61523.0
411394 transactions (411394 requested), each up to 65536 blocks
with patch:
Average Files/sec: 66785.0
280367 transactions (280367 requested), each up to 65536 blocks
About 8.5% improvement, which is noticeable, and the number of
transactions is greatly reduced.
Test 7: sync_mode = 6
without patch:
Average Files/sec: 70129.0
189 transactions (189 requested), each up to 65536 blocks
with patch:
Average Files/sec: 69194.0
190 transactions (190 requested), each up to 65536 blocks
About 1.3% performance regression, which is not significant.
From the tests above, we can see that in most cases async checkpointing
gives some performance improvement.
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@xxxxxxxxxxxxxxxxx>
---
fs/ext4/ext4.h | 2 ++
fs/ext4/super.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 73 insertions(+)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb6785..f53a64d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1123,6 +1123,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
+#define EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT 0x4000000 /* Journal Async Checkpoint */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
@@ -1411,6 +1412,7 @@ struct ext4_sb_info {
struct mutex s_orphan_lock;
unsigned long s_ext4_flags; /* Ext4 superblock flags */
unsigned long s_commit_interval;
+ unsigned int s_async_checkponit_thresh;
u32 s_max_batch_time;
u32 s_min_batch_time;
struct block_device *journal_bdev;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4079605..ae21338 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,6 +54,7 @@
#include "acl.h"
#include "mballoc.h"
#include "fsmap.h"
+#include <linux/jbd2.h>
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
@@ -1455,6 +1456,7 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+ Opt_async_checkpoint, Opt_noasync_checkpoint,
};
static const match_table_t tokens = {
@@ -1546,6 +1548,9 @@ enum {
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
{Opt_removed, "noreservation"}, /* mount option from ext2/3 */
{Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
+ {Opt_async_checkpoint, "async_checkpoint=%u"},
+ {Opt_async_checkpoint, "async_checkpoint"},
+ {Opt_noasync_checkpoint, "noasync_checkpoint"},
{Opt_err, NULL},
};
@@ -1751,6 +1756,9 @@ static int clear_qf_name(struct super_block *sb, int qtype)
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+ {Opt_async_checkpoint, 0, MOPT_GTE0},
+ {Opt_noasync_checkpoint, EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT,
+ MOPT_CLEAR},
{Opt_err, 0, 0}
};
@@ -2016,6 +2024,11 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
sbi->s_mount_opt |= m->mount_opt;
} else if (token == Opt_data_err_ignore) {
sbi->s_mount_opt &= ~m->mount_opt;
+ } else if (token == Opt_async_checkpoint) {
+ set_opt(sb, JOURNAL_ASYNC_CHECKPOINT);
+ if (!args->from)
+ arg = JBD2_DEFAULT_ASYCN_CHECKPOINT_THRESH;
+ sbi->s_async_checkponit_thresh = arg;
} else {
if (!args->from)
arg = 1;
@@ -2234,6 +2247,11 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PUTS("data_err=abort");
if (DUMMY_ENCRYPTION_ENABLED(sbi))
SEQ_OPTS_PUTS("test_dummy_encryption");
+ if (test_opt(sb, JOURNAL_ASYNC_CHECKPOINT) && (nodefs ||
+ (sbi->s_async_checkponit_thresh !=
+ JBD2_DEFAULT_ASYCN_CHECKPOINT_THRESH)))
+ SEQ_OPTS_PRINT("async_checkpoint=%u",
+ sbi->s_async_checkponit_thresh);
ext4_show_quota_options(seq, sb);
return 0;
@@ -4700,6 +4718,38 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
write_unlock(&journal->j_state_lock);
}
+/*
+ * Enable asynchronous checkpointing on @journal if the filesystem was
+ * mounted (or is being remounted) with the "async_checkpoint" option.
+ *
+ * Allocates the "jbd2-checkpoint-wq" workqueue, initialises the journal's
+ * checkpoint work item and then, under j_state_lock, publishes the
+ * workqueue, the threshold stored in sbi->s_async_checkponit_thresh and
+ * the JBD2_ASYNC_CHECKPOINT flag on the journal.
+ *
+ * Returns 0 on success or when the option is not set (nothing to do),
+ * -EINVAL when the option is set but there is no journal to configure,
+ * and -ENOMEM when the workqueue cannot be allocated.
+ */
+static int ext4_init_journal_async_checkpoint(struct super_block *sb,
+					      journal_t *journal)
+{
+	struct workqueue_struct *wq;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	/*
+	 * The journal setup paths call this helper unconditionally.  Only
+	 * switch the feature on when it was requested; otherwise every mount
+	 * would get a workqueue, and enabling the option on a later remount
+	 * would allocate a second one and overwrite (leak) the first.
+	 */
+	if (!test_opt(sb, JOURNAL_ASYNC_CHECKPOINT))
+		return 0;
+
+	/*
+	 * Remount hands in sbi->s_journal without checking it.
+	 * NOTE(review): presumably NULL when the filesystem has no journal;
+	 * refuse instead of dereferencing it below.
+	 */
+	if (!journal)
+		return -EINVAL;
+
+	wq = alloc_workqueue("jbd2-checkpoint-wq",
+			     WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!wq) {
+		pr_err("%s: failed to create workqueue\n", __func__);
+		return -ENOMEM;
+	}
+	INIT_WORK(&journal->j_checkpoint_work, jbd2_log_do_checkpoint_async);
+
+	/* Publish the flag, workqueue and threshold together under the lock. */
+	write_lock(&journal->j_state_lock);
+	journal->j_flags |= JBD2_ASYNC_CHECKPOINT;
+	journal->j_checkpoint_wq = wq;
+	journal->j_async_checkpoint_thresh =
+			sbi->s_async_checkponit_thresh;
+	journal->j_async_checkpoint_run = 0;
+	write_unlock(&journal->j_state_lock);
+	return 0;
+}
+
+/*
+ * Disable asynchronous checkpointing on @journal: clear the
+ * JBD2_ASYNC_CHECKPOINT flag under j_state_lock, then call
+ * jbd2_journal_destroy_async_checkpoint_wq() on the journal (defined
+ * outside this file; presumably it releases the checkpoint workqueue).
+ *
+ * A NULL @journal is a no-op.
+ */
+static void ext4_destroy_journal_async_checkpoint(journal_t *journal)
+{
+	/*
+	 * Remount hands in sbi->s_journal without checking it.
+	 * NOTE(review): presumably NULL when the filesystem has no journal.
+	 */
+	if (!journal)
+		return;
+
+	/* Clear the flag first, before the workqueue is handed off below. */
+	write_lock(&journal->j_state_lock);
+	journal->j_flags &= ~JBD2_ASYNC_CHECKPOINT;
+	write_unlock(&journal->j_state_lock);
+	jbd2_journal_destroy_async_checkpoint_wq(journal);
+}
+
static struct inode *ext4_get_journal_inode(struct super_block *sb,
unsigned int journal_inum)
{
@@ -4737,6 +4787,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
{
struct inode *journal_inode;
journal_t *journal;
+ int ret;
BUG_ON(!ext4_has_feature_journal(sb));
@@ -4752,6 +4803,11 @@ static journal_t *ext4_get_journal(struct super_block *sb,
}
journal->j_private = sb;
ext4_init_journal_params(sb, journal);
+ ret = ext4_init_journal_async_checkpoint(sb, journal);
+ if (ret) {
+ jbd2_journal_destroy(journal);
+ return NULL;
+ }
return journal;
}
@@ -4767,6 +4823,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
unsigned long offset;
struct ext4_super_block *es;
struct block_device *bdev;
+ int ret;
BUG_ON(!ext4_has_feature_journal(sb));
@@ -4841,6 +4898,10 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
}
EXT4_SB(sb)->journal_bdev = bdev;
ext4_init_journal_params(sb, journal);
+ ret = ext4_init_journal_async_checkpoint(sb, journal);
+ if (ret)
+ goto out_journal;
+
return journal;
out_journal:
@@ -5471,6 +5532,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#endif
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT) &&
+ !test_opt(sb, JOURNAL_ASYNC_CHECKPOINT))
+ ext4_destroy_journal_async_checkpoint(sbi->s_journal);
+ else if (!(old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_ASYNC_CHECKPOINT) &&
+ test_opt(sb, JOURNAL_ASYNC_CHECKPOINT)) {
+ err = ext4_init_journal_async_checkpoint(sb, sbi->s_journal);
+ if (err)
+ goto restore_opts;
+ }
+
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);