On 5/31/2024 3:46 PM, Daejun Park wrote:
In most workloads, the amount of node writes is small compared to the
amount of user data writes. However, even if there is plenty of free space
in a node section, it cannot be used by another type because the type
of a section is fixed. With zoned storage, this unusable free space in
node sections becomes a real problem because sections are large.
This patch avoids the problem by using a single node section, without
considering the hotness of the nodes. When file system usage is
particularly high, the two node sections that are released can be used
as free sections, which makes space usage more efficient.
To use a single node section, add the 'single_node_sec' mount option.
Signed-off-by: Daejun Park <daejun7.park@xxxxxxxxxxx>
---
Documentation/filesystems/f2fs.rst | 2 +
fs/f2fs/f2fs.h | 3 ++
fs/f2fs/recovery.c | 3 ++
fs/f2fs/segment.c | 77 ++++++++++++++++++++++++++++++
fs/f2fs/segment.h | 2 +
fs/f2fs/super.c | 12 +++++
6 files changed, 99 insertions(+)
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 68a0885fb5e6..ba26b2ce4fa4 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -134,6 +134,8 @@ noacl Disable POSIX Access Control List. Note: acl is enabled
active_logs=%u Support configuring the number of active logs. In the
current design, f2fs supports only 2, 4, and 6 logs.
Default number is 6.
+single_node_sec Support single node section mode, which uses a single active
+ log for hot/warm/cold nodes. This is disabled by default.
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
is not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1974b6aff397..90f13a6b64ce 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -116,6 +116,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_GC_MERGE 0x02000000
#define F2FS_MOUNT_COMPRESS_CACHE 0x04000000
#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000
+#define F2FS_MOUNT_SINGLE_NODE_SEC 0x10000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -1655,6 +1656,8 @@ struct f2fs_sb_info {
struct f2fs_mount_info mount_opt; /* mount options */
+ bool single_node_sec; /* single node section */
+
/* for cleaning operations */
struct f2fs_rwsem gc_lock; /*
* semaphore for GC, avoid
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 496aee53c38a..b5cdb0845ac7 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -414,6 +414,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ /* check hot node if single node section mode is enabled */
+ if (sbi->single_node_sec && curseg->segno == NULL_SEGNO)
+ curseg = CURSEG_I(sbi, CURSEG_HOT_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
blkaddr_fast = blkaddr;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a0ce3d080f80..c1fe5c92bdfb 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -394,6 +394,9 @@ int f2fs_commit_atomic_write(struct inode *inode)
return err;
}
+static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec);
+static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno);
+
/*
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
@@ -420,6 +423,58 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
if (has_enough_free_secs(sbi, 0, 0))
return;
+ if (test_opt(sbi, SINGLE_NODE_SEC) && !sbi->single_node_sec) {
+ int type, segno, left_blocks = 0;
+
+ for (type = CURSEG_HOT_NODE; type <= CURSEG_COLD_NODE; type++) {
+ segno = CURSEG_I(sbi, type)->segno;
+ left_blocks += CAP_BLKS_PER_SEC(sbi) -
+ get_ckpt_valid_blocks(sbi, segno, true);
+ }
+
+ /* enable single node section mode if we get 2 free sections */
+ if (left_blocks < CAP_BLKS_PER_SEC(sbi) * 2)
+ goto do_gc;
+
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ /* it can be enabled by others */
+ if (sbi->single_node_sec)
+ goto unlock;
+
+ /* leave current zone by allocating new section */
+ for (type = CURSEG_WARM_NODE; type <= CURSEG_COLD_NODE; type++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ segno = curseg->segno;
+ if (new_curseg(sbi, type, true)) {
+ mutex_unlock(&curseg->curseg_mutex);
+ goto unlock;
+ }
+ locate_dirty_segment(sbi, segno);
+ mutex_unlock(&curseg->curseg_mutex);
+ }
Hi Daejun,
1. This is not compatible with "F2FS_OPTION(sbi).active_logs == 2".
2. Once has_enough_free_secs() returns false, F2FS can never switch back to
multiple node sections, even after has_enough_free_secs() returns true again
and the filesystem is unmounted and remounted. This seems unreasonable.
+
+ /* clear warm node, cold node information */
+ for (type = CURSEG_WARM_NODE; type <= CURSEG_COLD_NODE; type++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ segno = curseg->segno;
+ curseg->segno = NULL_SEGNO;
+ curseg->inited = false;
+ __set_test_and_free(sbi, segno, false);
+ mutex_unlock(&curseg->curseg_mutex);
+ }
+ f2fs_notice(sbi, "single node section mode enabled");
+ sbi->single_node_sec = true;
+unlock:
+ up_write(&SIT_I(sbi)->sentry_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ }
+do_gc:
if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
sbi->gc_thread->f2fs_gc_task) {
DEFINE_WAIT(wait);
@@ -3502,6 +3557,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
} else {
+ if (fio->sbi->single_node_sec)
+ return CURSEG_HOT_NODE;
+
if (IS_DNODE(fio->page))
return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
@@ -4116,6 +4174,15 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
CURSEG_HOT_NODE]);
blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
CURSEG_HOT_NODE]);
+ if (segno == NULL_SEGNO && type != CURSEG_HOT_NODE) {
+ if (!test_opt(sbi, SINGLE_NODE_SEC)) {
+ f2fs_err(sbi, "single_node_sec option required");
+ return -EFAULT;
+ }
+ sbi->single_node_sec = true;
+ return 0;
+ }
+
if (__exist_node_summaries(sbi))
blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
type - CURSEG_HOT_NODE);
@@ -4884,6 +4951,8 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
struct curseg_info *curseg_t = CURSEG_I(sbi, type);
__set_test_and_inuse(sbi, curseg_t->segno);
+ if (sbi->single_node_sec && type == CURSEG_HOT_NODE)
+ break;
}
}
@@ -5027,6 +5096,10 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
return -EFSCORRUPTED;
}
+
+ /* in single node section mode, WARM/COLD NODE are invalid */
+ if (sbi->single_node_sec && i == CURSEG_HOT_NODE)
+ break;
}
return 0;
}
@@ -5153,6 +5226,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (!zbd)
return 0;
+ /* in single node section mode, WARM/COLD node are not valid */
+ if (sbi->single_node_sec && type > CURSEG_HOT_NODE)
+ return 0;
+
/* report zone for the sector the curseg points to */
zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
<< log_sectors_per_block;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e1c0f418aa11..152a07e61b5f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -570,6 +570,8 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
get_ckpt_valid_blocks(sbi, segno, true);
if (node_blocks > left_blocks)
return false;
+ if (sbi->single_node_sec) /* check only hot node */
+ break;
}
/* check current data section for dentry blocks. */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1f1b3647a998..c21eeca86b0a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -129,6 +129,7 @@ enum {
Opt_acl,
Opt_noacl,
Opt_active_logs,
+ Opt_single_node_sec,
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_noinline_xattr,
@@ -207,6 +208,7 @@ static match_table_t f2fs_tokens = {
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_active_logs, "active_logs=%u"},
+ {Opt_single_node_sec, "single_node_sec"},
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_noinline_xattr, "noinline_xattr"},
@@ -803,6 +805,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
F2FS_OPTION(sbi).active_logs = arg;
break;
+ case Opt_single_node_sec:
+ set_opt(sbi, SINGLE_NODE_SEC);
+ break;
case Opt_disable_ext_identify:
set_opt(sbi, DISABLE_EXT_IDENTIFY);
break;
@@ -2039,6 +2044,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resgid));
+ if (test_opt(sbi, SINGLE_NODE_SEC))
+ seq_puts(seq, ",single_node_sec");
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
@@ -3675,6 +3682,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
blocks_per_seg = BLKS_PER_SEG(sbi);
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+ /* bypass single node section mode */
+ if (le32_to_cpu(ckpt->cur_node_segno[i] == NULL_SEGNO))
+ goto check_data;
if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
return 1;
@@ -3823,6 +3833,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
init_f2fs_rwsem(&sbi->io_order_lock);
spin_lock_init(&sbi->cp_lock);
+ sbi->single_node_sec = false;
+
sbi->dirty_device = 0;
spin_lock_init(&sbi->dev_lock);