[PATCH 3/3] writeback: specify writeback period and expire interval per memcg

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



dirty_writeback_interval: dirty wakeup period
dirty_expire_interval: expire period

This patch provides per memcg setttings for writeback interval.

Dirty writeback could be triggered in the below ways:
  - mark_inode_dirty: when the first time of dirtying pages for this inode,
		it tries to wakeup the callback hook wb_workfn in
		wakeup period later.
  - wb_workfn: if there're more writeback works to do, it would wakeup the
		callback hook wb_workfn in another wakeup period later.
  - external event: kswad found dirty pages piled up at the end of inactive
		list or desktop mode timer.
  - buffered write context: balance_dirty_pages tries to wake up background
		writeback once dirty pages above freerun level of pages.
  - sync context: sync(fs sync) writeback immediately

No matter how writeback is triggered, wb_workfn is the unique callback hook
to manipulate the flushing things. Actually, wb_check_old_data_flush
handles the period writeback and decides the scope of dirty pages which
have to be written back because they were too old.

Signed-off-by: Xie Yongmei <yongmeixie@xxxxxxxxxxx>
---
 fs/fs-writeback.c          |  11 ++--
 include/linux/memcontrol.h |  16 ++++++
 mm/backing-dev.c           |   4 +-
 mm/memcontrol.c            | 114 +++++++++++++++++++++++++++++++++++++
 4 files changed, 140 insertions(+), 5 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 591fe9cf1659..f59e4709ec39 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1980,6 +1980,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 	struct inode *inode;
 	long progress;
 	struct blk_plug plug;
+	unsigned int dirty_expire = wb_dirty_expire_interval(wb);
 
 	blk_start_plug(&plug);
 	spin_lock(&wb->list_lock);
@@ -2015,7 +2016,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 */
 		if (work->for_kupdate) {
 			dirtied_before = jiffies -
-				msecs_to_jiffies(dirty_expire_interval * 10);
+				msecs_to_jiffies(dirty_expire * 10);
 		} else if (work->for_background)
 			dirtied_before = jiffies;
 
@@ -2101,15 +2102,16 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
 	unsigned long expired;
 	long nr_pages;
+	unsigned int writeback_interval = wb_dirty_writeback_interval(wb);
 
 	/*
 	 * When set to zero, disable periodic writeback
 	 */
-	if (!dirty_writeback_interval)
+	if (!writeback_interval)
 		return 0;
 
 	expired = wb->last_old_flush +
-			msecs_to_jiffies(dirty_writeback_interval * 10);
+			msecs_to_jiffies(writeback_interval * 10);
 	if (time_before(jiffies, expired))
 		return 0;
 
@@ -2194,6 +2196,7 @@ void wb_workfn(struct work_struct *work)
 	struct bdi_writeback *wb = container_of(to_delayed_work(work),
 						struct bdi_writeback, dwork);
 	long pages_written;
+	unsigned int writeback_interval = wb_dirty_writeback_interval(wb);
 
 	set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
 
@@ -2222,7 +2225,7 @@ void wb_workfn(struct work_struct *work)
 
 	if (!list_empty(&wb->work_list))
 		wb_wakeup(wb);
-	else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+	else if (wb_has_dirty_io(wb) && writeback_interval)
 		wb_wakeup_delayed(wb);
 }
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 386fc9b70c95..c1dc88bb8f80 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -347,6 +347,8 @@ struct mem_cgroup {
 #ifdef CONFIG_CGROUP_WRITEBACK_PARA
 	int dirty_background_ratio;
 	int dirty_ratio;
+	int dirty_writeback_interval;
+	int dirty_expire_interval;
 #endif
 
 	struct mem_cgroup_per_node *nodeinfo[];
@@ -1642,6 +1644,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
 #ifdef CONFIG_CGROUP_WRITEBACK_PARA
 unsigned int wb_dirty_background_ratio(struct bdi_writeback *wb);
 unsigned int wb_dirty_ratio(struct bdi_writeback *wb);
+unsigned int wb_dirty_writeback_interval(struct bdi_writeback *wb);
+unsigned int wb_dirty_expire_interval(struct bdi_writeback *wb);
 #else
 static inline
 unsigned int wb_dirty_background_ratio(struct bdi_writeback *wb)
@@ -1654,6 +1658,18 @@ unsigned int wb_dirty_ratio(struct bdi_writeback *wb)
 {
 	return vm_dirty_ratio;
 }
+
+static inline
+unsigned int wb_dirty_writeback_interval(struct bdi_writeback *wb)
+{
+	return dirty_writeback_interval;
+}
+
+static inline
+unsigned int wb_dirty_expire_interval(struct bdi_writeback *wb)
+{
+	return dirty_expire_interval;
+}
 #endif
 
 struct sock;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7176af65b103..685558362ad8 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -15,6 +15,7 @@
 #include <linux/writeback.h>
 #include <linux/device.h>
 #include <trace/events/writeback.h>
+#include <linux/memcontrol.h>
 
 struct backing_dev_info noop_backing_dev_info;
 EXPORT_SYMBOL_GPL(noop_backing_dev_info);
@@ -264,8 +265,9 @@ subsys_initcall(default_bdi_init);
 void wb_wakeup_delayed(struct bdi_writeback *wb)
 {
 	unsigned long timeout;
+	unsigned int dirty_interval = wb_dirty_writeback_interval(wb);
 
-	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
+	timeout = msecs_to_jiffies(dirty_interval * 10);
 	spin_lock_bh(&wb->work_lock);
 	if (test_bit(WB_registered, &wb->state))
 		queue_delayed_work(bdi_wq, &wb->dwork, timeout);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b1c1b150637a..c392aec22e2e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4851,17 +4851,49 @@ unsigned int wb_dirty_ratio(struct bdi_writeback *wb)
 	return memcg->dirty_ratio;
 }
 
+unsigned int wb_dirty_writeback_interval(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled() || !wb)
+		return dirty_writeback_interval;
+
+	memcg = mem_cgroup_from_css(wb->memcg_css);
+	if (memcg == root_mem_cgroup || memcg->dirty_writeback_interval < 0)
+		return dirty_writeback_interval;
+
+	return memcg->dirty_writeback_interval;
+}
+
+unsigned int wb_dirty_expire_interval(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled() || !wb)
+		return dirty_expire_interval;
+
+	memcg = mem_cgroup_from_css(wb->memcg_css);
+	if (memcg == root_mem_cgroup || memcg->dirty_expire_interval < 0)
+		return dirty_expire_interval;
+
+	return memcg->dirty_expire_interval;
+}
+
 static void wb_memcg_inherit_from_parent(struct mem_cgroup *parent,
 					 struct mem_cgroup *memcg)
 {
 	memcg->dirty_background_ratio = parent->dirty_background_ratio;
 	memcg->dirty_ratio = parent->dirty_ratio;
+	memcg->dirty_writeback_interval = parent->dirty_writeback_interval;
+	memcg->dirty_expire_interval = parent->dirty_expire_interval;
 }
 
 static void wb_memcg_init(struct mem_cgroup *memcg)
 {
 	memcg->dirty_background_ratio = -1;
 	memcg->dirty_ratio = -1;
+	memcg->dirty_writeback_interval = -1;
+	memcg->dirty_expire_interval = -1;
 }
 
 static int mem_cgroup_dirty_background_ratio_show(struct seq_file *m, void *v)
@@ -4918,6 +4950,64 @@ mem_cgroup_dirty_ratio_write(struct kernfs_open_file *of,
 	memcg->dirty_ratio = dirty_ratio;
 	return nbytes;
 }
+
+static int mem_cgroup_dirty_writeback_interval_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->dirty_writeback_interval);
+	return 0;
+}
+
+static ssize_t
+mem_cgroup_dirty_writeback_interval_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes,
+					  loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, writeback_interval;
+
+	buf = strstrip(buf);
+	ret = kstrtoint(buf, 0, &writeback_interval);
+	if (ret)
+		return ret;
+
+	if (writeback_interval < -1)
+		return -EINVAL;
+
+	if (memcg->dirty_writeback_interval != writeback_interval) {
+		memcg->dirty_writeback_interval = writeback_interval;
+		wakeup_flusher_threads(WB_REASON_PERIODIC);
+	}
+	return nbytes;
+}
+
+static int mem_cgroup_dirty_expire_interval_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->dirty_expire_interval);
+	return 0;
+}
+
+static ssize_t
+mem_cgroup_dirty_expire_interval_write(struct kernfs_open_file *of,
+				       char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, expire_interval;
+
+	buf = strstrip(buf);
+	ret = kstrtoint(buf, 0, &expire_interval);
+	if (ret)
+		return ret;
+
+	if (expire_interval < -1)
+		return -EINVAL;
+
+	memcg->dirty_expire_interval = expire_interval;
+	return nbytes;
+}
 #else
 static void wb_memcg_inherit_from_parent(struct mem_cgroup *parent,
 					 struct mem_cgroup *memcg)
@@ -5067,6 +5157,18 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.seq_show = mem_cgroup_dirty_ratio_show,
 		.write = mem_cgroup_dirty_ratio_write,
 	},
+	{
+		.name = "dirty_writeback_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_writeback_interval_show,
+		.write = mem_cgroup_dirty_writeback_interval_write,
+	},
+	{
+		.name = "dirty_expire_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_expire_interval_show,
+		.write = mem_cgroup_dirty_expire_interval_write,
+	},
 #endif
 	{ },	/* terminate */
 };
@@ -6549,6 +6651,18 @@ static struct cftype memory_files[] = {
 		.seq_show = mem_cgroup_dirty_ratio_show,
 		.write = mem_cgroup_dirty_ratio_write,
 	},
+	{
+		.name = "dirty_writeback_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_writeback_interval_show,
+		.write = mem_cgroup_dirty_writeback_interval_write,
+	},
+	{
+		.name = "dirty_expire_interval_centisecs",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = mem_cgroup_dirty_expire_interval_show,
+		.write = mem_cgroup_dirty_expire_interval_write,
+	},
 #endif
 	{ }	/* terminate */
 };
-- 
2.27.0




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux