The amount of memory available to a memcg wb_domain can change as the memcg configuration changes. A domain's ->dirty_limit exists to smooth out sudden drops in dirty threshold; however, when a domain's size actually drops significantly, it hinders the dirty throttling from adjusting to the new configuration, leading to unexpected behaviors including unnecessary OOM kills. This patch resolves the issue by adding wb_domain_size_changed() which resets ->dirty_limit[_tstamp] and making memcg call it on configuration changes. Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> Cc: Jens Axboe <axboe@xxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Cc: Wu Fengguang <fengguang.wu@xxxxxxxxx> Cc: Greg Thelen <gthelen@xxxxxxxxxx> --- include/linux/writeback.h | 20 ++++++++++++++++++++ mm/memcontrol.c | 12 ++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 04a3786..3b73e97 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -132,6 +132,26 @@ struct wb_domain { unsigned long dirty_limit; }; +/** + * wb_domain_size_changed - memory available to a wb_domain has changed + * @dom: wb_domain of interest + * + * This function should be called when the amount of memory available to + * @dom has changed. It resets @dom's dirty limit parameters to prevent + * the past values which don't match the current configuration from skewing + * dirty throttling. Without this, when memory size of a wb_domain is + * greatly reduced, the dirty throttling logic may allow too many pages to + * be dirtied leading to consecutive unnecessary OOMs and may get stuck in + * that situation. 
+ */ +static inline void wb_domain_size_changed(struct wb_domain *dom) +{ + spin_lock(&dom->lock); + dom->dirty_limit_tstamp = jiffies; + dom->dirty_limit = 0; + spin_unlock(&dom->lock); +} + /* * fs/fs-writeback.c */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 436fbc2..8fbd501 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3986,6 +3986,11 @@ static void memcg_wb_domain_exit(struct mem_cgroup *memcg) wb_domain_exit(&memcg->cgwb_domain); } +static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) +{ + wb_domain_size_changed(&memcg->cgwb_domain); +} + struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) { struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); @@ -4007,6 +4012,10 @@ static void memcg_wb_domain_exit(struct mem_cgroup *memcg) { } +static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ /* @@ -4605,6 +4614,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) memcg->low = 0; memcg->high = PAGE_COUNTER_MAX; memcg->soft_limit = PAGE_COUNTER_MAX; + memcg_wb_domain_size_changed(memcg); } #ifdef CONFIG_MMU @@ -5342,6 +5352,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, memcg->high = high; + memcg_wb_domain_size_changed(memcg); return nbytes; } @@ -5374,6 +5385,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (err) return err; + memcg_wb_domain_size_changed(memcg); return nbytes; } -- 2.4.0 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html