Introduce infrastructure for parallel writeback.

- Writeback context list and index:
  wb_ctx_list: an array of the NR_WB_CTX writeback contexts embedded
  in each bdi_writeback.
  wb_idx: an index into wb_ctx_list used to manage the assignment of
  writeback contexts to filesystems.

- Inode lists:
  Each writeback context carries its own inode lists, mirroring the
  existing b_* lists:
  b_dirty      -> pctx_b_dirty
  b_io         -> pctx_b_io
  b_dirty_time -> pctx_b_dirty_time
  b_more_io    -> pctx_b_more_io

- Per-writeback context work:
  pctx_dwork: per-context delayed work, so that multiple worker
  threads can service the writeback contexts concurrently.

- Helper functions:
  A set of helper functions, ctx_b_*_list(), is introduced to
  retrieve the inode list associated with a specific writeback
  context.

Signed-off-by: Kundan Kumar <kundan.kumar@xxxxxxxxxxx>
Signed-off-by: Anuj Gupta <anuj20.g@xxxxxxxxxxx>
---
Illustrative (non-patch) usage sketches for the new hooks follow
below the diff.

 include/linux/backing-dev-defs.h | 61 ++++++++++++++++++++++++++++++++
 mm/backing-dev.c                 | 21 ++++++++++-
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2ad261082bba..df627783e879 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -37,6 +37,7 @@ enum wb_stat_item {
 };
 
 #define WB_STAT_BATCH	(8*(1+ilog2(nr_cpu_ids)))
+#define NR_WB_CTX	8
 
 /*
  * why some writeback work was initiated
@@ -80,6 +81,31 @@ struct wb_completion {
 #define DEFINE_WB_COMPLETION(cmpl, bdi)	\
 	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
 
+struct wb_ctx {
+	struct delayed_work pctx_dwork;
+	struct list_head pctx_b_dirty;
+	struct list_head pctx_b_io;
+	struct list_head pctx_b_more_io;
+	struct list_head pctx_b_dirty_time;
+	struct bdi_writeback *b_wb;
+	unsigned long last_old_flush;	/* last old data flush */
+	unsigned long state;
+	unsigned long bw_time_stamp;	/* last time write bw is updated */
+	unsigned long dirtied_stamp;
+	unsigned long written_stamp;	/* pages written at bw_time_stamp */
+	unsigned long write_bandwidth;	/* the estimated write bandwidth */
+	unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */
+
+	/*
+	 * The base dirty throttle rate, re-calculated on every 200ms.
+	 * All the bdi tasks' dirty rate will be curbed under it.
+	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
+	 * in small steps and is much more smooth/stable than the latter.
+	 */
+	unsigned long dirty_ratelimit;
+	unsigned long balanced_dirty_ratelimit;
+};
+
 /*
  * Each wb (bdi_writeback) can perform writeback operations, is measured
  * and throttled, independently.  Without cgroup writeback, each bdi
@@ -143,6 +169,8 @@ struct bdi_writeback {
 
 	struct list_head bdi_node;	/* anchored at bdi->wb_list */
 
+	int wb_idx;
+	struct wb_ctx wb_ctx_list[NR_WB_CTX];
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct percpu_ref refcnt;	/* used only for !root wb's */
 	struct fprop_local_percpu memcg_completions;
@@ -208,6 +236,39 @@ struct wb_lock_cookie {
 	unsigned long flags;
 };
 
+static inline struct wb_ctx *ctx_wb_struct(struct bdi_writeback *wb, int ctx_id)
+{
+	return &wb->wb_ctx_list[ctx_id];
+}
+
+static inline struct list_head *ctx_b_dirty_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_dirty;
+}
+
+static inline struct list_head *ctx_b_dirty_time_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_dirty_time;
+}
+
+static inline struct list_head *ctx_b_io_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_io;
+}
+
+static inline struct list_head *ctx_b_more_io_list(struct bdi_writeback *wb, int ctx_id)
+{
+	struct wb_ctx *p_wb = ctx_wb_struct(wb, ctx_id);
+
+	return &p_wb->pctx_b_more_io;
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 /**
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e61bbb1bd622..fc072e9fe42c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -515,7 +515,8 @@ static void wb_update_bandwidth_workfn(struct work_struct *work)
 static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 		   gfp_t gfp)
 {
-	int err;
+	int i, err;
+	struct wb_ctx *p_wb_ctx;
 
 	memset(wb, 0, sizeof(*wb));
 
@@ -533,12 +534,30 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 	wb->dirty_ratelimit = INIT_BW;
 	wb->write_bandwidth = INIT_BW;
 	wb->avg_write_bandwidth = INIT_BW;
+	wb->wb_idx = 0;
 
 	spin_lock_init(&wb->work_lock);
 	INIT_LIST_HEAD(&wb->work_list);
 	INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
 	INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
+	for (i = 0; i < NR_WB_CTX; i++) {
+		p_wb_ctx = &wb->wb_ctx_list[i];
+		p_wb_ctx->b_wb = wb;
+		p_wb_ctx->last_old_flush = jiffies;
+		p_wb_ctx->bw_time_stamp = jiffies;
+		p_wb_ctx->balanced_dirty_ratelimit = INIT_BW;
+		p_wb_ctx->dirty_ratelimit = INIT_BW;
+		p_wb_ctx->write_bandwidth = INIT_BW;
+		p_wb_ctx->avg_write_bandwidth = INIT_BW;
+
+		INIT_LIST_HEAD(ctx_b_dirty_list(wb, i));
+		INIT_LIST_HEAD(ctx_b_dirty_time_list(wb, i));
+		INIT_LIST_HEAD(ctx_b_io_list(wb, i));
+		INIT_LIST_HEAD(ctx_b_more_io_list(wb, i));
+
+		INIT_DELAYED_WORK(&p_wb_ctx->pctx_dwork, wb_workfn);
+	}
 
 	err = fprop_local_init_percpu(&wb->completions, gfp);
 	if (err)
 		return err;
--
2.25.1
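
As a usage illustration only (not part of the patch): with the new
helpers, a follow-up could stage one context's dirty inodes for
writeback much like queue_io() does for the shared b_io list today.
wb_ctx_queue_io() below is a hypothetical name, and the wb->list_lock
protection that real list moves require is elided:

static void wb_ctx_queue_io(struct bdi_writeback *wb, int ctx_id)
{
	/*
	 * Splice this context's dirty inodes onto its I/O list;
	 * callers would hold wb->list_lock, as queue_io() callers do.
	 */
	list_splice_init(ctx_b_dirty_list(wb, ctx_id),
			 ctx_b_io_list(wb, ctx_id));
}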
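
wb_idx is added and zero-initialized here but not yet consumed;
presumably a later patch uses it to spread filesystems across the
contexts. A plausible sketch, nothing more, is round-robin selection
(wb_pick_ctx() is a hypothetical name; a real version would need the
selection serialized, e.g. under wb->list_lock):

static struct wb_ctx *wb_pick_ctx(struct bdi_writeback *wb)
{
	struct wb_ctx *ctx = ctx_wb_struct(wb, wb->wb_idx);

	/* advance the cursor for the next assignment */
	wb->wb_idx = (wb->wb_idx + 1) % NR_WB_CTX;
	return ctx;
}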
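
One thing to flag: pctx_dwork is initialized with wb_workfn(), but
wb_workfn() recovers its bdi_writeback via container_of() on the dwork
member of struct bdi_writeback, so invoking it through pctx_dwork would
compute a bogus pointer. A dedicated per-context work function seems
needed; a minimal sketch, with wb_ctx_workfn() as a hypothetical name:

static void wb_ctx_workfn(struct work_struct *work)
{
	struct wb_ctx *ctx = container_of(to_delayed_work(work),
					  struct wb_ctx, pctx_dwork);

	/*
	 * Write back inodes staged on ctx->pctx_b_io, charging
	 * progress to the parent wb via ctx->b_wb.
	 */
}

Waking a context would then mirror wb_wakeup(), e.g.
mod_delayed_work(bdi_wq, &ctx->pctx_dwork, 0).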