Each queue will have a state machine. Initially the queue is in the LOW_LIMIT state, which means all cgroups will be throttled according to their low limit. After all cgroups with a low limit cross that limit, the queue state gets upgraded to the high/max state. This guarantees that cgroups with a low limit get at least their low-limit bandwidth/iops before other cgroups can use the disk. Cgroups without a low limit are assigned a small bps/iops to avoid a complete stall. Signed-off-by: Shaohua Li <shli@xxxxxx> --- block/blk-throttle.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e69a3db..bdcf1b7 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -213,12 +213,37 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw) { - return tg->bps[rw][tg->td->limit_index]; + struct blkcg_gq *blkg = tg_to_blkg(tg); + uint64_t ret; + + if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) + return -1; + ret = tg->bps[rw][tg->td->limit_index]; + if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { + if (tg->iops[rw][LIMIT_LOW]) + return -1; + /* assign a small default */ + return 64 * 1024; + } + + return ret; } static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) { - return tg->iops[rw][tg->td->limit_index]; + struct blkcg_gq *blkg = tg_to_blkg(tg); + unsigned int ret; + + if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) + return -1; + ret = tg->iops[rw][tg->td->limit_index]; + if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { + if (tg->bps[rw][LIMIT_LOW]) + return -1; + /* assign a small default */ + return 16; + } + return ret; } /** -- 2.8.0.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html