When the queue is in the LIMIT_LOW state and all cgroups with a low limit cross their bps/iops limit, we upgrade the queue's state to LIMIT_HIGH/LIMIT_MAX. For a cgroup hierarchy, there are two cases. In the first case, the children have a lower low limit than the parent, so the parent's low limit is meaningless: if the children's bps/iops cross their low limit, we can upgrade the queue state. In the other case, the children have a higher low limit than the parent, so the children's low limit is meaningless: as long as the parent's bps/iops cross its low limit, we can upgrade the queue state. Signed-off-by: Shaohua Li <shli@xxxxxx> --- block/blk-throttle.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bdcf1b7..df9cd13e 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -468,6 +468,7 @@ static void blk_throtl_update_valid_limit(struct throtl_data *td) td->limit_valid[LIMIT_LOW] = false; } +static void throtl_upgrade_state(struct throtl_data *td); static void throtl_pd_offline(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); @@ -479,9 +480,8 @@ static void throtl_pd_offline(struct blkg_policy_data *pd) blk_throtl_update_valid_limit(tg->td); - if (tg->td->limit_index == LIMIT_LOW && - !tg->td->limit_valid[LIMIT_LOW]) - tg->td->limit_index = LIMIT_MAX; + if (!tg->td->limit_valid[tg->td->limit_index]) + throtl_upgrade_state(tg->td); } static void throtl_pd_free(struct blkg_policy_data *pd) @@ -1087,6 +1087,8 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq) return nr_disp; } +static bool throtl_can_upgrade(struct throtl_data *td, + struct throtl_grp *this_tg); /** * throtl_pending_timer_fn - timer function for service_queue->pending_timer * @arg: the throtl_service_queue being serviced @@ -1113,6 +1115,9 @@ static void throtl_pending_timer_fn(unsigned long arg) int ret; spin_lock_irq(q->queue_lock); + if (throtl_can_upgrade(td, NULL)) + throtl_upgrade_state(td); + again: 
parent_sq = sq->parent_sq; dispatched = false; @@ -1520,6 +1525,77 @@ static struct blkcg_policy blkcg_policy_throtl = { .pd_free_fn = throtl_pd_free, }; +static bool throtl_upgrade_check_one(struct throtl_grp *tg) +{ + struct throtl_service_queue *sq = &tg->service_queue; + + if (tg->bps[READ][LIMIT_LOW] != 0 && !sq->nr_queued[READ]) + return false; + if (tg->bps[WRITE][LIMIT_LOW] != 0 && !sq->nr_queued[WRITE]) + return false; + if (tg->iops[READ][LIMIT_LOW] != 0 && !sq->nr_queued[READ]) + return false; + if (tg->iops[WRITE][LIMIT_LOW] != 0 && !sq->nr_queued[WRITE]) + return false; + return true; +} + +static bool throtl_upgrade_check_hierarchy(struct throtl_grp *tg) +{ + if (throtl_upgrade_check_one(tg)) + return true; + while (true) { + if (!tg || (cgroup_subsys_on_dfl(io_cgrp_subsys) && + !tg_to_blkg(tg)->parent)) + return false; + if (throtl_upgrade_check_one(tg)) + return true; + tg = sq_to_tg(tg->service_queue.parent_sq); + } + return false; +} + +static bool throtl_can_upgrade(struct throtl_data *td, + struct throtl_grp *this_tg) +{ + struct cgroup_subsys_state *pos_css; + struct blkcg_gq *blkg; + + if (td->limit_index != LIMIT_LOW) + return false; + + blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { + struct throtl_grp *tg = blkg_to_tg(blkg); + + if (tg == this_tg) + continue; + if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children)) + continue; + if (!throtl_upgrade_check_hierarchy(tg)) + return false; + } + return true; +} + +static void throtl_upgrade_state(struct throtl_data *td) +{ + struct cgroup_subsys_state *pos_css; + struct blkcg_gq *blkg; + + td->limit_index = LIMIT_MAX; + blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { + struct throtl_grp *tg = blkg_to_tg(blkg); + struct throtl_service_queue *sq = &tg->service_queue; + + tg->disptime = jiffies - 1; + throtl_select_dispatch(sq); + throtl_schedule_next_dispatch(sq, false); + } + throtl_select_dispatch(&td->service_queue); + 
throtl_schedule_next_dispatch(&td->service_queue, false); + queue_work(kthrotld_workqueue, &td->dispatch_work); +} + bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { @@ -1542,14 +1618,20 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, sq = &tg->service_queue; +again: while (true) { /* throtl is FIFO - if bios are already queued, should queue */ if (sq->nr_queued[rw]) break; /* if above limits, break to queue */ - if (!tg_may_dispatch(tg, bio, NULL)) + if (!tg_may_dispatch(tg, bio, NULL)) { + if (throtl_can_upgrade(tg->td, tg)) { + throtl_upgrade_state(tg->td); + goto again; + } break; + } /* within limits, let's charge and dispatch directly */ throtl_charge_bio(tg, bio); -- 2.8.0.rc2 -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html