Yinghai, Balbir, Arjan,

Could you try the below to see if that fully does away with the /0 in the
group scheduler thing?

---
 kernel/sched.c |   53 +++++++++++++++++++++++++++++++++--------------------
 1 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 0e76b17..45cebe0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1515,30 +1515,33 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+struct update_shares_data {
+	spinlock_t	lock;
+	unsigned long	sum_weight;
+	unsigned long	shares;
+	unsigned long	rq_weight[NR_CPUS];
+};
+
+static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
+
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
  * Calculate and set the cpu's group shares.
  */
-static void
-update_group_shares_cpu(struct task_group *tg, int cpu,
-			unsigned long sd_shares, unsigned long sd_rq_weight,
-			unsigned long sd_eff_weight)
+static void update_group_shares_cpu(struct task_group *tg,
+				    struct update_shares_data *usd, int cpu)
 {
-	unsigned long rq_weight;
-	unsigned long shares;
+	unsigned long shares, rq_weight;
 	int boost = 0;
 
 	if (!tg->se[cpu])
 		return;
 
-	rq_weight = tg->cfs_rq[cpu]->rq_weight;
+	rq_weight = usd->rq_weight[cpu];
 
 	if (!rq_weight) {
 		boost = 1;
 		rq_weight = NICE_0_LOAD;
-		if (sd_rq_weight == sd_eff_weight)
-			sd_eff_weight += NICE_0_LOAD;
-		sd_rq_weight = sd_eff_weight;
 	}
 
 	/*
@@ -1546,7 +1549,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 	 * shares_i = -----------------------------
 	 *               \Sum_j rq_weight_j
 	 */
-	shares = (sd_shares * rq_weight) / sd_rq_weight;
+	shares = (usd->shares * rq_weight) / usd->sum_weight;
 	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
 	if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1555,6 +1558,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
+		tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
 		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1568,36 +1572,44 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long weight, rq_weight = 0, eff_weight = 0;
-	unsigned long shares = 0;
+	struct update_shares_data *usd = &get_cpu_var(update_shares_data);
+	unsigned long weight, sum_weight = 0, shares = 0;
 	struct sched_domain *sd = data;
+	unsigned long flags;
 	int i;
 
+	spin_lock_irqsave(&usd->lock, flags);
+
 	for_each_cpu(i, sched_domain_span(sd)) {
+		weight = tg->cfs_rq[i]->load.weight;
+		usd->rq_weight[i] = weight;
+
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
 		 * run here it will not get delayed by group starvation.
 		 */
-		weight = tg->cfs_rq[i]->load.weight;
-		tg->cfs_rq[i]->rq_weight = weight;
-		rq_weight += weight;
-
 		if (!weight)
 			weight = NICE_0_LOAD;
 
-		eff_weight += weight;
+		sum_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
-	if ((!shares && rq_weight) || shares > tg->shares)
+	if ((!shares && sum_weight) || shares > tg->shares)
 		shares = tg->shares;
 
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
+	usd->sum_weight = sum_weight;
+	usd->shares = shares;
+
 	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, eff_weight);
+		update_group_shares_cpu(tg, usd, i);
+
+	spin_unlock_irqrestore(&usd->lock, flags);
+	put_cpu_var(update_shares_data);
 
 	return 0;
 }
@@ -9449,6 +9461,7 @@ void __init sched_init(void)
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
+		spin_lock_init(&per_cpu(update_shares_data, i).lock);
 		init_task_group.shares = init_task_group_load;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 #ifdef CONFIG_CGROUP_SCHED
--
To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
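For anyone reading along, here is a minimal userspace sketch of the proportional
shares calculation the patch centralizes in update_shares_data. It is only an
illustration under assumed values: NICE_0_LOAD, MIN_SHARES, MAX_SHARES and the
clamp_ul() helper are stand-ins for the kernel's definitions, and the per-cpu
weights are made up. The point it demonstrates is that boosting idle cpus to
NICE_0_LOAD before summing means the divisor in
shares_i = shares * rq_weight_i / \Sum_j rq_weight_j can never be zero.

/*
 * Standalone sketch (not kernel code): distribute a group's shares across
 * cpus in proportion to per-cpu runqueue weight, boosting idle cpus to
 * NICE_0_LOAD as the patch above does.  Constant values are illustrative
 * assumptions.
 */
#include <stdio.h>

#define NR_CPUS		4
#define NICE_0_LOAD	1024UL
#define MIN_SHARES	2UL
#define MAX_SHARES	(1UL << 18)

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	/* made-up per-cpu runqueue weights; cpu2 is idle */
	unsigned long rq_weight[NR_CPUS] = { 2048, 1024, 0, 3072 };
	unsigned long tg_shares = 1024;		/* the group's total shares */
	unsigned long sum_weight = 0;
	int i;

	/*
	 * Pretend idle cpus carry one task of average load, as the comment
	 * in tg_shares_up() describes; as a side effect the divisor below
	 * can never be zero.
	 */
	for (i = 0; i < NR_CPUS; i++) {
		if (!rq_weight[i])
			rq_weight[i] = NICE_0_LOAD;
		sum_weight += rq_weight[i];
	}

	for (i = 0; i < NR_CPUS; i++) {
		/* shares_i = tg_shares * rq_weight_i / \Sum_j rq_weight_j */
		unsigned long shares = tg_shares * rq_weight[i] / sum_weight;

		shares = clamp_ul(shares, MIN_SHARES, MAX_SHARES);
		printf("cpu%d: shares = %lu\n", i, shares);
	}

	return 0;
}

Compiled with a plain cc, it hands the idle cpu a non-zero slice instead of
tripping a division by zero when every runqueue in the domain is empty.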