On Mon, 2008-06-23 at 17:04 -0600, Gregory Haskins wrote: > We do find_busiest_groups() et. al. without locks held for normal balancing, > so lets do it for newidle as well. It will allow other cpus to make > forward progress (against our RQ) while we try to balance and allow > some interrupts to occur. Is running f_b_g really that expensive? I was under the impression that move_tasks() is the expensive one... > Signed-off-by: Gregory Haskins <ghaskins@xxxxxxxxxx> > --- > > kernel/sched.c | 44 ++++++++++++++++++++++++++++++++++++++------ > 1 files changed, 38 insertions(+), 6 deletions(-) > > diff --git a/kernel/sched.c b/kernel/sched.c > index 31f91d9..490e6bc 100644 > --- a/kernel/sched.c > +++ b/kernel/sched.c > @@ -3333,6 +3333,16 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) > int sd_idle = 0; > int all_pinned = 0; > cpumask_t cpus = CPU_MASK_ALL; > + int nr_running; > + > + schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); > + > + /* > + * We are in a preempt-disabled section, so dropping the lock/irq > + * here simply means that other cores may acquire the lock, > + * and interrupts may occur. 
> + */ > + spin_unlock_irq(&this_rq->lock); > > /* > * When power savings policy is enabled for the parent domain, idle > @@ -3344,7 +3354,6 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) > !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) > sd_idle = 1; > > - schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); > redo: > group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, > &sd_idle, &cpus, NULL); > @@ -3366,14 +3375,33 @@ redo: > > ld_moved = 0; > if (busiest->nr_running > 1) { > - /* Attempt to move tasks */ > - double_lock_balance(this_rq, busiest); > - /* this_rq->clock is already updated */ > - update_rq_clock(busiest); > + local_irq_disable(); > + double_rq_lock(this_rq, busiest); > + > + BUG_ON(this_cpu != smp_processor_id()); > + > + /* > + * Checking rq->nr_running covers both the case where > + * newidle-balancing pulls a task, as well as if something > + * else issued a NEEDS_RESCHED (since we would only need > + * a reschedule if something was moved to us) > + */ > + if (this_rq->nr_running) { > + double_rq_unlock(this_rq, busiest); > + local_irq_enable(); > + goto out_balanced; > + } > + > ld_moved = move_tasks(this_rq, this_cpu, busiest, > imbalance, sd, CPU_NEWLY_IDLE, > &all_pinned); > - spin_unlock(&busiest->lock); > + > + nr_running = this_rq->nr_running; > + double_rq_unlock(this_rq, busiest); > + local_irq_enable(); > + > + if (nr_running) > + goto out_balanced; > > if (unlikely(all_pinned)) { > cpu_clear(cpu_of(busiest), cpus); > @@ -3382,6 +3410,8 @@ redo: > } > } > > + spin_lock_irq(&this_rq->lock); > + > if (!ld_moved) { > schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); > if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && > @@ -3393,6 +3423,8 @@ redo: > return ld_moved; > > out_balanced: > + spin_lock_irq(&this_rq->lock); > + > schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); > if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && > !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) > -- To unsubscribe from this 
list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx. More majordomo info at http://vger.kernel.org/majordomo-info.html