The patch titled sched: optimize activate_task for RT task has been added to the -mm tree. Its filename is sched-optimize-activate_task-for-rt-task.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: sched: optimize activate_task for RT task From: "Chen, Kenneth W" <kenneth.w.chen@xxxxxxxxx> RT task does not participate in interactiveness priority and thus shouldn't be bothered with timestamp and p->sleep_type manipulation when task is being put on run queue. Bypass all of the them with a single if (rt_task) test. Signed-off-by: Ken Chen <kenneth.w.chen@xxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxx> Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx> Cc: "Siddha, Suresh B" <suresh.b.siddha@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- kernel/sched.c | 8 kernel/sched.c.orig | 352 ++++++++++++++++++++++++------------------ 2 files changed, 214 insertions(+), 146 deletions(-) diff -puN kernel/sched.c~sched-optimize-activate_task-for-rt-task kernel/sched.c --- a/kernel/sched.c~sched-optimize-activate_task-for-rt-task +++ a/kernel/sched.c @@ -939,6 +939,9 @@ static void activate_task(struct task_st { unsigned long long now; + if (rt_task(p)) + goto out; + now = sched_clock(); #ifdef CONFIG_SMP if (!local) { @@ -960,8 +963,7 @@ static void activate_task(struct task_st (now - p->timestamp) >> 20); } - if (!rt_task(p)) - p->prio = recalc_task_prio(p, now); + p->prio = recalc_task_prio(p, now); /* * This checks to make sure it's not an uninterruptible task @@ -986,7 +988,7 @@ static void activate_task(struct task_st } } p->timestamp = now; - +out: __activate_task(p, rq); } diff -puN kernel/sched.c.orig~sched-optimize-activate_task-for-rt-task kernel/sched.c.orig --- a/kernel/sched.c.orig~sched-optimize-activate_task-for-rt-task +++ a/kernel/sched.c.orig @@ -227,6 +227,7 @@ struct rq { unsigned long expired_timestamp; unsigned long long timestamp_last_tick; struct task_struct *curr, *idle; + unsigned long next_balance; struct mm_struct *prev_mm; struct prio_array *active, *expired, arrays[2]; int best_expired_prio; @@ -1952,6 +1953,7 @@ static void double_rq_lock(struct rq *rq __acquires(rq1->lock) __acquires(rq2->lock) { + BUG_ON(!irqs_disabled()); if (rq1 == rq2) { spin_lock(&rq1->lock); __acquire(rq2->lock); /* Fake it out ;) */ @@ -1991,6 +1993,11 @@ static void double_lock_balance(struct r __acquires(busiest->lock) __acquires(this_rq->lock) { + if (unlikely(!irqs_disabled())) { + /* printk() doesn't work good under rq->lock */ + spin_unlock(&this_rq->lock); + BUG_ON(1); + } if (unlikely(!spin_trylock(&busiest->lock))) { if (busiest < this_rq) { spin_unlock(&this_rq->lock); @@ -2540,8 +2547,6 @@ static inline unsigned long minus_1_or_z /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. - * - * Called with this_rq unlocked. */ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum idle_type idle) @@ -2551,6 +2556,7 @@ static int load_balance(int this_cpu, st unsigned long imbalance; struct rq *busiest; cpumask_t cpus = CPU_MASK_ALL; + unsigned long flags; /* * When power savings policy is enabled for the parent domain, idle @@ -2590,11 +2596,13 @@ redo: * still unbalanced. nr_moved simply stays zero, so it is * correctly treated as an imbalance. */ + local_irq_save(flags); double_rq_lock(this_rq, busiest); nr_moved = move_tasks(this_rq, this_cpu, busiest, minus_1_or_zero(busiest->nr_running), imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); + local_irq_restore(flags); /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { @@ -2611,13 +2619,13 @@ redo: if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { - spin_lock(&busiest->lock); + spin_lock_irqsave(&busiest->lock, flags); /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { - spin_unlock(&busiest->lock); + spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; } @@ -2627,7 +2635,7 @@ redo: busiest->push_cpu = this_cpu; active_balance = 1; } - spin_unlock(&busiest->lock); + spin_unlock_irqrestore(&busiest->lock, flags); if (active_balance) wake_up_process(busiest->migration_thread); @@ -2766,14 +2774,28 @@ out_balanced: static void idle_balance(int this_cpu, struct rq *this_rq) { struct sched_domain *sd; + int pulled_task = 0; + unsigned long next_balance = jiffies + 60 * HZ; for_each_domain(this_cpu, sd) { if (sd->flags & SD_BALANCE_NEWIDLE) { /* If we've pulled tasks over stop searching: */ - if (load_balance_newidle(this_cpu, this_rq, sd)) + pulled_task = load_balance_newidle(this_cpu, + this_rq, sd); + if (time_after(next_balance, + sd->last_balance + sd->balance_interval)) + next_balance = sd->last_balance + + sd->balance_interval; + if (pulled_task) break; } } + if (!pulled_task) + /* + * We are going idle. next_balance may be set based on + * a busy processor. So reset next_balance. + */ + this_rq->next_balance = next_balance; } /* @@ -2826,26 +2848,9 @@ static void active_load_balance(struct r spin_unlock(&target_rq->lock); } -/* - * rebalance_tick will get called every timer tick, on every CPU. - * - * It checks each scheduling domain to see if it is due to be balanced, - * and initiates a balancing operation if so. - * - * Balancing parameters are set up in arch_init_sched_domains. - */ - -/* Don't have all balancing operations going off at once: */ -static inline unsigned long cpu_offset(int cpu) -{ - return jiffies + cpu * HZ / NR_CPUS; -} - -static void -rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) +static void update_load(struct rq *this_rq) { - unsigned long this_load, interval, j = cpu_offset(this_cpu); - struct sched_domain *sd; + unsigned long this_load; int i, scale; this_load = this_rq->raw_weighted_load; @@ -2865,6 +2870,34 @@ rebalance_tick(int this_cpu, struct rq * new_load += scale-1; this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; } +} + +/* + * run_rebalance_domains is triggered when needed from the scheduler tick. + * + * It checks each scheduling domain to see if it is due to be balanced, + * and initiates a balancing operation if so. + * + * Balancing parameters are set up in arch_init_sched_domains. + */ +static spinlock_t balancing; + +static void run_rebalance_domains(struct softirq_action *h) +{ + int this_cpu = smp_processor_id(); + struct rq *this_rq = cpu_rq(this_cpu); + unsigned long interval; + struct sched_domain *sd; + /* + * We are idle if there are no processes running. This + * is valid even if we are the idle process (SMT). + */ + enum idle_type idle = !this_rq->nr_running ? + SCHED_IDLE : NOT_IDLE; + /* Earliest time when we have to call run_rebalance_domains again */ + unsigned long next_balance = jiffies + 60*HZ; + + __count_vm_event(SCHED_BALANCE_SOFTIRQ); for_each_domain(this_cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -2879,7 +2912,15 @@ rebalance_tick(int this_cpu, struct rq * if (unlikely(!interval)) interval = 1; - if (j - sd->last_balance >= interval) { + if (sd->flags & SD_SERIALIZE) { + if (!spin_trylock(&balancing)) { + __count_vm_event(SCHED_BALANCE_RUNNING); + goto out; + } + } + + if (time_after_eq(jiffies, sd->last_balance + interval)) { + __count_vm_event(SCHED_LOAD_BALANCE); if (load_balance(this_cpu, this_rq, sd, idle)) { /* * We've pulled tasks over so either we're no @@ -2888,39 +2929,40 @@ rebalance_tick(int this_cpu, struct rq * */ idle = NOT_IDLE; } - sd->last_balance += interval; + sd->last_balance = jiffies; } + if (sd->flags & SD_SERIALIZE) + spin_unlock(&balancing); +out: + if (time_after(next_balance, sd->last_balance + interval)) + next_balance = sd->last_balance + interval; } + this_rq->next_balance = next_balance; } #else /* * on UP we do not need to balance between CPUs: */ -static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle) -{ -} static inline void idle_balance(int cpu, struct rq *rq) { } #endif -static inline int wake_priority_sleeper(struct rq *rq) +static inline void wake_priority_sleeper(struct rq *rq) { - int ret = 0; - #ifdef CONFIG_SCHED_SMT + if (!rq->nr_running) + return; + spin_lock(&rq->lock); /* * If an SMT sibling task has been put to sleep for priority * reasons reschedule the idle task to see if it can now run. */ - if (rq->nr_running) { + if (rq->nr_running) resched_task(rq->idle); - ret = 1; - } spin_unlock(&rq->lock); #endif - return ret; } DEFINE_PER_CPU(struct kernel_stat, kstat); @@ -3048,35 +3090,12 @@ void account_steal_time(struct task_stru cpustat->steal = cputime64_add(cpustat->steal, tmp); } -/* - * This function gets called by the timer code, with HZ frequency. - * We call it with interrupts disabled. - * - * It also gets called by the fork code, when changing the parent's - * timeslices. - */ -void scheduler_tick(void) +static void task_running_tick(struct rq *rq, struct task_struct *p) { - unsigned long long now = sched_clock(); - struct task_struct *p = current; - int cpu = smp_processor_id(); - struct rq *rq = cpu_rq(cpu); - - update_cpu_clock(p, rq, now); - - rq->timestamp_last_tick = now; - - if (p == rq->idle) { - if (wake_priority_sleeper(rq)) - goto out; - rebalance_tick(cpu, rq, SCHED_IDLE); - return; - } - - /* Task might have expired already, but not scheduled off yet */ if (p->array != rq->active) { + /* Task has expired but was not scheduled yet */ set_tsk_need_resched(p); - goto out; + return; } spin_lock(&rq->lock); /* @@ -3144,8 +3163,39 @@ void scheduler_tick(void) } out_unlock: spin_unlock(&rq->lock); -out: - rebalance_tick(cpu, rq, NOT_IDLE); +} + +/* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. + * + * It also gets called by the fork code, when changing the parent's + * timeslices. + */ +void scheduler_tick(void) +{ + unsigned long long now = sched_clock(); + struct task_struct *p = current; + int cpu = smp_processor_id(); + struct rq *rq = cpu_rq(cpu); + + update_cpu_clock(p, rq, now); + + rq->timestamp_last_tick = now; + + if (p == rq->idle) + /* Task on the idle queue */ + wake_priority_sleeper(rq); + else + task_running_tick(rq, p); +#ifdef CONFIG_SMP + update_load(rq); + if (time_after_eq(jiffies, rq->next_balance)) { + __count_vm_event(SCHED_TICK_BALANCE); + raise_softirq(SCHED_SOFTIRQ); + } + __count_vm_event(SCHED_TICK_NO_BALANCE); +#endif } #ifdef CONFIG_SCHED_SMT @@ -5067,7 +5117,10 @@ wait_to_die: } #ifdef CONFIG_HOTPLUG_CPU -/* Figure out where task on dead CPU should go, use force if neccessary. */ +/* + * Figure out where task on dead CPU should go, use force if neccessary. + * NOTE: interrupts should be disabled by the caller + */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { unsigned long flags; @@ -5187,6 +5240,7 @@ void idle_task_exit(void) mmdrop(mm); } +/* called under rq->lock with disabled interrupts */ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) { struct rq *rq = cpu_rq(dead_cpu); @@ -5203,10 +5257,11 @@ static void migrate_dead(unsigned int de * Drop lock around migration; if someone else moves it, * that's OK. No task can be added to this CPU, so iteration is * fine. + * NOTE: interrupts should be left disabled --dev@ */ - spin_unlock_irq(&rq->lock); + spin_unlock(&rq->lock); move_task_off_dead_cpu(dead_cpu, p); - spin_lock_irq(&rq->lock); + spin_lock(&rq->lock); put_task_struct(p); } @@ -5528,28 +5583,27 @@ static int __init isolated_cpu_setup(cha __setup ("isolcpus=", isolated_cpu_setup); /* - * init_sched_build_groups takes an array of groups, the cpumask we wish - * to span, and a pointer to a function which identifies what group a CPU - * belongs to. The return value of group_fn must be a valid index into the - * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we - * keep track of groups covered with a cpumask_t). + * init_sched_build_groups takes the cpumask we wish to span, and a pointer + * to a function which identifies what group(along with sched group) a CPU + * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS + * (due to the fact that we keep track of groups covered with a cpumask_t). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ static void -init_sched_build_groups(struct sched_group groups[], cpumask_t span, - const cpumask_t *cpu_map, - int (*group_fn)(int cpu, const cpumask_t *cpu_map)) +init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map, + int (*group_fn)(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg)) { struct sched_group *first = NULL, *last = NULL; cpumask_t covered = CPU_MASK_NONE; int i; for_each_cpu_mask(i, span) { - int group = group_fn(i, cpu_map); - struct sched_group *sg = &groups[group]; + struct sched_group *sg; + int group = group_fn(i, cpu_map, &sg); int j; if (cpu_isset(i, covered)) @@ -5559,7 +5613,7 @@ init_sched_build_groups(struct sched_gro sg->cpu_power = 0; for_each_cpu_mask(j, span) { - if (group_fn(j, cpu_map) != group) + if (group_fn(j, cpu_map, NULL) != group) continue; cpu_set(j, covered); @@ -5738,14 +5792,24 @@ static void touch_cache(void *__cache, u unsigned long *cache = __cache; int i; + /* + * We access the buffer via 6 independent 'streams' of + * read/write access which are interleaved: + * + * [---> <---|---> <---|---> <---] + * + * We touch every cacheline in the buffer. (iteration + * step is 32 bytes on 32-bit systems, 64 bytes on + * 64-bit systems) + */ for (i = 0; i < size/6; i += 8) { - switch (i % 6) { - case 0: cache[i]++; - case 1: cache[size-1-i]++; - case 2: cache[chunk1-i]++; - case 3: cache[chunk1+i]++; - case 4: cache[chunk2-i]++; - case 5: cache[chunk2+i]++; + switch (i % 6*8) { + case 0*8: cache[i]++; + case 1*8: cache[size-1-i]++; + case 2*8: cache[chunk1-i]++; + case 3*8: cache[chunk1+i]++; + case 4*8: cache[chunk2-i]++; + case 5*8: cache[chunk2+i]++; } } } @@ -6135,10 +6199,13 @@ int sched_smt_power_savings = 0, sched_m */ #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); -static struct sched_group sched_group_cpus[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); -static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + if (sg) + *sg = &per_cpu(sched_group_cpus, cpu); return cpu; } #endif @@ -6148,39 +6215,52 @@ static int cpu_to_cpu_group(int cpu, con */ #ifdef CONFIG_SCHED_MC static DEFINE_PER_CPU(struct sched_domain, core_domains); -static struct sched_group sched_group_core[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_core); #endif #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) -static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + int group; cpumask_t mask = cpu_sibling_map[cpu]; cpus_and(mask, mask, *cpu_map); - return first_cpu(mask); + group = first_cpu(mask); + if (sg) + *sg = &per_cpu(sched_group_core, group); + return group; } #elif defined(CONFIG_SCHED_MC) -static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + if (sg) + *sg = &per_cpu(sched_group_core, cpu); return cpu; } #endif static DEFINE_PER_CPU(struct sched_domain, phys_domains); -static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_phys); -static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { + int group; #ifdef CONFIG_SCHED_MC cpumask_t mask = cpu_coregroup_map(cpu); cpus_and(mask, mask, *cpu_map); - return first_cpu(mask); + group = first_cpu(mask); #elif defined(CONFIG_SCHED_SMT) cpumask_t mask = cpu_sibling_map[cpu]; cpus_and(mask, mask, *cpu_map); - return first_cpu(mask); + group = first_cpu(mask); #else - return cpu; + group = cpu; #endif + if (sg) + *sg = &per_cpu(sched_group_phys, group); + return group; } #ifdef CONFIG_NUMA @@ -6193,12 +6273,22 @@ static DEFINE_PER_CPU(struct sched_domai static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map) +static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, + struct sched_group **sg) { - return cpu_to_node(cpu); + cpumask_t nodemask = node_to_cpumask(cpu_to_node(cpu)); + int group; + + cpus_and(nodemask, nodemask, *cpu_map); + group = first_cpu(nodemask); + + if (sg) + *sg = &per_cpu(sched_group_allnodes, group); + return group; } + static void init_numa_sched_groups_power(struct sched_group *group_head) { struct sched_group *sg = group_head; @@ -6234,16 +6324,9 @@ static void free_sched_groups(const cpum int cpu, i; for_each_cpu_mask(cpu, *cpu_map) { - struct sched_group *sched_group_allnodes - = sched_group_allnodes_bycpu[cpu]; struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; - if (sched_group_allnodes) { - kfree(sched_group_allnodes); - sched_group_allnodes_bycpu[cpu] = NULL; - } - if (!sched_group_nodes) continue; @@ -6337,7 +6420,7 @@ static int build_sched_domains(const cpu struct sched_domain *sd; #ifdef CONFIG_NUMA struct sched_group **sched_group_nodes = NULL; - struct sched_group *sched_group_allnodes = NULL; + int sd_allnodes = 0; /* * Allocate the per-node list of sched groups @@ -6355,7 +6438,6 @@ static int build_sched_domains(const cpu * Set up domains for cpus specified by the cpu_map. */ for_each_cpu_mask(i, *cpu_map) { - int group; struct sched_domain *sd = NULL, *p; cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); @@ -6364,26 +6446,12 @@ static int build_sched_domains(const cpu #ifdef CONFIG_NUMA if (cpus_weight(*cpu_map) > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { - if (!sched_group_allnodes) { - sched_group_allnodes - = kmalloc_node(sizeof(struct sched_group) - * MAX_NUMNODES, - GFP_KERNEL, - cpu_to_node(i)); - if (!sched_group_allnodes) { - printk(KERN_WARNING - "Can not alloc allnodes sched group\n"); - goto error; - } - sched_group_allnodes_bycpu[i] - = sched_group_allnodes; - } sd = &per_cpu(allnodes_domains, i); *sd = SD_ALLNODES_INIT; sd->span = *cpu_map; - group = cpu_to_allnodes_group(i, cpu_map); - sd->groups = &sched_group_allnodes[group]; + cpu_to_allnodes_group(i, cpu_map, &sd->groups); p = sd; + sd_allnodes = 1; } else p = NULL; @@ -6398,36 +6466,33 @@ static int build_sched_domains(const cpu p = sd; sd = &per_cpu(phys_domains, i); - group = cpu_to_phys_group(i, cpu_map); *sd = SD_CPU_INIT; sd->span = nodemask; sd->parent = p; if (p) p->child = sd; - sd->groups = &sched_group_phys[group]; + cpu_to_phys_group(i, cpu_map, &sd->groups); #ifdef CONFIG_SCHED_MC p = sd; sd = &per_cpu(core_domains, i); - group = cpu_to_core_group(i, cpu_map); *sd = SD_MC_INIT; sd->span = cpu_coregroup_map(i); cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; - sd->groups = &sched_group_core[group]; + cpu_to_core_group(i, cpu_map, &sd->groups); #endif #ifdef CONFIG_SCHED_SMT p = sd; sd = &per_cpu(cpu_domains, i); - group = cpu_to_cpu_group(i, cpu_map); *sd = SD_SIBLING_INIT; sd->span = cpu_sibling_map[i]; cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; - sd->groups = &sched_group_cpus[group]; + cpu_to_cpu_group(i, cpu_map, &sd->groups); #endif } @@ -6439,8 +6504,7 @@ static int build_sched_domains(const cpu if (i != first_cpu(this_sibling_map)) continue; - init_sched_build_groups(sched_group_cpus, this_sibling_map, - cpu_map, &cpu_to_cpu_group); + init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group); } #endif @@ -6451,8 +6515,7 @@ static int build_sched_domains(const cpu cpus_and(this_core_map, this_core_map, *cpu_map); if (i != first_cpu(this_core_map)) continue; - init_sched_build_groups(sched_group_core, this_core_map, - cpu_map, &cpu_to_core_group); + init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group); } #endif @@ -6465,15 +6528,13 @@ static int build_sched_domains(const cpu if (cpus_empty(nodemask)) continue; - init_sched_build_groups(sched_group_phys, nodemask, - cpu_map, &cpu_to_phys_group); + init_sched_build_groups(nodemask, cpu_map, &cpu_to_phys_group); } #ifdef CONFIG_NUMA /* Set up node groups */ - if (sched_group_allnodes) - init_sched_build_groups(sched_group_allnodes, *cpu_map, - cpu_map, &cpu_to_allnodes_group); + if (sd_allnodes) + init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group); for (i = 0; i < MAX_NUMNODES; i++) { /* Set up node groups */ @@ -6565,10 +6626,10 @@ static int build_sched_domains(const cpu for (i = 0; i < MAX_NUMNODES; i++) init_numa_sched_groups_power(sched_group_nodes[i]); - if (sched_group_allnodes) { - int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map); - struct sched_group *sg = &sched_group_allnodes[group]; + if (sd_allnodes) { + struct sched_group *sg; + cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg); init_numa_sched_groups_power(sg); } #endif @@ -6823,8 +6884,13 @@ void __init sched_init(void) set_load_weight(&init_task); +#ifdef CONFIG_SMP + open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); +#endif + #ifdef CONFIG_RT_MUTEXES plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); + spin_lock_init(&balancing); #endif /* _ Patches currently in -mm which might be from kenneth.w.chen@xxxxxxxxx are __unmap_hugepage_range-add-comment.patch shared-page-table-for-hugetlb-page-v4.patch htlb-forget-rss-with-pt-sharing.patch sched-avoid-taking-rq-lock-in-wake_priority_sleeper.patch sched-remove-staggering-of-load-balancing.patch sched-disable-interrupts-for-locking-in-load_balance.patch sched-extract-load-calculation-from-rebalance_tick.patch sched-move-idle-status-calculation-into-rebalance_tick.patch sched-use-softirq-for-load-balancing.patch sched-call-tasklet-less-frequently.patch sched-add-option-to-serialize-load-balancing.patch mm-only-sched-add-a-few-scheduler-event-counters.patch sched-optimize-activate_task-for-rt-task.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html