The patch titled
     sched: stagger load balancing in build_sched_domains
has been added to the -mm tree.  Its filename is
     sched-stagger-load-balancing-in-build_sched_domains.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt
to find out what to do about this

------------------------------------------------------
Subject: sched: stagger load balancing in build_sched_domains
From: Christoph Lameter <clameter@xxxxxxx>

Instead of dealing with the staggering of load balancing during actual load
balancing, we just do it once when the sched domains are set up.

Signed-off-by: Christoph Lameter <clameter@xxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx>
Cc: "Siddha, Suresh B" <suresh.b.siddha@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---
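Each cpu's domains are seeded with last_balance = jiffies + cpu * HZ / NR_CPUS,
so the per-domain balance intervals expire on different ticks rather than all
cpus hitting load_balance() on the same jiffy.  A rough standalone illustration
of that arithmetic (plain userspace C, not kernel code; HZ, NR_CPUS and the
jiffies/interval values are made-up example numbers, and jiffies is passed as a
parameter since userspace has no such counter):

#include <stdio.h>

#define HZ	1000
#define NR_CPUS	4

/* same arithmetic the patch uses to seed sd->last_balance for each cpu */
static unsigned long cpu_offset(int cpu, unsigned long jiffies)
{
	return jiffies + cpu * HZ / NR_CPUS;
}

int main(void)
{
	unsigned long jiffies = 100000;	/* pretend tick count at domain setup */
	unsigned long interval = HZ;	/* pretend per-domain balance interval */
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		unsigned long last_balance = cpu_offset(cpu, jiffies);

		/* rebalance_tick() balances once jiffies - last_balance >= interval */
		printf("cpu %d: last_balance=%lu, first balance due at jiffy %lu\n",
		       cpu, last_balance, last_balance + interval);
	}
	return 0;
}

With those example numbers the four cpus come due at jiffies 101000, 101250,
101500 and 101750, i.e. HZ/NR_CPUS ticks apart, instead of all calling
load_balance() on the same tick.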
 kernel/sched.c |   26 +++++++++++++++++---------
 1 files changed, 17 insertions(+), 9 deletions(-)

diff -puN kernel/sched.c~sched-stagger-load-balancing-in-build_sched_domains kernel/sched.c
--- a/kernel/sched.c~sched-stagger-load-balancing-in-build_sched_domains
+++ a/kernel/sched.c
@@ -2848,17 +2848,10 @@ static void update_load(struct rq *this_
  *
  * Balancing parameters are set up in arch_init_sched_domains.
  */
-
-/* Don't have all balancing operations going off at once: */
-static inline unsigned long cpu_offset(int cpu)
-{
-	return jiffies + cpu * HZ / NR_CPUS;
-}
-
 static void
 rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 {
-	unsigned long interval, j = cpu_offset(this_cpu);
+	unsigned long interval;
 	struct sched_domain *sd;
 
 	for_each_domain(this_cpu, sd) {
@@ -2874,7 +2867,7 @@ rebalance_tick(int this_cpu, struct rq *
 		if (unlikely(!interval))
 			interval = 1;
 
-		if (j - sd->last_balance >= interval) {
+		if (jiffies - sd->last_balance >= interval) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/*
 				 * We've pulled tasks over so either we're no
@@ -6332,6 +6325,16 @@ static void init_sched_groups_power(int 
 }
 
 /*
+ * Calculate jiffies start to use for each cpu.  On sched domain
+ * initialization this jiffy value is used to stagger the load balancing
+ * of the cpus so that they do not load balance all at the same time.
+ */
+static inline unsigned long cpu_offset(int cpu)
+{
+	return jiffies + cpu * HZ / NR_CPUS;
+}
+
+/*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
@@ -6387,6 +6390,7 @@ static int build_sched_domains(const cpu
 			sd->span = *cpu_map;
 			group = cpu_to_allnodes_group(i, cpu_map);
 			sd->groups = &sched_group_allnodes[group];
+			sd->last_balance = cpu_offset(i);
 			p = sd;
 		} else
 			p = NULL;
@@ -6395,6 +6399,7 @@ static int build_sched_domains(const cpu
 		*sd = SD_NODE_INIT;
 		sd->span = sched_domain_node_span(cpu_to_node(i));
 		sd->parent = p;
+		sd->last_balance = cpu_offset(i);
 		if (p)
 			p->child = sd;
 		cpus_and(sd->span, sd->span, *cpu_map);
@@ -6406,6 +6411,7 @@ static int build_sched_domains(const cpu
 		*sd = SD_CPU_INIT;
 		sd->span = nodemask;
 		sd->parent = p;
+		sd->last_balance = cpu_offset(i);
 		if (p)
 			p->child = sd;
 		sd->groups = &sched_group_phys[group];
@@ -6418,6 +6424,7 @@ static int build_sched_domains(const cpu
 		sd->span = cpu_coregroup_map(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
+		sd->last_balance = cpu_offset(i);
 		p->child = sd;
 		sd->groups = &sched_group_core[group];
 #endif
@@ -6430,6 +6437,7 @@ static int build_sched_domains(const cpu
 		sd->span = cpu_sibling_map[i];
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
+		sd->last_balance = cpu_offset(i);
 		p->child = sd;
 		sd->groups = &sched_group_cpus[group];
 #endif
_

Patches currently in -mm which might be from clameter@xxxxxxx are

create-compat_sys_migrate_pages.patch
wire-up-sys_migrate_pages.patch
memory-page-alloc-minor-cleanups.patch
memory-page-alloc-minor-cleanups-fix.patch
get-rid-of-zone_table.patch
deal-with-cases-of-zone_dma-meaning-the-first-zone.patch
get-rid-of-zone_table-fix-3.patch
introduce-config_zone_dma.patch
optional-zone_dma-in-the-vm.patch
optional-zone_dma-in-the-vm-no-gfp_dma-check-in-the-slab-if-no-config_zone_dma-is-set.patch
optional-zone_dma-in-the-vm-no-gfp_dma-check-in-the-slab-if-no-config_zone_dma-is-set-reduce-config_zone_dma-ifdefs.patch
optional-zone_dma-for-ia64.patch
remove-zone_dma-remains-from-parisc.patch
remove-zone_dma-remains-from-sh-sh64.patch
set-config_zone_dma-for-arches-with-generic_isa_dma.patch
zoneid-fix-up-calculations-for-zoneid_pgshift.patch
radix-tree-rcu-lockless-readside.patch
sched-avoid-taking-rq-lock-in-wake_priority_sleeper.patch
sched-disable-interrupts-for-locking-in-load_balance.patch
sched-extract-load-calculation-from-rebalance_tick.patch
sched-stagger-load-balancing-in-build_sched_domains.patch
sched-move-idle-stat-calculation-into-rebalance_tick.patch
sched-use-tasklet-to-call-balancing.patch
sched-call-tasklet-less-frequently.patch
zvc-support-nr_slab_reclaimable--nr_slab_unreclaimable-swap_prefetch.patch
reduce-max_nr_zones-swap_prefetch-remove-incorrect-use-of-zone_highmem.patch
numa-add-zone_to_nid-function-swap_prefetch.patch
readahead-state-based-method-aging-accounting.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html