The patch titled sched: add option to serialize load balancing has been added to the -mm tree. Its filename is sched-add-option-to-serialize-load-balancing.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: sched: add option to serialize load balancing From: Christoph Lameter <clameter@xxxxxxx> Large sched domains can be very expensive to scan. Add an option SD_SERIALIZE to the sched domain flags. If that flag is set then we make sure that no other such domain is being balanced. Signed-off-by: Christoph Lameter <clameter@xxxxxxx> Cc: Peter Williams <pwil3058@xxxxxxxxxxxxxx> Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx> Cc: Christoph Lameter <clameter@xxxxxxx> Cc: "Siddha, Suresh B" <suresh.b.siddha@xxxxxxxxx> Cc: "Chen, Kenneth W" <kenneth.w.chen@xxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxx> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- include/asm-i386/topology.h | 1 + include/asm-ia64/topology.h | 1 + include/asm-powerpc/topology.h | 1 + include/asm-x86_64/topology.h | 1 + include/linux/sched.h | 1 + include/linux/topology.h | 3 ++- kernel/sched.c | 10 ++++++++++ 7 files changed, 17 insertions(+), 1 deletion(-) diff -puN include/asm-i386/topology.h~sched-add-option-to-serialize-load-balancing include/asm-i386/topology.h --- a/include/asm-i386/topology.h~sched-add-option-to-serialize-load-balancing +++ a/include/asm-i386/topology.h @@ -89,6 +89,7 @@ static inline int node_to_first_cpu(int .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff -puN include/asm-ia64/topology.h~sched-add-option-to-serialize-load-balancing include/asm-ia64/topology.h --- a/include/asm-ia64/topology.h~sched-add-option-to-serialize-load-balancing +++ a/include/asm-ia64/topology.h @@ -101,6 +101,7 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 64, \ diff -puN include/asm-powerpc/topology.h~sched-add-option-to-serialize-load-balancing include/asm-powerpc/topology.h --- a/include/asm-powerpc/topology.h~sched-add-option-to-serialize-load-balancing +++ a/include/asm-powerpc/topology.h @@ -59,6 +59,7 @@ extern int pcibus_to_node(struct pci_bus | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ | SD_WAKE_IDLE \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff -puN include/asm-x86_64/topology.h~sched-add-option-to-serialize-load-balancing include/asm-x86_64/topology.h --- a/include/asm-x86_64/topology.h~sched-add-option-to-serialize-load-balancing +++ a/include/asm-x86_64/topology.h @@ -47,6 +47,7 @@ extern int __node_distance(int, int); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff -puN include/linux/sched.h~sched-add-option-to-serialize-load-balancing include/linux/sched.h --- a/include/linux/sched.h~sched-add-option-to-serialize-load-balancing +++ a/include/linux/sched.h @@ -646,6 +646,7 @@ enum idle_type #define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define BALANCE_FOR_MC_POWER \ (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) diff -puN include/linux/topology.h~sched-add-option-to-serialize-load-balancing include/linux/topology.h --- a/include/linux/topology.h~sched-add-option-to-serialize-load-balancing +++ a/include/linux/topology.h @@ -194,7 +194,8 @@ .wake_idx = 0, /* unused */ \ .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE, \ + .flags = SD_LOAD_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ diff -puN kernel/sched.c~sched-add-option-to-serialize-load-balancing kernel/sched.c --- a/kernel/sched.c~sched-add-option-to-serialize-load-balancing +++ a/kernel/sched.c @@ -2880,6 +2880,7 @@ static void update_load(struct rq *this_ * * Balancing parameters are set up in arch_init_sched_domains. */ +static spinlock_t balancing; static void run_rebalance_domains(struct softirq_action *h) { @@ -2909,6 +2910,11 @@ static void run_rebalance_domains(struct if (unlikely(!interval)) interval = 1; + if (sd->flags & SD_SERIALIZE) { + if (!spin_trylock(&balancing)) + goto out; + } + if (time_after_eq(jiffies, sd->last_balance + interval)) { if (load_balance(this_cpu, this_rq, sd, idle)) { /* @@ -2920,6 +2926,9 @@ static void run_rebalance_domains(struct } sd->last_balance = jiffies; } + if (sd->flags & SD_SERIALIZE) + spin_unlock(&balancing); +out: if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; } @@ -6873,6 +6882,7 @@ void __init sched_init(void) #ifdef CONFIG_RT_MUTEXES plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); + spin_lock_init(&balancing); #endif /* _ Patches currently in -mm which might be from clameter@xxxxxxx are memory-page-alloc-minor-cleanups.patch memory-page-alloc-minor-cleanups-fix.patch get-rid-of-zone_table.patch deal-with-cases-of-zone_dma-meaning-the-first-zone.patch get-rid-of-zone_table-fix-3.patch introduce-config_zone_dma.patch optional-zone_dma-in-the-vm.patch optional-zone_dma-in-the-vm-no-gfp_dma-check-in-the-slab-if-no-config_zone_dma-is-set.patch optional-zone_dma-in-the-vm-no-gfp_dma-check-in-the-slab-if-no-config_zone_dma-is-set-reduce-config_zone_dma-ifdefs.patch optional-zone_dma-for-ia64.patch remove-zone_dma-remains-from-parisc.patch remove-zone_dma-remains-from-sh-sh64.patch set-config_zone_dma-for-arches-with-generic_isa_dma.patch zoneid-fix-up-calculations-for-zoneid_pgshift.patch radix-tree-rcu-lockless-readside.patch sched-domain-move-sched-group-allocations-to-percpu-area.patch sched-avoid-taking-rq-lock-in-wake_priority_sleeper.patch sched-remove-staggering-of-load-balancing.patch sched-disable-interrupts-for-locking-in-load_balance.patch sched-extract-load-calculation-from-rebalance_tick.patch sched-move-idle-status-calculation-into-rebalance_tick.patch sched-use-softirq-for-load-balancing.patch sched-call-tasklet-less-frequently.patch sched-add-option-to-serialize-load-balancing.patch mm-only-sched-add-a-few-scheduler-event-counters.patch zvc-support-nr_slab_reclaimable--nr_slab_unreclaimable-swap_prefetch.patch reduce-max_nr_zones-swap_prefetch-remove-incorrect-use-of-zone_highmem.patch numa-add-zone_to_nid-function-swap_prefetch.patch readahead-state-based-method-aging-accounting.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html