+ sched-use-softirq-for-load-balancing.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     sched: use softirq for load balancing
has been added to the -mm tree.  Its filename is
     sched-use-softirq-for-load-balancing.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: sched: use softirq for load balancing
From: Christoph Lameter <clameter@xxxxxxx>

Call rebalance_tick (renamed to run_rebalance_domains) from a newly introduced
softirq.

We calculate the earliest time for each layer of sched domains to be rescanned
(this is the rescan time for idle) and use the earliest of those to schedule
the softirq via a new field "next_balance" added to struct rq.

Signed-off-by: Christoph Lameter <clameter@xxxxxxx>
Cc: Peter Williams <pwil3058@xxxxxxxxxxxxxx>
Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx>
Cc: Christoph Lameter <clameter@xxxxxxx>
Cc: "Siddha, Suresh B" <suresh.b.siddha@xxxxxxxxx>
Cc: "Chen, Kenneth W" <kenneth.w.chen@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 include/linux/interrupt.h |    3 ++-
 kernel/sched.c            |   22 +++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff -puN include/linux/interrupt.h~sched-use-softirq-for-load-balancing include/linux/interrupt.h
--- a/include/linux/interrupt.h~sched-use-softirq-for-load-balancing
+++ a/include/linux/interrupt.h
@@ -236,7 +236,8 @@ enum
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
 	BLOCK_SOFTIRQ,
-	TASKLET_SOFTIRQ
+	TASKLET_SOFTIRQ,
+	SCHED_SOFTIRQ,
 };
 
 /* softirq mask and active fields moved to irq_cpustat_t in
diff -puN kernel/sched.c~sched-use-softirq-for-load-balancing kernel/sched.c
--- a/kernel/sched.c~sched-use-softirq-for-load-balancing
+++ a/kernel/sched.c
@@ -227,6 +227,7 @@ struct rq {
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
 	struct task_struct *curr, *idle;
+	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 	struct prio_array *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -2858,7 +2859,7 @@ static void update_load(struct rq *this_
 }
 
 /*
- * rebalance_tick will get called every timer tick, on every CPU.
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
  *
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
@@ -2866,9 +2867,10 @@ static void update_load(struct rq *this_
  * Balancing parameters are set up in arch_init_sched_domains.
  */
 
-static void
-rebalance_tick(int this_cpu, struct rq *this_rq)
+static void run_rebalance_domains(struct softirq_action *h)
 {
+	int this_cpu = smp_processor_id();
+	struct rq *this_rq = cpu_rq(this_cpu);
 	unsigned long interval;
 	struct sched_domain *sd;
 	/*
@@ -2877,6 +2879,8 @@ rebalance_tick(int this_cpu, struct rq *
 	 */
 	enum idle_type idle = !this_rq->nr_running ?
 				SCHED_IDLE : NOT_IDLE;
+	/* Earliest time when we have to call run_rebalance_domains again */
+	unsigned long next_balance = jiffies + 60*HZ;
 
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -2891,7 +2895,7 @@ rebalance_tick(int this_cpu, struct rq *
 		if (unlikely(!interval))
 			interval = 1;
 
-		if (jiffies - sd->last_balance >= interval) {
+		if (time_after_eq(jiffies, sd->last_balance + interval)) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/*
 				 * We've pulled tasks over so either we're no
@@ -2902,7 +2906,10 @@ rebalance_tick(int this_cpu, struct rq *
 			}
 			sd->last_balance += interval;
 		}
+		if (time_after(next_balance, sd->last_balance + interval))
+			next_balance = sd->last_balance + interval;
 	}
+	this_rq->next_balance = next_balance;
 }
 #else
 /*
@@ -3155,7 +3162,8 @@ void scheduler_tick(void)
 		task_running_tick(rq, p);
 #ifdef CONFIG_SMP
 	update_load(rq);
-	rebalance_tick(cpu, rq);
+	if (time_after_eq(jiffies, rq->next_balance))
+		raise_softirq(SCHED_SOFTIRQ);
 #endif
 }
 
@@ -6845,6 +6853,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_SMP
+	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
 #endif
_

Patches currently in -mm which might be from clameter@xxxxxxx are

memory-page-alloc-minor-cleanups.patch
memory-page-alloc-minor-cleanups-fix.patch
get-rid-of-zone_table.patch
deal-with-cases-of-zone_dma-meaning-the-first-zone.patch
get-rid-of-zone_table-fix-3.patch
introduce-config_zone_dma.patch
optional-zone_dma-in-the-vm.patch
optional-zone_dma-in-the-vm-no-gfp_dma-check-in-the-slab-if-no-config_zone_dma-is-set.patch
optional-zone_dma-in-the-vm-no-gfp_dma-check-in-the-slab-if-no-config_zone_dma-is-set-reduce-config_zone_dma-ifdefs.patch
optional-zone_dma-for-ia64.patch
remove-zone_dma-remains-from-parisc.patch
remove-zone_dma-remains-from-sh-sh64.patch
set-config_zone_dma-for-arches-with-generic_isa_dma.patch
zoneid-fix-up-calculations-for-zoneid_pgshift.patch
radix-tree-rcu-lockless-readside.patch
sched-domain-move-sched-group-allocations-to-percpu-area.patch
sched-avoid-taking-rq-lock-in-wake_priority_sleeper.patch
sched-remove-staggering-of-load-balancing.patch
sched-disable-interrupts-for-locking-in-load_balance.patch
sched-extract-load-calculation-from-rebalance_tick.patch
sched-move-idle-status-calculation-into-rebalance_tick.patch
sched-use-softirq-for-load-balancing.patch
sched-call-tasklet-less-frequently.patch
sched-add-option-to-serialize-load-balancing.patch
mm-only-sched-add-a-few-scheduler-event-counters.patch
zvc-support-nr_slab_reclaimable--nr_slab_unreclaimable-swap_prefetch.patch
reduce-max_nr_zones-swap_prefetch-remove-incorrect-use-of-zone_highmem.patch
numa-add-zone_to_nid-function-swap_prefetch.patch
readahead-state-based-method-aging-accounting.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux