On Thu, 2009-02-12 at 11:16 +0100, Peter Zijlstra wrote:
> On Thu, 2009-02-12 at 11:07 +0100, Ingo Molnar wrote:
> > * Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
> > 
> > > On Thu, 2009-02-12 at 09:19 +0100, Ingo Molnar wrote:
> > > > * Ingo Molnar <mingo@xxxxxxx> wrote:
> > > > 
> > > > > hm, that's a complex one - we do kfree() from IPI context, [...]
> > > > 
> > > > The patch below might do the trick - it offloads this to a softirq.
> > > > Not tested yet.
> > > 
> > > The simple fix is something like:
> > > 
> > > ---
> > >  kernel/smp.c |    8 ++++++++
> > >  1 files changed, 8 insertions(+), 0 deletions(-)
> > 
> > ok, I made it unconditional (not just a PREEMPT_RT hack) and did the
> > cleanup below on top of it.
> > 
> > I don't think repeated, queued IPIs are all that interesting from a
> > performance point of view. If they are, it will all be clearly
> > bisectable.
> 
> Right, except I really don't like the smp_call_function_many() slow path
> that's now the only path.
> 
> Rusty did that I think, but he also had some idea on how to fix it; I
> think it boiled down to sticking a count in the call data instead of the
> full cpumask.
> 
> So I'd rather we first fix that code, and then remove the kmalloc
> altogether like you propose here.

Right, I can't see a way around carrying that cpumask; there's just too
much that can go wrong without it. So I put it in unconditionally -- how
about this?

--
Subject: generic-smp: remove single ipi fallback for smp_call_function_many()

In preparation of removing the kmalloc() calls from the generic-ipi code,
get rid of the single ipi fallback for smp_call_function_many().

Because we cannot get around carrying the cpumask in the data -- imagine
2 such calls with different but overlapping masks -- put in a full mask.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
 kernel/smp.c |   47 ++++++++++++++++++++++++-----------------------
 1 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index bbedbb7..e6658e5 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -26,7 +26,7 @@ struct call_function_data {
 	spinlock_t lock;
 	unsigned int refs;
 	struct rcu_head rcu_head;
-	unsigned long cpumask_bits[];
+	struct cpumask cpumask;
 };
 
 struct call_single_queue {
@@ -111,13 +111,13 @@ void generic_smp_call_function_interrupt(void)
 	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
 		int refs;
 
-		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
+		if (!cpumask_test_cpu(cpu, &data->cpumask))
 			continue;
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
+		cpumask_clear_cpu(cpu, &data->cpumask);
 		WARN_ON(data->refs == 0);
 		data->refs--;
 		refs = data->refs;
@@ -137,8 +137,10 @@ void generic_smp_call_function_interrupt(void)
 			 */
 			smp_wmb();
 			data->csd.flags &= ~CSD_FLAG_WAIT;
-		}
-		if (data->csd.flags & CSD_FLAG_ALLOC)
+		} else if (data->csd.flags & CSD_FLAG_LOCK) {
+			smp_wmb();
+			data->csd.flags &= ~CSD_FLAG_LOCK;
+		} else if (data->csd.flags & CSD_FLAG_ALLOC)
 			call_rcu(&data->rcu_head, rcu_free_call_data);
 	}
 	rcu_read_unlock();
@@ -302,6 +304,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
 	arch_send_call_function_ipi(*(maskp))
 #endif
 
+static DEFINE_PER_CPU(struct call_function_data, cfd_data);
+
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
@@ -323,14 +327,14 @@ void smp_call_function_many(const struct cpumask *mask,
 {
 	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, next_cpu;
+	int cpu, next_cpu, me = smp_processor_id();
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
 	/* So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
-	if (cpu == smp_processor_id())
+	if (cpu == me)
 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
 	/* No online cpus?  We're done. */
 	if (cpu >= nr_cpu_ids)
@@ -338,7 +342,7 @@ void smp_call_function_many(const struct cpumask *mask,
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-	if (next_cpu == smp_processor_id())
+	if (next_cpu == me)
 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
 
 	/* Fastpath: do that cpu by itself. */
@@ -347,27 +351,24 @@ void smp_call_function_many(const struct cpumask *mask,
 		return;
 	}
 
-	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
-	if (unlikely(!data)) {
-		/* Slow path. */
-		for_each_online_cpu(cpu) {
-			if (cpu == smp_processor_id())
-				continue;
-			if (cpumask_test_cpu(cpu, mask))
-				smp_call_function_single(cpu, func, info, wait);
-		}
-		return;
+	data = kmalloc(sizeof(*data), GFP_ATOMIC);
+	if (data)
+		data->csd.flags = CSD_FLAG_ALLOC;
+	else {
+		data = &per_cpu(cfd_data, me);
+		while (data->csd.flags & CSD_FLAG_LOCK)
+			cpu_relax();
+		data->csd.flags = CSD_FLAG_LOCK;
 	}
 
 	spin_lock_init(&data->lock);
-	data->csd.flags = CSD_FLAG_ALLOC;
 	if (wait)
 		data->csd.flags |= CSD_FLAG_WAIT;
 	data->csd.func = func;
 	data->csd.info = info;
-	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
-	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
+	cpumask_and(&data->cpumask, mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &data->cpumask);
+	data->refs = cpumask_weight(&data->cpumask);
 
 	spin_lock_irqsave(&call_function_lock, flags);
 	list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -379,7 +380,7 @@ void smp_call_function_many(const struct cpumask *mask,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
+	arch_send_call_function_ipi_mask(&data->cpumask);
 
 	/* optionally wait for the CPUs to complete */
 	if (wait)
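
For anyone who wants to see the CSD_FLAG_LOCK trick in isolation: the snippet
below is a minimal, single-threaded user-space sketch (plain C11, not kernel
code) of the fallback pattern the patch introduces -- try to allocate a
request slot, and if that fails, reuse one preallocated static slot that is
guarded by a "lock" flag and spun on until the previous user has released it.
struct call_slot, SLOT_LOCK and fake_alloc() are invented names used only for
illustration; the per-cpu placement, preemption rules and memory barriers that
make the real kernel version safe are deliberately left out.

/*
 * Illustrative sketch only -- not kernel code.  Models the "allocate, or
 * fall back to a preallocated slot guarded by a lock flag" pattern.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define SLOT_LOCK	0x01	/* slot is in use; wait until it is released */

struct call_slot {
	atomic_int flags;	/* holds SLOT_LOCK while a request is in flight */
	void (*func)(void *);	/* function to run "remotely" */
	void *info;		/* its argument */
};

/* One statically allocated fallback slot, standing in for per-cpu cfd_data. */
static struct call_slot fallback_slot;

/* Pretend allocator that can fail, standing in for kmalloc(GFP_ATOMIC). */
static struct call_slot *fake_alloc(int fail)
{
	return fail ? NULL : malloc(sizeof(struct call_slot));
}

static struct call_slot *get_slot(int alloc_fails)
{
	struct call_slot *slot = fake_alloc(alloc_fails);

	if (slot) {
		atomic_init(&slot->flags, 0);
		return slot;
	}

	/* Allocation failed: fall back to the static slot, but first wait
	 * until whoever used it last has cleared SLOT_LOCK. */
	slot = &fallback_slot;
	while (atomic_load(&slot->flags) & SLOT_LOCK)
		;	/* the kernel would call cpu_relax() here */
	atomic_store(&slot->flags, SLOT_LOCK);
	return slot;
}

/* The "IPI handler" side: run the function, then either free the allocated
 * slot or hand the static one back by clearing SLOT_LOCK, mirroring the
 * CSD_FLAG_ALLOC / CSD_FLAG_LOCK handling added to
 * generic_smp_call_function_interrupt(). */
static void run_and_release(struct call_slot *slot)
{
	slot->func(slot->info);

	if (slot == &fallback_slot)
		atomic_store(&slot->flags, 0);	/* release the slot */
	else
		free(slot);
}

static void say_hello(void *info)
{
	printf("hello from the %s path\n", (const char *)info);
}

int main(void)
{
	struct call_slot *slot;

	/* Normal path: the allocation succeeds. */
	slot = get_slot(0);
	slot->func = say_hello;
	slot->info = "allocated";
	run_and_release(slot);

	/* Fallback path: the allocation "fails", so the static slot is
	 * reused after (trivially) waiting for it to become free. */
	slot = get_slot(1);
	slot->func = say_hello;
	slot->info = "fallback";
	run_and_release(slot);

	return 0;
}

Built with something like gcc -std=c11, it prints one line from each path.
In the patch the same roles are played by kmalloc(GFP_ATOMIC), the per-cpu
cfd_data slot, CSD_FLAG_LOCK, and the clearing of that flag in
generic_smp_call_function_interrupt().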