Re: qemu-kvm.git build problem

Paul E. McKenney wrote:
> On Thu, Jan 14, 2010 at 01:11:33AM +0100, Jan Kiszka wrote:
>> Paul E. McKenney wrote:
>>> On Tue, Jan 12, 2010 at 09:28:15AM +0100, Jan Kiszka wrote:
>>>> If so, I will try to write something like this in the next few
>>>> days. Will surely appreciate your review afterwards!
>>> Sounds good!
>>>
>> Here we go; find the commit inlined below. You may be primarily
>> interested in kvm_synchronize_sched_expedited() and the helper thread
>> function kvm_rcu_sync_thread(). As I use dedicated helper threads, I
>> simplified some parts compared to upstream. Light testing indicates
>> that it works.
>>
>> The full set of changes (particularly interesting for anyone who wants
>> to test it) is available on kernel.org. There is now also a commit [1]
>> included for CPU hotplug support, but it's untested as I don't have the
>> required hardware at hand.
> 
> This patch is only for backporting, right?  In mainline, you should be
> able to simply call synchronize_srcu_expedited().

Right. kvm-kmod [1] is like compat-wireless: take the latest module
sources from the kernel, patch them a bit (which is what the sync script
does), wrap them a bit, and you are able to build and run the result on
older kernels.

So I was forced to provide synchronize_srcu_expedited for kernels
<= 2.6.31. Actually, I wrap 2.6.32 as well so that we have a version
that includes your latest optimization.
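
In a nutshell, the compat gate boils down to this (a simplified sketch
of the header hunk further down, not the literal code):

#include <linux/version.h>
#include <linux/srcu.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
/* Older kernels: kvm-kmod supplies its own implementation
   (UP kernels need none at all). */
#ifdef CONFIG_SMP
void kvm_synchronize_srcu_expedited(struct srcu_struct *sp);
#else
static inline void kvm_synchronize_srcu_expedited(struct srcu_struct *sp) { }
#endif
#else
/* 2.6.33 and later: the prefixed name is just an alias for the
   kernel's own function. */
#define kvm_synchronize_srcu_expedited synchronize_srcu_expedited
#endif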

> 
>> Thanks in advance,
>> Jan
>>
>> [1] http://git.kernel.org/?p=virt/kvm/kvm-kmod.git;a=commitdiff;h=81a3ddcd8ea343a9570f3164bb31160ff4be0ab7
>>
>> --------->
>>
>> From fe13d0785586623176879a87949ad626dace192c Mon Sep 17 00:00:00 2001
>> From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
>> Date: Wed, 13 Jan 2010 09:32:50 +0100
>> Subject: [PATCH] Compat support for synchronize_srcu_expedited
>>
>> This service was first introduced in 2.6.32 and further optimized in
>> 2.6.33. KVM makes use of it to replace the rw-sem based slot lock
>> with SRCU.
>>
>> TODO: Add CPU-hotplug support to the sync kthreads.
> 
> Oh.  Never mind my complaint about lack of CPU-hotplug support below.  ;-)
> 
> The function migration_call() in kernel/sched.c has the code you would
> want to harvest for this.  And re-reading your text above, perhaps you
> already have done so.

Yep (stolen from softirq.c):

From 81a3ddcd8ea343a9570f3164bb31160ff4be0ab7 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
Date: Thu, 14 Jan 2010 00:26:07 +0100
Subject: [PATCH] Add CPU hotplug support for kvmsrcusync threads

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 srcu.c |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/srcu.c b/srcu.c
index 985d627..841394d 100644
--- a/srcu.c
+++ b/srcu.c
@@ -399,13 +399,66 @@ void kvm_synchronize_srcu_expedited(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(kvm_synchronize_srcu_expedited);
 
+static struct sched_param sync_thread_param = {
+	.sched_priority = MAX_RT_PRIO-1
+};
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <linux/cpumask.h>
+
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+			void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct task_struct *p;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		p = kthread_create(kvm_rcu_sync_thread, hcpu,
+				   "kvmsrcusync/%d", hotcpu);
+		if (IS_ERR(p)) {
+			printk(KERN_ERR "kvm: kvmsrcsync for %d failed\n",
+			       hotcpu);
+			return NOTIFY_BAD;
+		}
+		kthread_bind(p, hotcpu);
+		sched_setscheduler(p, SCHED_FIFO, &sync_thread_param);
+		per_cpu(sync_thread, hotcpu) = p;
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		wake_up_process(per_cpu(sync_thread, hotcpu));
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!per_cpu(sync_thread, hotcpu))
+			break;
+		/* Unbind so it can run.  Fall thru. */
+		kthread_bind(per_cpu(sync_thread, hotcpu),
+			     cpumask_any(cpu_online_mask));
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		p = per_cpu(sync_thread, hotcpu);
+		per_cpu(sync_thread, hotcpu) = NULL;
+		kthread_stop(p);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+#endif /* CONFIG_HOTPLUG_CPU */
+
 int kvm_init_srcu(void)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 	struct task_struct *p;
 	int cpu;
 	int err;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		p = kthread_create(kvm_rcu_sync_thread, (void *)(long)cpu,
 				   "kvmsrcusync/%d", cpu);
@@ -413,13 +466,19 @@ int kvm_init_srcu(void)
 			goto error_out;
 
 		kthread_bind(p, cpu);
-		sched_setscheduler(p, SCHED_FIFO, &param);
+		sched_setscheduler(p, SCHED_FIFO, &sync_thread_param);
 		per_cpu(sync_thread, cpu) = p;
 		wake_up_process(p);
 	}
+#ifdef CONFIG_HOTPLUG_CPU
+	register_cpu_notifier(&cpu_nfb);
+#endif /* CONFIG_HOTPLUG_CPU */
+	put_online_cpus();
+
 	return 0;
 
 error_out:
+	put_online_cpus();
 	printk(KERN_ERR "kvm: kvmsrcsync for %d failed\n", cpu);
 	err = PTR_ERR(p);
 	kvm_exit_srcu();
@@ -430,6 +489,7 @@ void kvm_exit_srcu(void)
 {
 	int cpu;
 
+	unregister_cpu_notifier(&cpu_nfb);
 	for_each_online_cpu(cpu)
 		if (per_cpu(sync_thread, cpu))
 			kthread_stop(per_cpu(sync_thread, cpu));
-- 
1.6.0.2

> 
> Anyway, looks good.  There are some questions interspersed below to make
> sure I understand the situation.  There is one cut-and-paste spelling
> error and another cut-and-paste C-standard violation, the latter being
> my fault, and as far as I know, not a problem in practice.
> 
>> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
>> ---
>>  external-module-compat-comm.h |   17 ++
>>  srcu.c                        |  382 ++++++++++++++++++++++++++++++-----------
>>  sync                          |   27 ++-
>>  3 files changed, 316 insertions(+), 110 deletions(-)
>>
>> diff --git a/external-module-compat-comm.h b/external-module-compat-comm.h
>> index ca57567..eabf8a1 100644
>> --- a/external-module-compat-comm.h
>> +++ b/external-module-compat-comm.h
>> @@ -1053,3 +1053,20 @@ static inline void kvm_fire_urn(void)
>>  static inline void kvm_fire_urn(void) {}
>>  
>>  #endif /* CONFIG_USER_RETURN_NOTIFIER */
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
>> +
>> +#ifdef CONFIG_SMP
>> +void kvm_synchronize_srcu_expedited(struct srcu_struct *sp);
>> +#else
>> +static inline void kvm_synchronize_srcu_expedited(struct srcu_struct *sp) { }
>> +#endif
>> +
>> +#else
>> +
>> +#define kvm_synchronize_srcu_expedited synchronize_srcu_expedited
>> +
>> +#endif
>> +
>> +int kvm_init_srcu(void);
>> +void kvm_exit_srcu(void);
>> diff --git a/srcu.c b/srcu.c
> 
> Are we in kernel/srcu.c or a kvm-specific file?

We are in kvm-kmod/srcu.c, the code that wraps SRCU for older kernels
(it used to work down to 2.6.16).

> 
> My guess is that we are in kernel/srcu.c for a backport-only patch, but
> have to ask!
> 
>> index e9734bc..985d627 100644
>> --- a/srcu.c
>> +++ b/srcu.c
>> @@ -24,8 +24,6 @@
>>   *
>>   */
>>  
>> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
>> -
>>  #include <linux/module.h>
>>  #include <linux/mutex.h>
>>  #include <linux/percpu.h>
>> @@ -35,6 +33,118 @@
>>  #include <linux/slab.h>
>>  #include <linux/smp.h>
>>  #include <linux/srcu.h>
>> +#include <linux/kthread.h>
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) || \
>> +    (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && defined(CONFIG_SMP))
>> +
>> +/*
>> + * srcu_readers_active_idx -- returns approximate number of readers
>> + *	active on the specified rank of per-CPU counters.
>> + */
>> +
>> +static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
>> +{
>> +	int cpu;
>> +	int sum;
>> +
>> +	sum = 0;
>> +	for_each_possible_cpu(cpu)
>> +		sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
>> +	return sum;
>> +}
>> +
>> +/*
>> + * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
>> + */
>> +static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
>> +{
>> +	int idx;
>> +
>> +	idx = sp->completed;
>> +	mutex_lock(&sp->mutex);
>> +
>> +	/*
>> +	 * Check to see if someone else did the work for us while we were
>> +	 * waiting to acquire the lock.  We need -two- advances of
>> +	 * the counter, not just one.  If there was but one, we might have
>> +	 * shown up -after- our helper's first synchronize_sched(), thus
>> +	 * having failed to prevent CPU-reordering races with concurrent
>> +	 * srcu_read_unlock()s on other CPUs (see comment below).  So we
>> +	 * either (1) wait for two or (2) supply the second ourselves.
>> +	 */
>> +
>> +	if ((sp->completed - idx) >= 2) {
>> +		mutex_unlock(&sp->mutex);
>> +		return;
>> +	}
>> +
>> +	sync_func();  /* Force memory barrier on all CPUs. */
>> +
>> +	/*
>> +	 * The preceding synchronize_sched() ensures that any CPU that
>> +	 * sees the new value of sp->completed will also see any preceding
>> +	 * changes to data structures made by this CPU.  This prevents
>> +	 * some other CPU from reordering the accesses in its SRCU
>> +	 * read-side critical section to precede the corresponding
>> +	 * srcu_read_lock() -- ensuring that such references will in
>> +	 * fact be protected.
>> +	 *
>> +	 * So it is now safe to do the flip.
>> +	 */
>> +
>> +	idx = sp->completed & 0x1;
>> +	sp->completed++;
>> +
>> +	sync_func();  /* Force memory barrier on all CPUs. */
>> +
>> +	/*
>> +	 * At this point, because of the preceding synchronize_sched(),
>> +	 * all srcu_read_lock() calls using the old counters have completed.
>> +	 * Their corresponding critical sections might well be still
>> +	 * executing, but the srcu_read_lock() primitives themselves
>> +	 * will have finished executing.
>> +	 */
>> +
>> +	while (srcu_readers_active_idx(sp, idx))
>> +		schedule_timeout_interruptible(1);
>> +
>> +	sync_func();  /* Force memory barrier on all CPUs. */
>> +
>> +	/*
>> +	 * The preceding synchronize_sched() forces all srcu_read_unlock()
>> +	 * primitives that were executing concurrently with the preceding
>> +	 * for_each_possible_cpu() loop to have completed by this point.
>> +	 * More importantly, it also forces the corresponding SRCU read-side
>> +	 * critical sections to have also completed, and the corresponding
>> +	 * references to SRCU-protected data items to be dropped.
>> +	 *
>> +	 * Note:
>> +	 *
>> +	 *	Despite what you might think at first glance, the
>> +	 *	preceding synchronize_sched() -must- be within the
>> +	 *	critical section ended by the following mutex_unlock().
>> +	 *	Otherwise, a task taking the early exit can race
>> +	 *	with a srcu_read_unlock(), which might have executed
>> +	 *	just before the preceding srcu_readers_active() check,
>> +	 *	and whose CPU might have reordered the srcu_read_unlock()
>> +	 *	with the preceding critical section.  In this case, there
>> +	 *	is nothing preventing the synchronize_sched() task that is
>> +	 *	taking the early exit from freeing a data structure that
>> +	 *	is still being referenced (out of order) by the task
>> +	 *	doing the srcu_read_unlock().
>> +	 *
>> +	 *	Alternatively, the comparison with "2" on the early exit
>> +	 *	could be changed to "3", but this increases synchronize_srcu()
>> +	 *	latency for bulk loads.  So the current code is preferred.
>> +	 */
>> +
>> +	mutex_unlock(&sp->mutex);
>> +}
>> +
>> +#endif
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
>>  
>>  #undef kvm_init_srcu_struct
>>  #undef kvm_cleanup_srcu_struct
>> @@ -42,6 +152,11 @@
>>  #undef kvm_srcu_read_unlock
>>  #undef kvm_synchronize_srcu
>>  #undef kvm_srcu_batches_completed
>> +
>> +static int srcu_readers_active_idx(struct srcu_struct *sp, int idx);
>> +static void
>> +__synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void));
>> +
>>  /**
>>   * init_srcu_struct - initialize a sleep-RCU structure
>>   * @sp: structure to initialize.
>> @@ -58,22 +173,6 @@ int kvm_init_srcu_struct(struct srcu_struct *sp)
>>  	return (sp->per_cpu_ref ? 0 : -ENOMEM);
>>  }
>>  
>> -/*
>> - * srcu_readers_active_idx -- returns approximate number of readers
>> - *	active on the specified rank of per-CPU counters.
>> - */
>> -
>> -static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
>> -{
>> -	int cpu;
>> -	int sum;
>> -
>> -	sum = 0;
>> -	for_each_possible_cpu(cpu)
>> -		sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
>> -	return sum;
>> -}
>> -
>>  /**
>>   * srcu_readers_active - returns approximate number of readers.
>>   * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
>> @@ -154,94 +253,14 @@ void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx)
>>   * synchronizing concurrent updates.  Can block; must be called from
>>   * process context.
>>   *
>> - * Note that it is illegal to call synchornize_srcu() from the corresponding
>> + * Note that it is illegal to call synchronize_srcu() from the corresponding
>>   * SRCU read-side critical section; doing so will result in deadlock.
>>   * However, it is perfectly legal to call synchronize_srcu() on one
>>   * srcu_struct from some other srcu_struct's read-side critical section.
>>   */
>>  void kvm_synchronize_srcu(struct srcu_struct *sp)
>>  {
>> -	int idx;
>> -
>> -	idx = sp->completed;
>> -	mutex_lock(&sp->mutex);
>> -
>> -	/*
>> -	 * Check to see if someone else did the work for us while we were
>> -	 * waiting to acquire the lock.  We need -two- advances of
>> -	 * the counter, not just one.  If there was but one, we might have
>> -	 * shown up -after- our helper's first synchronize_sched(), thus
>> -	 * having failed to prevent CPU-reordering races with concurrent
>> -	 * srcu_read_unlock()s on other CPUs (see comment below).  So we
>> -	 * either (1) wait for two or (2) supply the second ourselves.
>> -	 */
>> -
>> -	if ((sp->completed - idx) >= 2) {
>> -		mutex_unlock(&sp->mutex);
>> -		return;
>> -	}
>> -
>> -	synchronize_sched();  /* Force memory barrier on all CPUs. */
>> -
>> -	/*
>> -	 * The preceding synchronize_sched() ensures that any CPU that
>> -	 * sees the new value of sp->completed will also see any preceding
>> -	 * changes to data structures made by this CPU.  This prevents
>> -	 * some other CPU from reordering the accesses in its SRCU
>> -	 * read-side critical section to precede the corresponding
>> -	 * srcu_read_lock() -- ensuring that such references will in
>> -	 * fact be protected.
>> -	 *
>> -	 * So it is now safe to do the flip.
>> -	 */
>> -
>> -	idx = sp->completed & 0x1;
>> -	sp->completed++;
>> -
>> -	synchronize_sched();  /* Force memory barrier on all CPUs. */
>> -
>> -	/*
>> -	 * At this point, because of the preceding synchronize_sched(),
>> -	 * all srcu_read_lock() calls using the old counters have completed.
>> -	 * Their corresponding critical sections might well be still
>> -	 * executing, but the srcu_read_lock() primitives themselves
>> -	 * will have finished executing.
>> -	 */
>> -
>> -	while (srcu_readers_active_idx(sp, idx))
>> -		schedule_timeout_interruptible(1);
>> -
>> -	synchronize_sched();  /* Force memory barrier on all CPUs. */
>> -
>> -	/*
>> -	 * The preceding synchronize_sched() forces all srcu_read_unlock()
>> -	 * primitives that were executing concurrently with the preceding
>> -	 * for_each_possible_cpu() loop to have completed by this point.
>> -	 * More importantly, it also forces the corresponding SRCU read-side
>> -	 * critical sections to have also completed, and the corresponding
>> -	 * references to SRCU-protected data items to be dropped.
>> -	 *
>> -	 * Note:
>> -	 *
>> -	 *	Despite what you might think at first glance, the
>> -	 *	preceding synchronize_sched() -must- be within the
>> -	 *	critical section ended by the following mutex_unlock().
>> -	 *	Otherwise, a task taking the early exit can race
>> -	 *	with a srcu_read_unlock(), which might have executed
>> -	 *	just before the preceding srcu_readers_active() check,
>> -	 *	and whose CPU might have reordered the srcu_read_unlock()
>> -	 *	with the preceding critical section.  In this case, there
>> -	 *	is nothing preventing the synchronize_sched() task that is
>> -	 *	taking the early exit from freeing a data structure that
>> -	 *	is still being referenced (out of order) by the task
>> -	 *	doing the srcu_read_unlock().
>> -	 *
>> -	 *	Alternatively, the comparison with "2" on the early exit
>> -	 *	could be changed to "3", but this increases synchronize_srcu()
>> -	 *	latency for bulk loads.  So the current code is preferred.
>> -	 */
>> -
>> -	mutex_unlock(&sp->mutex);
>> +	__synchronize_srcu(sp, synchronize_sched);
>>  }
> 
> Hmmm...  Why not simply "synchronize_srcu()"?  Is this to allow a smaller
> piece of code to cover all the combinations of Linux kernel versions?

This is for kernels that do not have it yet (SRCU only entered the
kernel with 2.6.19).
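
Both entry points just pass a different grace-period primitive into the
shared helper (this is straight from the patch above):

void kvm_synchronize_srcu(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, synchronize_sched);
}

void kvm_synchronize_srcu_expedited(struct srcu_struct *sp)
{
	__synchronize_srcu(sp, kvm_synchronize_sched_expedited);
}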

> 
>>  /**
>> @@ -265,3 +284,166 @@ EXPORT_SYMBOL_GPL(kvm_synchronize_srcu);
>>  EXPORT_SYMBOL_GPL(kvm_srcu_batches_completed);
>>  
>>  #endif
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && defined(CONFIG_SMP)
>> +
>> +struct sync_req {
>> +	struct list_head list;
>> +	bool pending;
>> +	bool success;
>> +	struct completion done;
>> +};
>> +
>> +static DEFINE_PER_CPU(struct sync_req, sync_req);
>> +static DEFINE_PER_CPU(struct task_struct *, sync_thread);
>> +static DEFINE_MUTEX(rcu_sched_expedited_mutex);
>> +
>> +static long synchronize_sched_expedited_count;
> 
> Sigh!!!  You copied one of my theoretical bugs.
> 
> According to the C standard, overflowing a signed integer is undefined
> (though it has been a -long- time since I have seen hardware that
> did anything other than wrap like you would expect).  My todo list
> includes converting these to "unsigned long".  I can certainly add
> this instance to my list, but I would of course be quite happy if you
> made the appropriate adjustments.

Will do! :)
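
Probably something along these lines (an untested sketch that mirrors
the existing check; the cast is the same wrap-safe idiom the kernel
uses in time_after()):

static unsigned long synchronize_sched_expedited_count;

/* True once the counter has advanced past the snapshot, even across
 * a wrap of the unsigned counter. */
static inline int expedited_count_after(unsigned long snap)
{
	return (long)(ACCESS_ONCE(synchronize_sched_expedited_count) - snap)
		> 0;
}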

> 
> (This effort got preempted by applying lockdep to RCU, given that Thomas
> Gleixner found some non-theoretical RCU-usage bugs.)
> 
>> +static int kvm_rcu_sync_thread(void *data)
>> +{
>> +	int badcpu;
>> +	int cpu = (long)data;
>> +	struct sync_req *req = &per_cpu(sync_req, cpu);
>> +
>> +	set_current_state(TASK_INTERRUPTIBLE);
>> +	while (!kthread_should_stop()) {
>> +		if (!req->pending) {
>> +			schedule();
>> +			set_current_state(TASK_INTERRUPTIBLE);
>> +			continue;
>> +		}
>> +		req->pending = false;
>> +
>> +		preempt_disable();
>> +		badcpu = smp_processor_id();
>> +		if (likely(cpu == badcpu)) {
>> +			req->success = true;
>> +		} else {
>> +			req->success = false;
>> +			WARN_ONCE(1, "kvm_rcu_sync_thread() on CPU %d, "
>> +				  "expected %d\n", badcpu, cpu);
>> +		}
>> +		preempt_enable();
>> +
>> +		complete(&req->done);
>> +	}
>> +	__set_current_state(TASK_RUNNING);
>> +
>> +	return 0;
>> +}
>> +
>> +static void kvm_synchronize_sched_expedited(void)
>> +{
>> +	int cpu;
>> +	bool need_full_sync = 0;
>> +	struct sync_req *req;
>> +	long snap;
>> +	int trycount = 0;
>> +
>> +	smp_mb();  /* ensure prior mod happens before capturing snap. */
>> +	snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
>> +	get_online_cpus();
>> +	while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
>> +		put_online_cpus();
>> +		if (trycount++ < 10)
>> +			udelay(trycount * num_online_cpus());
>> +		else {
>> +			synchronize_sched();
>> +			return;
>> +		}
>> +		if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
>> +			smp_mb(); /* ensure test happens before caller kfree */
>> +			return;
>> +		}
>> +		get_online_cpus();
>> +	}
>> +	for_each_online_cpu(cpu) {
>> +		req = &per_cpu(sync_req, cpu);
>> +		init_completion(&req->done);
>> +		smp_wmb();
>> +		req->pending = true;
>> +		wake_up_process(per_cpu(sync_thread, cpu));
>> +	}
>> +	for_each_online_cpu(cpu) {
>> +		req = &per_cpu(sync_req, cpu);
>> +		wait_for_completion(&req->done);
>> +		if (unlikely(!req->success))
>> +			need_full_sync = 1;
>> +	}
>> +	synchronize_sched_expedited_count++;
>> +	mutex_unlock(&rcu_sched_expedited_mutex);
>> +	put_online_cpus();
>> +	if (need_full_sync)
>> +		synchronize_sched();
>> +}
>> +
>> +/**
>> + * synchronize_srcu_expedited - like synchronize_srcu, but less patient
> 
> "kvm_synchronize_srcu_expedited", right?  ;-)

Yes. But it's actually synchronize_srcu_expedited, as we patch all call
sites in the kvm module code to avoid namespace conflicts on 2.6.32.

> 
>> + * @sp: srcu_struct with which to synchronize.
>> + *
>> + * Flip the completed counter, and wait for the old count to drain to zero.
>> + * As with classic RCU, the updater must use some separate means of
>> + * synchronizing concurrent updates.  Can block; must be called from
>> + * process context.
>> + *
>> + * Note that it is illegal to call synchronize_srcu_expedited()
>> + * from the corresponding SRCU read-side critical section; doing so
>> + * will result in deadlock.  However, it is perfectly legal to call
>> + * synchronize_srcu_expedited() on one srcu_struct from some other
>> + * srcu_struct's read-side critical section.
>> + */
>> +void kvm_synchronize_srcu_expedited(struct srcu_struct *sp)
>> +{
>> +	__synchronize_srcu(sp, kvm_synchronize_sched_expedited);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_synchronize_srcu_expedited);
>> +
>> +int kvm_init_srcu(void)
>> +{
>> +	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
>> +	struct task_struct *p;
>> +	int cpu;
>> +	int err;
>> +
>> +	for_each_online_cpu(cpu) {
>> +		p = kthread_create(kvm_rcu_sync_thread, (void *)(long)cpu,
>> +				   "kvmsrcusync/%d", cpu);
>> +		if (IS_ERR(p))
>> +			goto error_out;
>> +
>> +		kthread_bind(p, cpu);
>> +		sched_setscheduler(p, SCHED_FIFO, &param);
>> +		per_cpu(sync_thread, cpu) = p;
>> +		wake_up_process(p);
>> +	}
>> +	return 0;
>> +
>> +error_out:
>> +	printk(KERN_ERR "kvm: kvmsrcsync for %d failed\n", cpu);
>> +	err = PTR_ERR(p);
>> +	kvm_exit_srcu();
>> +	return err;
>> +}
> 
> CPU hotplug?
> 
>> +void kvm_exit_srcu(void)
>> +{
>> +	int cpu;
>> +
>> +	for_each_online_cpu(cpu)
>> +		if (per_cpu(sync_thread, cpu))
>> +			kthread_stop(per_cpu(sync_thread, cpu));
>> +}
>> +
>> +#else
>> +
>> +int kvm_init_srcu(void)
>> +{
>> +	return 0;
>> +}
>> +
>> +void kvm_exit_srcu(void)
>> +{
>> +}
>> +
>> +#endif
> 
> I do not understand what follows -- a KVM-specific test suite or some
> such?

It's the patch script that mangles the sources when importing them from
the kernel into kvm-kmod. E.g., we patch synchronize_srcu_expedited
call sites to redirect them to our implementation.
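
For illustration, kvm_init() in the imported sources ends up looking
roughly like this after the script has run (reconstructed from the
substitutions below; the surrounding kvm code and the label names are
abbreviated and hypothetical):

int kvm_init(void *opaque, unsigned vcpu_size, struct module *module)
{
	int r;

	r = kvm_init_anon_inodes();
	if (r)
		return r;

	r = kvm_init_srcu();
	if (r)
		goto cleanup_anon_inodes;

	preempt_notifier_sys_init();
	hrtimer_kallsyms_resolve();

	r = kvm_arch_init(opaque);
	if (r)
		goto out_fail;

	/* ... remaining initialization ... */

	printk("loaded kvm module (%s)\n", version);
	return 0;

out_fail:
	preempt_notifier_sys_exit();
	kvm_exit_srcu();
cleanup_anon_inodes:
	kvm_exit_anon_inodes();
	return r;
}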

> 
>> diff --git a/sync b/sync
>> index 0eaff31..d67469e 100755
>> --- a/sync
>> +++ b/sync
>> @@ -40,8 +40,9 @@ def __hack(data):
>>          'do_machine_check eventfd_signal get_desc_base get_desc_limit '
>>          'vma_kernel_pagesize native_read_tsc user_return_notifier '
>>          'user_return_notifier_register user_return_notifier_unregister '
>> +        'synchronize_srcu_expedited '
>>          )
>> -    anon_inodes = anon_inodes_exit = False
>> +    kvm_init = kvm_exit = False
>>      mce = False
>>      result = []
>>      pr_fmt = ''
>> @@ -58,24 +59,30 @@ def __hack(data):
>>              pr_fmt = sub(r'#define pr_fmt\([^)]*\) ("[^"]*").*', r'\1', line) + ' '
>>              line = ''
>>          line = sub(r'pr_debug\(([^),]*)', r'pr_debug(' + pr_fmt + r'\1', line)
>> -        if match(r'^int kvm_init\('): anon_inodes = True
>> -        if match(r'r = kvm_arch_init\(opaque\);') and anon_inodes:
>> +        if match(r'^int kvm_init\('): kvm_init = True
>> +        if match(r'r = kvm_arch_init\(opaque\);') and kvm_init:
>>              w('\tr = kvm_init_anon_inodes();')
>>              w('\tif (r)')
>>              w('\t\treturn r;\n')
>> +            w('\tr = kvm_init_srcu();')
>> +            w('\tif (r)')
>> +            w('\t\tgoto cleanup_anon_inodes;\n')
>>              w('\tpreempt_notifier_sys_init();')
>>              w('\thrtimer_kallsyms_resolve();\n')
>> -        if match(r'return 0;') and anon_inodes:
>> +        if match(r'return 0;') and kvm_init:
>>              w('\tprintk("loaded kvm module (%s)\\n");\n' % (version,))
>> -        if match(r'return r;') and anon_inodes:
>> -            w('\tkvm_exit_anon_inodes();')
>> +        if match(r'return r;') and kvm_init:
>>              w('\tpreempt_notifier_sys_exit();')
>> -            anon_inodes = False
>> -        if match(r'^void kvm_exit'): anon_inodes_exit = True
>> -        if match(r'\}') and anon_inodes_exit:
>> +            w('\tkvm_exit_srcu();')
>> +            w('cleanup_anon_inodes:')
>>              w('\tkvm_exit_anon_inodes();')
>> +            kvm_init = False
>> +        if match(r'^void kvm_exit'): kvm_exit = True
>> +        if match(r'\}') and kvm_exit:
>>              w('\tpreempt_notifier_sys_exit();')
>> -            anon_inodes_exit = False
>> +            w('\tkvm_exit_srcu();')
>> +            w('\tkvm_exit_anon_inodes();')
>> +            kvm_exit = False
>>          if match(r'^int kvm_arch_init'): kvm_arch_init = True
>>          if match(r'\btsc_khz\b') and kvm_arch_init:
>>              line = sub(r'\btsc_khz\b', 'kvm_tsc_khz', line)
>> -- 
>> 1.6.0.2
>>
> 
> 

Thanks for the review!
Jan

[1] http://www.linux-kvm.org/page/Getting_the_kvm_kernel_modules
