We do not need to hold the preemption-disabling kvm_lock just to walk the
list of VMs. Convert the stat services, the cpufreq callback and mmu_shrink
to RCU. For the latter, special care is required to synchronize its
list_move_tail with kvm_destroy_vm.

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 arch/x86/kvm/mmu.c       |   14 +++++++++-----
 arch/x86/kvm/x86.c       |    4 ++--
 include/linux/kvm_host.h |    1 +
 virt/kvm/kvm_main.c      |   17 ++++++++++-------
 4 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6a9963..e9d0ed8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3587,9 +3587,9 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 	if (nr_to_scan == 0)
 		goto out;
 
-	raw_spin_lock(&kvm_lock);
+	rcu_read_lock();
 
-	list_for_each_entry(kvm, &vm_list, vm_list) {
+	list_for_each_entry_rcu(kvm, &vm_list, vm_list) {
 		int idx, freed_pages;
 		LIST_HEAD(invalid_list);
 
@@ -3607,10 +3607,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 		spin_unlock(&kvm->mmu_lock);
 		srcu_read_unlock(&kvm->srcu, idx);
 	}
-	if (kvm_freed)
-		list_move_tail(&kvm_freed->vm_list, &vm_list);
+	if (kvm_freed) {
+		raw_spin_lock(&kvm_lock);
+		if (!kvm_freed->deleted)
+			list_move_tail(&kvm_freed->vm_list, &vm_list);
+		raw_spin_unlock(&kvm_lock);
+	}
 
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 
 out:
 	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae6e75b..01fa1ea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4578,7 +4578,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
 	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-	raw_spin_lock(&kvm_lock);
+	rcu_read_lock();
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
@@ -4588,7 +4588,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 				send_ipi = 1;
 		}
 	}
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 
 	if (freq->old < freq->new && send_ipi) {
 		/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c8dee22..9074cac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -234,6 +234,7 @@ struct kvm {
 #endif
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	atomic_t online_vcpus;
+	bool deleted;
 	struct list_head vm_list;
 	struct mutex lock;
 	struct kvm_io_bus *buses[KVM_NR_BUSES];
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b70c67..b5a05b5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -546,9 +546,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
+
 	raw_spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
+	kvm->deleted = true;
 	raw_spin_unlock(&kvm_lock);
+	synchronize_rcu();
+
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
@@ -2347,10 +2351,10 @@ static int vm_stat_get(void *_offset, u64 *val)
 	struct kvm *kvm;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(kvm, &vm_list, vm_list)
 		*val += *(u32 *)((void *)kvm + offset);
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 	return 0;
 }
 
@@ -2364,12 +2368,11 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	int i;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			*val += *(u32 *)((void *)vcpu + offset);
-
-	raw_spin_unlock(&kvm_lock);
+	rcu_read_unlock();
 	return 0;
 }
 
-- 
1.7.1
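
As a side note for readers less familiar with the pattern: below is a rough
userspace sketch of the handshake this patch sets up between mmu_shrink's
list_move_tail and kvm_destroy_vm. It is not kernel code and not the patch
itself -- a pthread mutex stands in for the raw kvm_lock, the RCU read side
and synchronize_rcu() appear only as comments, and struct vm, shrink_done()
and destroy_vm() are invented stand-ins for struct kvm, mmu_shrink() and
kvm_destroy_vm().

/*
 * Illustration only: a userspace model of the deleted-flag handshake.
 * Build with: cc -pthread -o vmlist-model vmlist-model.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct vm {
	struct vm *prev, *next;	/* kernel-style circular list links */
	bool deleted;		/* set under kvm_lock by destroy_vm() */
};

static struct vm vm_list = { .prev = &vm_list, .next = &vm_list };
static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;

static void list_add_tail(struct vm *vm)
{
	vm->next = &vm_list;
	vm->prev = vm_list.prev;
	vm_list.prev->next = vm;
	vm_list.prev = vm;
}

static void list_del(struct vm *vm)
{
	vm->prev->next = vm->next;
	vm->next->prev = vm->prev;
}

/* models the tail of mmu_shrink(); runs under rcu_read_lock() there */
static void shrink_done(struct vm *kvm_freed)
{
	pthread_mutex_lock(&kvm_lock);
	/*
	 * Re-check under kvm_lock: destroy_vm() may have unlinked this
	 * VM after the lockless list walk picked it.  Without the flag,
	 * the move below would re-insert a dying VM into the list.
	 */
	if (!kvm_freed->deleted) {
		list_del(kvm_freed);		/* list_move_tail() ... */
		list_add_tail(kvm_freed);	/* ... equivalent */
	}
	pthread_mutex_unlock(&kvm_lock);
}

/* models kvm_destroy_vm() */
static void destroy_vm(struct vm *vm)
{
	pthread_mutex_lock(&kvm_lock);
	list_del(vm);
	vm->deleted = true;
	pthread_mutex_unlock(&kvm_lock);
	/* synchronize_rcu() here in the patch: wait out lockless walkers */
	free(vm);
}

int main(void)
{
	struct vm *vm = calloc(1, sizeof(*vm));

	if (!vm)
		return 1;

	pthread_mutex_lock(&kvm_lock);
	list_add_tail(vm);
	pthread_mutex_unlock(&kvm_lock);

	shrink_done(vm);	/* VM still live: moved to the list tail */
	destroy_vm(vm);		/* unlinks, flags as deleted, frees */

	printf("vm_list empty again: %s\n",
	       vm_list.next == &vm_list ? "yes" : "no");
	return 0;
}

The only point is the ordering: the destroy path unlinks the VM and sets the
flag while holding kvm_lock before waiting out readers, so a concurrent
walker that still holds a reference to that VM re-checks the flag under the
same lock and refrains from re-linking it.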