[PATCH 1/2] kvm: Fix mmu_notifier release race

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The KVM uses mmu_notifier (wherever available) to keep track
of the changes to the mm of the guest. The guest shadow page
tables are released when the VM exits via mmu_notifier->ops.release().
There is a rare chance that the mmu_notifier->release could be
called more than once via two different paths, which could end
up in use-after-free of kvm instance (such as [0]).

e.g:

thread A                                        thread B
-------                                         --------------

 get_signal->                                   kvm_destroy_vm()->
 do_exit->                                        mmu_notifier_unregister->
 exit_mm->                                        kvm_arch_flush_shadow_all()->
 exit_mmap->                                      spin_lock(&kvm->mmu_lock)
 mmu_notifier_release->                           ....
  kvm_arch_flush_shadow_all()->                   .....
  ... spin_lock(&kvm->mmu_lock)                   .....
                                                  spin_unlock(&kvm->mmu_lock)
                                                kvm_arch_free_kvm()
   *** use after free of kvm ***

This patch attempts to solve the problem by holding a reference to the KVM
for the mmu_notifier, which is dropped only from notifier->ops.release().
This will ensure that the KVM struct is available till we reach the
kvm_mmu_notifier_release, and the kvm_destroy_vm is called only from/after
it. So, we can unregister the notifier with no_release option and hence
avoiding the race above. However, we need to make sure that the KVM is
freed only after the mmu_notifier has finished processing the notifier due to
the following possible path of execution :

mmu_notifier_release -> kvm_mmu_notifier_release -> kvm_put_kvm ->
  kvm_destroy_vm -> kvm_arch_free_kvm

[0] http://lkml.kernel.org/r/CAAeHK+x8udHKq9xa1zkTO6ax5E8Dk32HYWfaT05FMchL2cr48g@xxxxxxxxxxxxxx

Fixes: commit 85db06e514422 ("KVM: mmu_notifiers release method")
Reported-by: andreyknvl@xxxxxxxxxx
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
Cc: Christoffer Dall <christoffer.dall@xxxxxxxxxx>
Cc: andreyknvl@xxxxxxxxxx
Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
Tested-by: Mark Rutland <mark.rutland@xxxxxxx>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 59 ++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d025074..561e968 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -424,6 +424,7 @@ struct kvm {
 	struct mmu_notifier mmu_notifier;
 	unsigned long mmu_notifier_seq;
 	long mmu_notifier_count;
+	struct rcu_head mmu_notifier_rcu;
 #endif
 	long tlbs_dirty;
 	struct list_head devices;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 88257b3..2c3fdd4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -471,6 +471,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	kvm_arch_flush_shadow_all(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
+	kvm_put_kvm(kvm);
 }
 
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
@@ -486,8 +487,46 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 
 static int kvm_init_mmu_notifier(struct kvm *kvm)
 {
+	int rc;
 	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+	rc = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+	/*
+	 * We hold a reference to KVM here to make sure that the KVM
+	 * doesn't get free'd before ops->release() completes.
+	 */
+	if (!rc)
+		kvm_get_kvm(kvm);
+	return rc;
+}
+
+static void kvm_free_vm_rcu(struct rcu_head *rcu)
+{
+	struct kvm *kvm = container_of(rcu, struct kvm, mmu_notifier_rcu);
+	kvm_arch_free_vm(kvm);
+}
+
+static void kvm_flush_shadow_mmu(struct kvm *kvm)
+{
+	/*
+	 * We hold a reference to kvm instance for mmu_notifier and is
+	 * only released when ops->release() is called via exit_mmap path.
+	 * So, when we reach here ops->release() has been called already, which
+	 * flushes the shadow page tables. Hence there is no need to call the
+	 * release() again when we unregister the notifier. However, we need
+	 * to delay freeing up the kvm until the release() completes, since
+	 * we could reach here via :
+	 *  kvm_mmu_notifier_release() -> kvm_put_kvm() -> kvm_destroy_vm()
+	 */
+	mmu_notifier_unregister_no_release(&kvm->mmu_notifier, kvm->mm);
+}
+
+static void kvm_free_vm(struct kvm *kvm)
+{
+	/*
+	 * Wait until the mmu_notifier has finished the release().
+	 * See comments above in kvm_flush_shadow_mmu.
+	 */
+	mmu_notifier_call_srcu(&kvm->mmu_notifier_rcu, kvm_free_vm_rcu);
 }
 
 #else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
@@ -497,6 +536,16 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 	return 0;
 }
 
+static void kvm_flush_shadow_mmu(struct kvm *kvm)
+{
+	kvm_arch_flush_shadow_all(kvm);
+}
+
+static void kvm_free_vm(struct kvm *kvm)
+{
+	kvm_arch_free_vm(kvm);
+}
+
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
 static struct kvm_memslots *kvm_alloc_memslots(void)
@@ -733,18 +782,14 @@ static void kvm_destroy_vm(struct kvm *kvm)
 		kvm->buses[i] = NULL;
 	}
 	kvm_coalesced_mmio_free(kvm);
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
-#else
-	kvm_arch_flush_shadow_all(kvm);
-#endif
+	kvm_flush_shadow_mmu(kvm);
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 		kvm_free_memslots(kvm, kvm->memslots[i]);
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
-	kvm_arch_free_vm(kvm);
+	kvm_free_vm(kvm);
 	preempt_notifier_dec();
 	hardware_disable_all();
 	mmdrop(mm);
-- 
2.7.4




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux