Now that we run the exit handler with preemption disabled, we start
seeing some very ugly things going on:

BUG: scheduling while atomic: qemu-system-arm/1144/0x00000002
Modules linked in:
[<c0015f3c>] (unwind_backtrace+0x0/0xf8) from [<c03f54e0>] (__schedule_bug+0x44/0x58)
[<c03f54e0>] (__schedule_bug+0x44/0x58) from [<c0402690>] (__schedule+0x620/0x644)
[<c0402690>] (__schedule+0x620/0x644) from [<c0402ad8>] (schedule_preempt_disabled+0x24/0x34)
[<c0402ad8>] (schedule_preempt_disabled+0x24/0x34) from [<c04016ac>] (__mutex_lock_slowpath+0x120/0x1a8)
[<c04016ac>] (__mutex_lock_slowpath+0x120/0x1a8) from [<c040176c>] (mutex_lock+0x38/0x3c)
[<c040176c>] (mutex_lock+0x38/0x3c) from [<c0026ab4>] (user_mem_abort.isra.9+0xbc/0x1fc)
[<c0026ab4>] (user_mem_abort.isra.9+0xbc/0x1fc) from [<c00273bc>] (kvm_handle_guest_abort+0xdc/0x164)
[<c00273bc>] (kvm_handle_guest_abort+0xdc/0x164) from [<c0024fcc>] (handle_exit+0x74/0x11c)
[<c0024fcc>] (handle_exit+0x74/0x11c) from [<c00255c0>] (kvm_arch_vcpu_ioctl_run+0x138/0x21c)
[<c00255c0>] (kvm_arch_vcpu_ioctl_run+0x138/0x21c) from [<c002214c>] (kvm_vcpu_ioctl+0x3d8/0x6bc)
[<c002214c>] (kvm_vcpu_ioctl+0x3d8/0x6bc) from [<c00e0990>] (do_vfs_ioctl+0x7c/0x2d0)
[<c00e0990>] (do_vfs_ioctl+0x7c/0x2d0) from [<c00e0c1c>] (sys_ioctl+0x38/0x5c)
[<c00e0c1c>] (sys_ioctl+0x38/0x5c) from [<c000eec0>] (ret_fast_syscall+0x0/0x30)

The culprit is the stage-2 lock, which is defined as a mutex but really
should be a spinlock: as the trace shows, it is taken from the guest
abort path (user_mem_abort), where sleeping is not allowed.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
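Note (not part of the commit message): below is a minimal sketch of the
failure mode, in case it helps review. With preemption disabled,
mutex_lock() may call schedule() via __mutex_lock_slowpath() (visible in
the trace above), which the scheduler rejects as "scheduling while
atomic"; spin_lock() busy-waits and never sleeps, so it is safe in this
context. The demo_* and atomic_context_demo() names are made up for
this illustration only.

#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>

static DEFINE_MUTEX(demo_mutex);
static DEFINE_SPINLOCK(demo_lock);

static void atomic_context_demo(void)
{
	preempt_disable();		/* like the KVM exit path */

	/* BAD: the mutex slowpath may sleep, triggering the BUG above:
	 *
	 *	mutex_lock(&demo_mutex);
	 *	mutex_unlock(&demo_mutex);
	 */

	spin_lock(&demo_lock);		/* OK: spins, never sleeps */
	/* ... critical section ... */
	spin_unlock(&demo_lock);

	preempt_enable();
}

The patch below makes exactly this substitution for the stage-2 lock.
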
 arch/arm/include/asm/kvm_host.h |    2 +-
 arch/arm/kvm/arm.c              |    2 +-
 arch/arm/kvm/mmu.c              |   12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0c7e782..9d0cc83 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -40,7 +40,7 @@ struct kvm_arch {
 	u32 vmid;
 
 	/* 1-level 2nd stage table and lock */
-	struct mutex pgd_mutex;
+	spinlock_t pgd_lock;
 	pgd_t *pgd;
 
 	/* VTTBR value associated with above pgd and vmid */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f3b206a..44691e1 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -97,7 +97,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	ret = kvm_alloc_stage2_pgd(kvm);
 	if (ret)
 		goto out_fail_alloc;
-	mutex_init(&kvm->arch.pgd_mutex);
+	spin_lock_init(&kvm->arch.pgd_lock);
 
 	ret = create_hyp_mappings(kvm, kvm + 1);
 	if (ret)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8c30376..4b174e6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -354,14 +354,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+	spin_lock(&vcpu->kvm->arch.pgd_lock);
 	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
 	if (writable)
 		new_pte |= L_PTE2_WRITE;
 	ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
 	if (ret)
 		put_page(pfn_to_page(pfn));
-	mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+	spin_unlock(&vcpu->kvm->arch.pgd_lock);
 
 	return ret;
 }
@@ -611,13 +611,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 	if (!kvm->arch.pgd)
 		return 0;
 
-	mutex_lock(&kvm->arch.pgd_mutex);
+	spin_lock(&kvm->arch.pgd_lock);
 	found = hva_to_gpa(kvm, hva, &gpa);
 	if (found) {
 		stage2_set_pte(kvm, gpa, &null_pte);
 		__kvm_tlb_flush_vmid(kvm);
 	}
-	mutex_unlock(&kvm->arch.pgd_mutex);
+	spin_unlock(&kvm->arch.pgd_lock);
 
 	return 0;
 }
@@ -629,7 +629,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 	if (!kvm->arch.pgd)
 		return;
 
-	mutex_lock(&kvm->arch.pgd_mutex);
+	spin_lock(&kvm->arch.pgd_lock);
 	found = hva_to_gpa(kvm, hva, &gpa);
 	if (found) {
 		stage2_set_pte(kvm, gpa, &pte);
@@ -640,5 +640,5 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 		 */
 		__kvm_tlb_flush_vmid(kvm);
 	}
-	mutex_unlock(&kvm->arch.pgd_mutex);
+	spin_unlock(&kvm->arch.pgd_lock);
 }
-- 
1.7.10.3