Re: [PATCH 1/4] ARM: KVM: Convert stage-2 mutex to a spinlock

On Thu, Jul 5, 2012 at 5:03 PM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:
> Now that we run the exit handler with preemption disabled, we start
> seeing some very ugly things going on:
>
> BUG: scheduling while atomic: qemu-system-arm/1144/0x00000002
> Modules linked in:
> [<c0015f3c>] (unwind_backtrace+0x0/0xf8) from [<c03f54e0>] (__schedule_bug+0x44/0x58)
> [<c03f54e0>] (__schedule_bug+0x44/0x58) from [<c0402690>] (__schedule+0x620/0x644)
> [<c0402690>] (__schedule+0x620/0x644) from [<c0402ad8>] (schedule_preempt_disabled+0x24/0x34)
> [<c0402ad8>] (schedule_preempt_disabled+0x24/0x34) from [<c04016ac>] (__mutex_lock_slowpath+0x120/0x1a8)
> [<c04016ac>] (__mutex_lock_slowpath+0x120/0x1a8) from [<c040176c>] (mutex_lock+0x38/0x3c)
> [<c040176c>] (mutex_lock+0x38/0x3c) from [<c0026ab4>] (user_mem_abort.isra.9+0xbc/0x1fc)
> [<c0026ab4>] (user_mem_abort.isra.9+0xbc/0x1fc) from [<c00273bc>] (kvm_handle_guest_abort+0xdc/0x164)
> [<c00273bc>] (kvm_handle_guest_abort+0xdc/0x164) from [<c0024fcc>] (handle_exit+0x74/0x11c)
> [<c0024fcc>] (handle_exit+0x74/0x11c) from [<c00255c0>] (kvm_arch_vcpu_ioctl_run+0x138/0x21c)
> [<c00255c0>] (kvm_arch_vcpu_ioctl_run+0x138/0x21c) from [<c002214c>] (kvm_vcpu_ioctl+0x3d8/0x6bc)
> [<c002214c>] (kvm_vcpu_ioctl+0x3d8/0x6bc) from [<c00e0990>] (do_vfs_ioctl+0x7c/0x2d0)
> [<c00e0990>] (do_vfs_ioctl+0x7c/0x2d0) from [<c00e0c1c>] (sys_ioctl+0x38/0x5c)
> [<c00e0c1c>] (sys_ioctl+0x38/0x5c) from [<c000eec0>] (ret_fast_syscall+0x0/0x30)
>
> The culprit is the stage-2 lock, which is defined as a mutex, and
> really should be a spinlock.
>
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> ---
>  arch/arm/include/asm/kvm_host.h |    2 +-
>  arch/arm/kvm/arm.c              |    2 +-
>  arch/arm/kvm/mmu.c              |   12 ++++++------
>  3 files changed, 8 insertions(+), 8 deletions(-)
>
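The splat above is the scheduler's generic sleeping-in-atomic check; a
minimal sketch of the pattern (illustrative names, not the actual
KVM/ARM call chain):

#include <linux/mutex.h>
#include <linux/preempt.h>

static DEFINE_MUTEX(demo_mutex);

static void atomic_context_demo(void)
{
	preempt_disable();		/* models the preempt-off exit handler */
	mutex_lock(&demo_mutex);	/* may schedule() on contention:
					 * "BUG: scheduling while atomic" */
	mutex_unlock(&demo_mutex);
	preempt_enable();
}

A spinlock avoids that particular BUG, but only if nothing inside the
critical section can sleep, which is where this patch runs into
trouble below.
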
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 0c7e782..9d0cc83 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -40,7 +40,7 @@ struct kvm_arch {
>         u32    vmid;
>
>         /* 1-level 2nd stage table and lock */
> -       struct mutex pgd_mutex;
> +       spinlock_t pgd_lock;
>         pgd_t *pgd;
>
>         /* VTTBR value associated with above pgd and vmid */
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index f3b206a..44691e1 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -97,7 +97,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>         ret = kvm_alloc_stage2_pgd(kvm);
>         if (ret)
>                 goto out_fail_alloc;
> -       mutex_init(&kvm->arch.pgd_mutex);
> +       spin_lock_init(&kvm->arch.pgd_lock);
>
>         ret = create_hyp_mappings(kvm, kvm + 1);
>         if (ret)
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 8c30376..4b174e6 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -354,14 +354,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>                 return -EFAULT;
>         }
>
> -       mutex_lock(&vcpu->kvm->arch.pgd_mutex);
> +       spin_lock(&vcpu->kvm->arch.pgd_lock);
>         new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
>         if (writable)
>                 new_pte |= L_PTE2_WRITE;
>         ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
>         if (ret)
>                 put_page(pfn_to_page(pfn));
> -       mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
> +       spin_unlock(&vcpu->kvm->arch.pgd_lock);
>
>         return ret;
>  }
> @@ -611,13 +611,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
>         if (!kvm->arch.pgd)
>                 return 0;
>
> -       mutex_lock(&kvm->arch.pgd_mutex);
> +       spin_lock(&kvm->arch.pgd_lock);

Eh, hva_to_gpa takes its own mutex, so it may sleep while we hold a spinlock...

Also, stage2_set_pte below allocates memory and may sleep...
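
For the allocation half there is a standard way out; a sketch of the
general pattern only, where stage2_set_pte_cached() is a hypothetical
variant that consumes a preallocated page instead of allocating:

#include <linux/gfp.h>
#include <linux/kvm_host.h>
#include <linux/spinlock.h>

static int map_one_pte(struct kvm *kvm, phys_addr_t gpa, pte_t pte)
{
	struct page *cache;

	/* GFP_KERNEL may sleep, so allocate before taking the lock. */
	cache = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!cache)
		return -ENOMEM;

	spin_lock(&kvm->arch.pgd_lock);
	/* Hypothetical helper: uses 'cache' if a table page is needed,
	 * never allocates, never sleeps. */
	stage2_set_pte_cached(kvm, gpa, &pte, cache);
	spin_unlock(&kvm->arch.pgd_lock);
	return 0;
}

(An unused cache page would have to be freed afterwards; omitted here.)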

Perhaps we should instead call the CPU-local-critical cache functions
on the required CPU, and run the whole thing with preemption
enabled...?
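
Something along these lines, as a rough sketch (do_local_cache_ops()
is a hypothetical wrapper around the per-CPU-critical maintenance):

#include <linux/kvm_host.h>
#include <linux/smp.h>

/* Runs on the target CPU; the IPI path keeps interrupts disabled,
 * so the cache maintenance sequence cannot migrate mid-stream. */
static void do_local_cache_ops(void *info)
{
	struct kvm_vcpu *vcpu = info;
	/* ... CPU-local cache/TLB maintenance for vcpu ... */
}

static void flush_on_vcpu_cpu(struct kvm_vcpu *vcpu)
{
	/* wait=1: only return once the remote function has run. */
	smp_call_function_single(vcpu->cpu, do_local_cache_ops, vcpu, 1);
}

That would keep the bulk of the exit path preemptible (and sleepable),
confining the must-not-migrate work to the IPI handler.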

>         found = hva_to_gpa(kvm, hva, &gpa);
>         if (found) {
>                 stage2_set_pte(kvm, gpa, &null_pte);
>                 __kvm_tlb_flush_vmid(kvm);
>         }
> -       mutex_unlock(&kvm->arch.pgd_mutex);
> +       spin_unlock(&kvm->arch.pgd_lock);
>         return 0;
>  }
>
> @@ -629,7 +629,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
>         if (!kvm->arch.pgd)
>                 return;
>
> -       mutex_lock(&kvm->arch.pgd_mutex);
> +       spin_lock(&kvm->arch.pgd_lock);
>         found = hva_to_gpa(kvm, hva, &gpa);
>         if (found) {
>                 stage2_set_pte(kvm, gpa, &pte);
> @@ -640,5 +640,5 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
>                  */
>                 __kvm_tlb_flush_vmid(kvm);
>         }
> -       mutex_unlock(&kvm->arch.pgd_mutex);
> +       spin_unlock(&kvm->arch.pgd_lock);
>  }
> --
> 1.7.10.3
>
>
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm

