On Tue, Jul 31, 2012 at 2:27 PM, Christoffer Dall
<c.dall@xxxxxxxxxxxxxxxxxxxxxx> wrote:
> On Thu, Jul 5, 2012 at 5:03 PM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:
>> Now that we run the exit handler with preemption disabled, we start
>> seeing some very ugly things going on:
>>
>> BUG: scheduling while atomic: qemu-system-arm/1144/0x00000002
>> Modules linked in:
>> [<c0015f3c>] (unwind_backtrace+0x0/0xf8) from [<c03f54e0>] (__schedule_bug+0x44/0x58)
>> [<c03f54e0>] (__schedule_bug+0x44/0x58) from [<c0402690>] (__schedule+0x620/0x644)
>> [<c0402690>] (__schedule+0x620/0x644) from [<c0402ad8>] (schedule_preempt_disabled+0x24/0x34)
>> [<c0402ad8>] (schedule_preempt_disabled+0x24/0x34) from [<c04016ac>] (__mutex_lock_slowpath+0x120/0x1a8)
>> [<c04016ac>] (__mutex_lock_slowpath+0x120/0x1a8) from [<c040176c>] (mutex_lock+0x38/0x3c)
>> [<c040176c>] (mutex_lock+0x38/0x3c) from [<c0026ab4>] (user_mem_abort.isra.9+0xbc/0x1fc)
>> [<c0026ab4>] (user_mem_abort.isra.9+0xbc/0x1fc) from [<c00273bc>] (kvm_handle_guest_abort+0xdc/0x164)
>> [<c00273bc>] (kvm_handle_guest_abort+0xdc/0x164) from [<c0024fcc>] (handle_exit+0x74/0x11c)
>> [<c0024fcc>] (handle_exit+0x74/0x11c) from [<c00255c0>] (kvm_arch_vcpu_ioctl_run+0x138/0x21c)
>> [<c00255c0>] (kvm_arch_vcpu_ioctl_run+0x138/0x21c) from [<c002214c>] (kvm_vcpu_ioctl+0x3d8/0x6bc)
>> [<c002214c>] (kvm_vcpu_ioctl+0x3d8/0x6bc) from [<c00e0990>] (do_vfs_ioctl+0x7c/0x2d0)
>> [<c00e0990>] (do_vfs_ioctl+0x7c/0x2d0) from [<c00e0c1c>] (sys_ioctl+0x38/0x5c)
>> [<c00e0c1c>] (sys_ioctl+0x38/0x5c) from [<c000eec0>] (ret_fast_syscall+0x0/0x30)
>>
>> The culprit is the stage-2 lock, which is defined as a mutex, and
>> really should be a spinlock.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
>> ---
>>  arch/arm/include/asm/kvm_host.h |  2 +-
>>  arch/arm/kvm/arm.c              |  2 +-
>>  arch/arm/kvm/mmu.c              | 12 ++++++------
>>  3 files changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>> index 0c7e782..9d0cc83 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -40,7 +40,7 @@ struct kvm_arch {
>>         u32    vmid;
>>
>>         /* 1-level 2nd stage table and lock */
>> -       struct mutex pgd_mutex;
>> +       spinlock_t pgd_lock;
>>         pgd_t *pgd;
>>
>>         /* VTTBR value associated with above pgd and vmid */
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index f3b206a..44691e1 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -97,7 +97,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>         ret = kvm_alloc_stage2_pgd(kvm);
>>         if (ret)
>>                 goto out_fail_alloc;
>> -       mutex_init(&kvm->arch.pgd_mutex);
>> +       spin_lock_init(&kvm->arch.pgd_lock);
>>
>>         ret = create_hyp_mappings(kvm, kvm + 1);
>>         if (ret)
>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>> index 8c30376..4b174e6 100644
>> --- a/arch/arm/kvm/mmu.c
>> +++ b/arch/arm/kvm/mmu.c
>> @@ -354,14 +354,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>                 return -EFAULT;
>>         }
>>
>> -       mutex_lock(&vcpu->kvm->arch.pgd_mutex);
>> +       spin_lock(&vcpu->kvm->arch.pgd_lock);
>>         new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
>>         if (writable)
>>                 new_pte |= L_PTE2_WRITE;
>>         ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
>>         if (ret)
>>                 put_page(pfn_to_page(pfn));
>> -       mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
>> +       spin_unlock(&vcpu->kvm->arch.pgd_lock);
>>
>>         return ret;
>>  }
>> @@ -611,13 +611,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
>>         if (!kvm->arch.pgd)
>>                 return 0;
>>
>> -       mutex_lock(&kvm->arch.pgd_mutex);
>> +       spin_lock(&kvm->arch.pgd_lock);
>
> eh, hva_to_gpa takes its own mutex and may sleep while holding a spinlock...
>
> also stage2_set_pte below allocates memory and may sleep...
>
> Perhaps we should instead call the cpu-local-critical cache functions
> on the required CPU and run the whole thing with preemption enabled
> instead...?
>

In fact, it looks simple enough to me (comments please):

commit 95d11541f77783f00b8799a94ed99f1b1b321b1a
Author: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
Date:   Tue Jul 31 10:00:29 2012 -0400

    ARM: KVM: Run exit handling under preemption again

    This makes for a much cleaner approach with fewer worries during exit
    handling and allows for correct cache maintenance for set/way
    operations on SMP, at the cost of taking a slight performance hit when
    the guest does set/way based cache operations _and_ when we are
    preempted between guest exit and emulation code.

    Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index e50d9b1..0006264 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -113,6 +113,7 @@ struct kvm_vcpu_arch {
                                           instructions */

        /* dcache set/way operation pending */
+       int last_pcpu;
        cpumask_t require_dcache_flush;

        /* IO related fields */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 52e449e..38e9448 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -285,8 +285,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        /*
         * Check whether this vcpu requires the cache to be flushed on
         * this physical CPU. This is a consequence of doing dcache
-        * operations by set/way on this vcpu. We do it here in order
-        * to be in a non-preemptible section.
+        * operations by set/way on this vcpu. We do it here to be in
+        * a non-preemptible section.
         */
        if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
                flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
@@ -535,15 +535,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                cond_resched();
                update_vttbr(vcpu->kvm);

-               /*
-                * Make sure preemption is disabled while calling handle_exit
-                * as exit handling touches CPU-specific resources, such as
-                * caches, and we must stay on the same CPU.
-                *
-                * Code that might sleep must enable preemption and access
-                * CPU-specific resources first.
-                */
-               preempt_disable();
                local_irq_disable();

                /*
@@ -556,7 +547,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)

                if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
                        local_irq_enable();
-                       preempt_enable();
                        continue;
                }

@@ -572,6 +562,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                ret = __kvm_vcpu_run(vcpu);

                vcpu->mode = OUTSIDE_GUEST_MODE;
+               vcpu->arch.last_pcpu = smp_processor_id();
                vcpu->stat.exits++;
                kvm_guest_exit();
                trace_kvm_exit(vcpu->arch.regs.pc);
@@ -582,7 +573,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                 *************************************************************/

                ret = handle_exit(vcpu, run, ret);
-               preempt_enable();
        }

        if (vcpu->sigset_active)
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index 039fc3a..4a89710 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -20,6 +20,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
+#include <asm/cacheflush.h>
 #include <trace/events/kvm.h>

 #include "trace.h"
@@ -299,6 +300,18 @@ static bool write_dcsw(struct kvm_vcpu *vcpu,
                       unsigned long cp15_reg)
 {
        u32 val;
+       int cpu;
+
+       cpu = get_cpu();
+
+       cpumask_setall(&vcpu->arch.require_dcache_flush);
+       cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
+
+       /* If we were already preempted, take the long way around */
+       if (cpu != vcpu->arch.last_pcpu) {
+               flush_cache_all();
+               goto done;
+       }

        val = *vcpu_reg(vcpu, p->Rt1);

@@ -313,8 +326,8 @@ static bool write_dcsw(struct kvm_vcpu *vcpu,
                break;
        }

-       cpumask_setall(&vcpu->arch.require_dcache_flush);
-       cpumask_clear_cpu(vcpu->cpu, &vcpu->arch.require_dcache_flush);
+done:
+       put_cpu();

        return true;
 }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 24769af..b4927ce 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -341,10 +341,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
        }

-       /* preemption disabled for handle_exit, gfn_to_pfn may sleep */
-       preempt_enable();
        pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
-       preempt_disable();

        if (is_error_pfn(pfn)) {
                put_page(pfn_to_page(pfn));
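
To make the control flow of the patch easier to follow, here is a small
stand-alone C sketch of the bookkeeping it introduces: a set/way operation is
emulated on the current physical CPU, every other CPU is marked as needing a
full dcache flush, and the flush happens either immediately (when we were
preempted between guest exit and emulation, i.e. cpu != last_pcpu) or lazily
the next time the vcpu is loaded on a marked CPU. This is only an illustrative
user-space model, not kernel code: NR_CPUS, the plain bitmask and the printf()
calls stand in for the kernel's cpumask_t, get_cpu()/smp_processor_id() and
flush_cache_all(), and the *_model names are invented for this sketch.

/*
 * Stand-alone model of the deferred dcache-flush bookkeeping above.
 * Assumes NR_CPUS < 32 so a plain unsigned int can act as the cpumask.
 */
#include <stdio.h>

#define NR_CPUS 4

struct vcpu_model {
	int last_pcpu;                      /* pcpu the guest last ran on */
	unsigned int require_dcache_flush;  /* one bit per physical CPU   */
};

/* Guest executed a dc set/way op; we are emulating it on 'cpu'. */
static void write_dcsw_model(struct vcpu_model *vcpu, int cpu)
{
	/* Every other pcpu must flush before this vcpu runs there again. */
	vcpu->require_dcache_flush = (1u << NR_CPUS) - 1;
	vcpu->require_dcache_flush &= ~(1u << cpu);

	if (cpu != vcpu->last_pcpu) {
		/*
		 * Preempted between guest exit and emulation: the cache
		 * state we care about is on another CPU, so take the
		 * expensive path and flush everything here.
		 */
		printf("cpu%d: flush_cache_all()\n", cpu);
		return;
	}

	printf("cpu%d: emulate set/way locally\n", cpu);
}

/* The vcpu is being scheduled onto 'cpu'; honour any pending flush. */
static void vcpu_load_model(struct vcpu_model *vcpu, int cpu)
{
	if (vcpu->require_dcache_flush & (1u << cpu)) {
		vcpu->require_dcache_flush &= ~(1u << cpu);
		printf("cpu%d: flush_cache_all() in vcpu_load\n", cpu);
	}
}

int main(void)
{
	struct vcpu_model vcpu = { .last_pcpu = 0, .require_dcache_flush = 0 };

	write_dcsw_model(&vcpu, 0);  /* same pcpu: cheap local emulation    */
	vcpu_load_model(&vcpu, 2);   /* later migrated to cpu2: must flush  */
	return 0;
}

Running it shows the cheap path (local emulation on the pcpu the guest last
ran on) followed by the deferred flush when the vcpu is next loaded on another
CPU, mirroring the write_dcsw()/kvm_arch_vcpu_load() pair in the patch above.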