On 08/08/2017 10:42, David Hildenbrand wrote: > >> +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) >> +{ >> + return false; >> +} > > why don't we need an EXPORT_SYMBOL here? Is it used outside the KVM module? I think no architecture actually needs to export it. >> -void kvm_vcpu_on_spin(struct kvm_vcpu *me) >> +void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool me_in_kern) >> { >> struct kvm *kvm = me->kvm; >> struct kvm_vcpu *vcpu; >> @@ -2348,6 +2348,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) >> continue; >> if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) >> continue; >> + if (me_in_kern && !kvm_arch_vcpu_in_kernel(vcpu)) >> + continue; > > > hm, does this patch compile? (me_in_kern) Why not? :) This is what I have: >From d62a40d49f44ff7e789a15416316ef1cba93fa85 Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" <longpeng2@xxxxxxxxxx> Date: Tue, 8 Aug 2017 12:05:32 +0800 Subject: [PATCH 1/4] KVM: add spinlock optimization framework If a vcpu exits due to request a user mode spinlock, then the spinlock-holder may be preempted in user mode or kernel mode. (Note that not all architectures trap spin loops in user mode, only AMD x86 and ARM/ARM64 currently do). But if a vcpu exits in kernel mode, then the holder must be preempted in kernel mode, so we should choose a vcpu in kernel mode as a more likely candidate for the lock holder. This introduces kvm_arch_vcpu_in_kernel() to decide whether the vcpu is in kernel-mode when it's preempted. kvm_vcpu_on_spin's new argument says the same of the spinning VCPU. Signed-off-by: Longpeng(Mike) <longpeng2@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/arm/kvm/handle_exit.c | 2 +- arch/arm64/kvm/handle_exit.c | 2 +- arch/mips/kvm/mips.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/s390/kvm/diag.c | 2 +- arch/s390/kvm/kvm-s390.c | 5 +++++ arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm_host.h | 3 ++- virt/kvm/arm/arm.c | 5 +++++ virt/kvm/kvm_main.c | 4 +++- 13 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 54442e375354..196122bb6968 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, false); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 17d8a1677a0b..da57622cacca 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, false); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d4b2ad18eef2..70208bed5a15 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return !!(vcpu->arch.pending_exceptions); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1a75c0b5f4ca..6184c45015f3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index ce865bd4f81d..6182edebea3d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, false); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index af09d3437631..0b0c689f1d9a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2447,6 +2447,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_s390_vcpu_has_irq(vcpu, 0); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) { atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index bf9992300efa..5243d54f73ab 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1274,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, false); break; case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2432bb952a30..0cc486fd9871 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3749,7 +3749,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { - kvm_vcpu_on_spin(&(svm->vcpu)); + kvm_vcpu_on_spin(&svm->vcpu, false); return 1; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2c0f5287fb78..fef784c22190 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6781,7 +6781,7 @@ static int handle_pause(struct kvm_vcpu *vcpu) if (ple_gap) grow_ple_window(vcpu); - kvm_vcpu_on_spin(vcpu); + kvm_vcpu_on_spin(vcpu, false); return kvm_skip_emulated_instruction(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33fd6b6419ef..aba9d038d09e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8432,6 +8432,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 28112d7917c1..6882538eda32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -720,7 +720,7 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); -void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); @@ -800,6 +800,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); #ifndef __KVM_HAVE_ARCH_VM_ALLOC diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a39a1e161e63..862f820d06d4 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) && !v->arch.power_off && !v->arch.pause); } +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return false; +} + /* Just ensure a guest exit from a particular CPU */ static void exit_vm_noop(void *info) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 15252d723b54..e17c40d986f3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2317,7 +2317,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) #endif } -void kvm_vcpu_on_spin(struct kvm_vcpu *me) +void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; @@ -2348,6 +2348,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; + if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) + continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; -- 1.8.3.1 >From 325c00511ab67d7e46e887ecea0d8b6b91005948 Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" <longpeng2@xxxxxxxxxx> Date: Tue, 8 Aug 2017 12:05:33 +0800 Subject: [PATCH 2/4] KVM: X86: implement the logic for spinlock optimization get_cpl requires vcpu_load, so we must cache the result (whether the vcpu was preempted when its cpl=0) in kvm_vcpu_arch. Signed-off-by: Longpeng(Mike) <longpeng2@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/svm.c | 5 ++++- arch/x86/kvm/vmx.c | 8 +++++++- arch/x86/kvm/x86.c | 7 ++++++- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fba6d8e..1679aabcabe5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -688,6 +688,9 @@ struct kvm_vcpu_arch { /* GPA available (AMD only) */ bool gpa_available; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5243d54f73ab..dc97f2544b6f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1274,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu, false); + kvm_vcpu_on_spin(vcpu, true); break; case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0cc486fd9871..fc027f2e87d1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3749,7 +3749,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { - kvm_vcpu_on_spin(&svm->vcpu, false); + struct kvm_vcpu *vcpu = &svm->vcpu; + bool in_kernel = (svm_get_cpl(vcpu) == 0); + + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fef784c22190..46d08b389e36 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6781,7 +6781,13 @@ static int handle_pause(struct kvm_vcpu *vcpu) if (ple_gap) grow_ple_window(vcpu); - kvm_vcpu_on_spin(vcpu, false); + /* + * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" + * VM-execution control is ignored if CPL > 0. OTOH, KVM + * never set PAUSE_EXITING and just only uses PLE, + * so the vcpu must be CPL=0 if it gets a PAUSE exit. + */ + kvm_vcpu_on_spin(vcpu, true); return kvm_skip_emulated_instruction(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aba9d038d09e..bee9ad0ed4cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2873,6 +2873,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; + + if (vcpu->preempted) + vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); + /* * Disable page faults because we're in atomic context here. * kvm_write_guest_offset_cached() would call might_fault() @@ -7985,6 +7989,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; + vcpu->arch.preempted_in_kernel = false; kvm_hv_vcpu_init(vcpu); @@ -8434,7 +8439,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { - return false; + return vcpu->arch.preempted_in_kernel; } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) -- 1.8.3.1 >From c83cf5cf8d972ea37b55db7e059767f5e3af55d0 Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" <longpeng2@xxxxxxxxxx> Date: Tue, 8 Aug 2017 12:05:34 +0800 Subject: [PATCH 3/4] KVM: s390: implements the kvm_arch_vcpu_in_kernel() This implements kvm_arch_vcpu_in_kernel() for s390. DIAG is a privileged operation, so it cannot be called from problem state (user mode). Signed-off-by: Longpeng(Mike) <longpeng2@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/s390/kvm/diag.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 6182edebea3d..5ee90020382d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - kvm_vcpu_on_spin(vcpu, false); + kvm_vcpu_on_spin(vcpu, true); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0b0c689f1d9a..e46177bde829 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2449,7 +2449,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { - return false; + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); } void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) -- 1.8.3.1 >From 3d7c9a55166d7f0d955774360bca72a13b4ee96c Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" <longpeng2@xxxxxxxxxx> Date: Tue, 8 Aug 2017 12:05:35 +0800 Subject: [PATCH 4/4] KVM: arm: implements the kvm_arch_vcpu_in_kernel() This implements the kvm_arch_vcpu_in_kernel() for ARM, and adjusts the calls to kvm_vcpu_on_spin(). Signed-off-by: Longpeng(Mike) <longpeng2@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- arch/arm/kvm/handle_exit.c | 2 +- arch/arm64/kvm/handle_exit.c | 2 +- virt/kvm/arm/arm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 196122bb6968..cf8bf6bf87c4 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu, false); + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index da57622cacca..7debb74843a0 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; - kvm_vcpu_on_spin(vcpu, false); + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 862f820d06d4..b9f68e4add71 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -418,7 +418,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { - return false; + return vcpu_mode_priv(vcpu); } /* Just ensure a guest exit from a particular CPU */ -- 1.8.3.1