1. Implement kvm_arch_vcpu_in_kernel(). Because get_cpl requires vcpu_load, we must cache the result (whether the vcpu was preempted while its CPL was 0) in kvm_vcpu_arch. 2. Add a ->spin_in_kernel hook, because we can benefit from VMX, where a PAUSE-loop exit implies CPL=0. Signed-off-by: Longpeng(Mike) <longpeng2@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/svm.c | 8 +++++++- arch/x86/kvm/vmx.c | 16 +++++++++++++++- arch/x86/kvm/x86.c | 7 ++++++- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fb..d2b2d57 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -688,6 +688,9 @@ struct kvm_vcpu_arch { /* GPA available (AMD only) */ bool gpa_available; + + /* be preempted when it's in kernel-mode(cpl=0) */ + bool preempted_in_kernel; }; struct kvm_lpage_info { @@ -1057,6 +1060,8 @@ struct kvm_x86_ops { void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); + + bool (*spin_in_kernel)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index cd0e6e6..dec5e8a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1268,7 +1268,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu)); + kvm_vcpu_on_spin(vcpu, kvm_x86_ops->spin_in_kernel(vcpu)); break; case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e6ed24e..ccb6df7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3751,7 +3751,7 @@ static int pause_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &(svm->vcpu); - kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu)); + kvm_vcpu_on_spin(vcpu, kvm_x86_ops->spin_in_kernel(vcpu)); return 1; } @@ -5364,6 +5364,11 @@ static void svm_setup_mce(struct 
kvm_vcpu *vcpu) vcpu->arch.mcg_cap &= 0x1ff; } +static bool svm_spin_in_kernel(struct kvm_vcpu *vcpu) +{ + return svm_get_cpl(vcpu) == 0; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -5476,6 +5481,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) .deliver_posted_interrupt = svm_deliver_avic_intr, .update_pi_irte = svm_update_pi_irte, .setup_mce = svm_setup_mce, + .spin_in_kernel = svm_spin_in_kernel, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9d6223a..297a158 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6761,7 +6761,8 @@ static int handle_pause(struct kvm_vcpu *vcpu) if (ple_gap) grow_ple_window(vcpu); - kvm_vcpu_on_spin(vcpu, kvm_arch_vcpu_in_kernel(vcpu)); + /* See comments in vmx_spin_in_kernel() */ + kvm_vcpu_on_spin(vcpu, true); return kvm_skip_emulated_instruction(vcpu); } @@ -11636,6 +11637,17 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEATURE_CONTROL_LMCE; } +static bool vmx_spin_in_kernel(struct kvm_vcpu *vcpu) +{ + /* + * Intel sdm vol3 ch-25.1.3 says: The “PAUSE-loop exiting” + * VM-execution control is ignored if CPL > 0. OTOH, KVM + * never set PAUSE_EXITING and just set PLE if supported, + * so the vcpu must be CPL=0 if it gets a PAUSE exit. 
+ */ + return true; +} + static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -11763,6 +11775,8 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) #endif .setup_mce = vmx_setup_mce, + + .spin_in_kernel = vmx_spin_in_kernel, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4430be6..28299b9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2881,6 +2881,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; + + if (vcpu->preempted) + vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); + /* * Disable page faults because we're in atomic context here. * kvm_write_guest_offset_cached() would call might_fault() @@ -7992,6 +7996,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; + vcpu->arch.preempted_in_kernel = false; kvm_hv_vcpu_init(vcpu); @@ -8441,7 +8446,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { - return false; + return vcpu->arch.preempted_in_kernel; } EXPORT_SYMBOL_GPL(kvm_arch_vcpu_in_kernel); -- 1.8.3.1