> On 19 Nov 2019, at 20:36, Sean Christopherson <sean.j.christopherson@xxxxxxxxx> wrote:
>
> On Tue, Nov 19, 2019 at 02:36:28PM +0800, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>>
>> In our product observations, ICR and TSCDEADLINE MSR writes cause the
>> bulk of MSR-write vmexits; multicast IPIs are not as common as unicast
>> IPIs such as RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR.
>>
>> This patch optimizes the x2apic physical destination mode, fixed
>> delivery mode, single-target IPI case by delivering the IPI to the
>> receiver as soon as possible after the sender's ICR-write vmexit,
>> avoiding various checks where possible. In particular, when running
>> the guest with --overcommit cpu-pm=on, or when the target vCPU keeps
>> running, the IPI can be injected into the target vCPU immediately via
>> posted interrupts.
>>
>> Testing on a Xeon Skylake server:
>>
>> The virtual IPI latency from sender send to receiver receive is
>> reduced by more than 200 CPU cycles.
>>
>> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
>> ---
>> v1 -> v2:
>>  * add tracepoint
>>  * Instead of a separate vcpu->fast_vmexit, set vmx->exit_reason
>>    to -1 if the fast path succeeds.
>>  * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
>>  * moving the handling into vmx_handle_exit_irqoff()
>>
>>  arch/x86/include/asm/kvm_host.h |  4 ++--
>>  arch/x86/include/uapi/asm/vmx.h |  1 +
>>  arch/x86/kvm/svm.c              |  4 ++--
>>  arch/x86/kvm/vmx/vmx.c          | 40 +++++++++++++++++++++++++++++++++++++---
>>  arch/x86/kvm/x86.c              |  5 +++--
>>  5 files changed, 45 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 898ab9e..0daafa9 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1084,7 +1084,7 @@ struct kvm_x86_ops {
>>  	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>>
>>  	void (*run)(struct kvm_vcpu *vcpu);
>> -	int (*handle_exit)(struct kvm_vcpu *vcpu);
>> +	int (*handle_exit)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
>>  	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>>  	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
>>  	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
>> @@ -1134,7 +1134,7 @@ struct kvm_x86_ops {
>>  	int (*check_intercept)(struct kvm_vcpu *vcpu,
>>  			       struct x86_instruction_info *info,
>>  			       enum x86_intercept_stage stage);
>> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
>> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
>>  	bool (*mpx_supported)(void);
>>  	bool (*xsaves_supported)(void);
>>  	bool (*umip_emulated)(void);
>> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
>> index 3eb8411..b33c6e1 100644
>> --- a/arch/x86/include/uapi/asm/vmx.h
>> +++ b/arch/x86/include/uapi/asm/vmx.h
>> @@ -88,6 +88,7 @@
>>  #define EXIT_REASON_XRSTORS             64
>>  #define EXIT_REASON_UMWAIT              67
>>  #define EXIT_REASON_TPAUSE              68
>> +#define EXIT_REASON_NEED_SKIP_EMULATED_INSN -1
>>
>>  #define VMX_EXIT_REASONS \
>>  	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
>
> Rather than pass a custom exit reason around, can we simply handle *all*
> x2apic ICR writes during handle_exit_irqoff() for both VMX and SVM?  The
> only risk I can think of is that KVM could stall too long before enabling
> IRQs.

I agree that if it doesn't cause KVM to run for too long with interrupts
disabled, then this is a nicer approach. However, I think we could
generalise this patch a bit into a single, clear code path where
accelerated exit handling is meant to live.
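Concretely, the shape could be something like the sketch below. This is
illustrative only: kvm_accel_handle_exit_irqoff() is a made-up name, and
kvm_x2apic_fast_icr_write() refers to the helper introduced in the patch
quoted further down.

static void kvm_accel_handle_exit_irqoff(struct kvm_vcpu *vcpu,
                                         u32 exit_reason)
{
        switch (exit_reason) {
        case EXIT_REASON_MSR_WRITE:
                /* x2APIC ICR fast path, as in the patch below */
                kvm_x2apic_fast_icr_write(vcpu);
                break;
        default:
                /* future accelerated exit handlers slot in here */
                break;
        }
}

Vendor code (VMX/SVM) would translate its raw exit code and call this one
dispatcher, so every fast-path handler lives in the same place.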
See my other reply in this email thread and tell me what you think:
https://www.spinics.net/lists/kernel/msg3322282.html

-Liran

> From 1ea8ff1aa766928c869ef7c1eb437fe4f7b8daf9 Mon Sep 17 00:00:00 2001
> From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> Date: Tue, 19 Nov 2019 09:50:42 -0800
> Subject: [PATCH] KVM: x86: Add a fast path for sending virtual IPIs in x2APIC
>  mode
>
> Add a fast path to handle writes to the ICR when the local APIC is
> emulated in the kernel and x2APIC is enabled. The fast path is invoked
> at ->handle_exit_irqoff() to emulate only the effect of the ICR write
> itself, i.e. the sending of IPIs. Sending IPIs early in the VM-Exit
> flow reduces the latency of virtual IPIs by avoiding the expensive bits
> of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
>
> Suggested-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
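For context (not part of the patch): ->handle_exit_irqoff() is called from
vcpu_enter_guest() while interrupts are still disabled, well before the
full exit handler runs. A simplified sketch of the tail of
vcpu_enter_guest() in arch/x86/kvm/x86.c from this era, with most steps
elided:

        kvm_x86_ops->run(vcpu);
        /* ... */
        kvm_x86_ops->handle_exit_irqoff(vcpu);  /* IRQs still off: ICR fast path */
        /* ... IRQs re-enabled, guest_exit_irqoff(), preempt_enable() ... */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        /* ... */
        r = kvm_x86_ops->handle_exit(vcpu);     /* full, slower exit handling */

The IPI is delivered at the first call instead of waiting for the second,
which is where the latency win comes from.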
> ---
>  arch/x86/kvm/emulate.c |  1 -
>  arch/x86/kvm/lapic.c   |  5 +++--
>  arch/x86/kvm/lapic.h   | 25 +++++++++++++++++++++++++
>  arch/x86/kvm/svm.c     |  3 +++
>  arch/x86/kvm/vmx/vmx.c |  2 ++
>  5 files changed, 33 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 952d1a4f4d7e..8313234e7d64 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -19,7 +19,6 @@
>   */
>
>  #include <linux/kvm_host.h>
> -#include "kvm_cache_regs.h"
>  #include <asm/kvm_emulate.h>
>  #include <linux/stringify.h>
>  #include <asm/debugreg.h>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 452cedd6382b..0f02820332d4 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2655,9 +2655,10 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  	if (reg == APIC_ICR2)
>  		return 1;
>
> -	/* if this is ICR write vector before command */
> +	/* ICR writes are handled early by kvm_x2apic_fast_icr_write(). */
>  	if (reg == APIC_ICR)
> -		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
> +		return 0;
> +
>  	return kvm_lapic_reg_write(apic, reg, (u32)data);
>  }
>
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index c1d77436126a..19fd2734d9e6 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -6,6 +6,8 @@
>
>  #include <linux/kvm_host.h>
>
> +#include "kvm_cache_regs.h"
> +
>  #define KVM_APIC_INIT		0
>  #define KVM_APIC_SIPI		1
>  #define KVM_APIC_LVT_NUM	6
> @@ -245,4 +247,27 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
>  	return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
>  }
>
> +/*
> + * Fast path for sending virtual IPIs immediately after VM-Exit. Fault
> + * detection and injection, e.g. if x2apic is disabled, tracing and/or
> + * skipping of the emulated instruction are all handled in the standard
> + * WRMSR path, kvm_emulate_wrmsr().
> + */
> +static inline void kvm_x2apic_fast_icr_write(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +	u64 data;
> +
> +	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
> +		return;
> +
> +	if (kvm_rcx_read(vcpu) != (APIC_BASE_MSR + (APIC_ICR >> 4)))
> +		return;
> +
> +	data = kvm_read_edx_eax(vcpu);
> +
> +	/* Write ICR2 first: it holds the destination consumed by the ICR write. */
> +	kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
> +	WARN_ON_ONCE(kvm_lapic_reg_write(apic, APIC_ICR, (u32)data));
> +}
> +
>  #endif
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a48461..713510210b29 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -6189,7 +6189,10 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
>
>  static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>  {
> +	struct vcpu_svm *svm = to_svm(vcpu);
>
> +	/* exit_info_1 is nonzero for a WRMSR intercept, zero for RDMSR. */
> +	if (svm->vmcb->control.exit_code == SVM_EXIT_MSR &&
> +	    svm->vmcb->control.exit_info_1)
> +		kvm_x2apic_fast_icr_write(vcpu);
>  }
>
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e55e28..82412c4085fc 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6231,6 +6231,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>  		handle_external_interrupt_irqoff(vcpu);
>  	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
>  		handle_exception_nmi_irqoff(vmx);
> +	else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +		kvm_x2apic_fast_icr_write(vcpu);
>  }
>
>  static bool vmx_has_emulated_msr(int index)
> --
> 2.24.0
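For reference, the guest operation this accelerates is a single WRMSR to
the x2APIC ICR (MSR 0x830). A minimal guest-side sketch, assuming Linux's
<asm/apicdef.h> constants; x2apic_send_ipi() is an illustrative name, and
wrmsr() stands in for the usual low/high-word kernel helper:

#include <asm/apicdef.h>
#include <asm/msr.h>

/* Send a fixed-delivery, physical-destination IPI in x2APIC mode. */
static inline void x2apic_send_ipi(u32 dest_apicid, u32 vector)
{
        /* x2APIC ICR: destination in bits 63:32, command in bits 31:0. */
        u64 icr = ((u64)dest_apicid << 32) | APIC_DM_FIXED | vector;

        /* One WRMSR to 0x830; this is the write the fast path intercepts. */
        wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4), (u32)icr, (u32)(icr >> 32));
}

Such a write traps with EXIT_REASON_MSR_WRITE on VMX or SVM_EXIT_MSR on
SVM, which is exactly what the two handle_exit_irqoff() hunks above key on.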