Re: [PATCH v2 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On 19 Nov 2019, at 8:36, Wanpeng Li <kernellwp@xxxxxxxxx> wrote:
> 
> From: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> 
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in 
> our product observation, multicast IPIs are not as common as unicast 
> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
> 
> This patch tries to optimize x2apic physical destination mode, fixed 
> delivery mode single target IPI by delivering IPI to receiver as soon 
> as possible after sender writes ICR vmexit to avoid various checks 
> when possible, especially when running guest w/ --overcommit cpu-pm=on
> or guest can keep running, IPI can be injected to target vCPU by 
> posted-interrupt immediately.
> 
> Testing on Xeon Skylake server:
> 
> The virtual IPI latency from sender send to receiver receive reduces 
> more than 200+ cpu cycles.
> 
> Signed-off-by: Wanpeng Li <wanpengli@xxxxxxxxxxx>
> ---
> v1 -> v2:
> * add tracepoint
> * Instead of a separate vcpu->fast_vmexit, set exit_reason
>   to vmx->exit_reason to -1 if the fast path succeeds.
> * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
> * moving the handling into vmx_handle_exit_irqoff()
> 
> arch/x86/include/asm/kvm_host.h |  4 ++--
> arch/x86/include/uapi/asm/vmx.h |  1 +
> arch/x86/kvm/svm.c              |  4 ++--
> arch/x86/kvm/vmx/vmx.c          | 40 +++++++++++++++++++++++++++++++++++++---
> arch/x86/kvm/x86.c              |  5 +++--
> 5 files changed, 45 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 898ab9e..0daafa9 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1084,7 +1084,7 @@ struct kvm_x86_ops {
> 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
> 
> 	void (*run)(struct kvm_vcpu *vcpu);
> -	int (*handle_exit)(struct kvm_vcpu *vcpu);
> +	int (*handle_exit)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
> 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
> 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> @@ -1134,7 +1134,7 @@ struct kvm_x86_ops {
> 	int (*check_intercept)(struct kvm_vcpu *vcpu,
> 			       struct x86_instruction_info *info,
> 			       enum x86_intercept_stage stage);
> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
> 	bool (*mpx_supported)(void);
> 	bool (*xsaves_supported)(void);
> 	bool (*umip_emulated)(void);
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index 3eb8411..b33c6e1 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -88,6 +88,7 @@
> #define EXIT_REASON_XRSTORS             64
> #define EXIT_REASON_UMWAIT              67
> #define EXIT_REASON_TPAUSE              68
> +#define EXIT_REASON_NEED_SKIP_EMULATED_INSN -1
> 
> #define VMX_EXIT_REASONS \
> 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a..c8e063a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4929,7 +4929,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> 	*info2 = control->exit_info_2;
> }
> 
> -static int handle_exit(struct kvm_vcpu *vcpu)
> +static int handle_exit(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason)
> {
> 	struct vcpu_svm *svm = to_svm(vcpu);
> 	struct kvm_run *kvm_run = vcpu->run;
> @@ -6187,7 +6187,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
> 	return ret;
> }
> 
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason)
> {
> 
> }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e5..b98198d 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5792,7 +5792,7 @@ void dump_vmcs(void)
>  * The guest has exited.  See if we can fix it or if we need userspace
>  * assistance.
>  */
> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> +static int vmx_handle_exit(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason)

vmx_handle_exit() should get second parameter by value and not by pointer. As it doesn’t need to modify it.

I would also rename parameter to “accel_exit_completion” to indicate this is additional work that needs to happen to complete accelerated-exit handling.
This parameter should be an enum that currently only have 2 values: ACCEL_EXIT_NONE and ACCEL_EXIT_SKIP_EMUL_INS.

> {
> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
> 	u32 exit_reason = vmx->exit_reason;
> @@ -5878,7 +5878,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> 		}
> 	}
> 
> -	if (exit_reason < kvm_vmx_max_exit_handlers
> +	if (*vcpu_exit_reason == EXIT_REASON_NEED_SKIP_EMULATED_INSN) {
> +		kvm_skip_emulated_instruction(vcpu);
> +		return 1;
> +	} else if (exit_reason < kvm_vmx_max_exit_handlers
> 	    && kvm_vmx_exit_handlers[exit_reason]) {
> #ifdef CONFIG_RETPOLINE
> 		if (exit_reason == EXIT_REASON_MSR_WRITE)
> @@ -6223,7 +6226,36 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> }
> STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
> 
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static u32 handle_ipi_fastpath(struct kvm_vcpu *vcpu)
> +{
> +	u32 index;
> +	u64 data;
> +	int ret = 0;
> +
> +	if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic)) {
> +		/*
> +		 * fastpath to IPI target, FIXED+PHYSICAL which is popular
> +		 */
> +		index = kvm_rcx_read(vcpu);
> +		data = kvm_read_edx_eax(vcpu);
> +
> +		if (((index - APIC_BASE_MSR) << 4 == APIC_ICR) &&
> +			((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> +			((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> +
> +			trace_kvm_msr_write(index, data);

On a standard EXIT_REASON_MSR_WRITE VMExit, this trace will be printed only after LAPIC emulation logic happens.
You should preserve same ordering.

> +			kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> +			ret = kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> +
> +			if (ret == 0)
> +				return EXIT_REASON_NEED_SKIP_EMULATED_INSN;
> +		}
> +	}
> +
> +	return ret;
> +}

Maybe it would be more elegant to modify this function as follows?

static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
    if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
        ((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
        ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {

        kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
        return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
    }

    return 1;
}

static enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
{
    u32 msr = kvm_rcx_read(vcpu);
    u64 data = kvm_read_edx_eax(vcpu);
    int ret = 0;

    switch (msr) {
    case APIC_BASE_MSR + (APIC_ICR >> 4):
        ret = handle_accel_set_x2apic_icr_irqoff(vcpu, msr, data);
        break;
    default:
        return ACCEL_EXIT_NONE;
    }

    if (!ret) {
        trace_kvm_msr_write(msr, data);
        return ACCEL_EXIT_SKIP_EMUL_INS;
    }

    return ACCEL_EXIT_NONE;
}

> +
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, u32 *exit_reason)
> {
> 	struct vcpu_vmx *vmx = to_vmx(vcpu);
> 
> @@ -6231,6 +6263,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> 		handle_external_interrupt_irqoff(vcpu);
> 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
> 		handle_exception_nmi_irqoff(vmx);
> +	else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +		*exit_reason = handle_ipi_fastpath(vcpu);

1) This case requires a comment as the only reason it is called here is an optimisation.
In contrast to the other cases which must be called before interrupts are enabled on the host.

2) I would rename handler to handle_accel_set_msr_irqoff().
To signal this handler runs with host interrupts disabled and to make it a general place for accelerating WRMSRs in case we would require more in the future.

-Liran

> }
> 
> static bool vmx_has_emulated_msr(int index)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991dd01..a53bce3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7981,6 +7981,7 @@ EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
> static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> {
> 	int r;
> +	u32 exit_reason = 0;
> 	bool req_int_win =
> 		dm_request_for_irq_injection(vcpu) &&
> 		kvm_cpu_accept_dm_intr(vcpu);
> @@ -8226,7 +8227,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 	vcpu->mode = OUTSIDE_GUEST_MODE;
> 	smp_wmb();
> 
> -	kvm_x86_ops->handle_exit_irqoff(vcpu);
> +	kvm_x86_ops->handle_exit_irqoff(vcpu, &exit_reason);
> 
> 	/*
> 	 * Consume any pending interrupts, including the possible source of
> @@ -8270,7 +8271,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> 		kvm_lapic_sync_from_vapic(vcpu);
> 
> 	vcpu->arch.gpa_available = false;
> -	r = kvm_x86_ops->handle_exit(vcpu);
> +	r = kvm_x86_ops->handle_exit(vcpu, &exit_reason);
> 	return r;
> 
> cancel_injection:
> -- 
> 2.7.4
> 





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux