Re: [PATCH] KVM: nVMX: nested VPID emulation

Bandan Das <bsd@xxxxxxxxxx> · Mon, 14 Sep 2015 21:38:03 +0530

Wanpeng Li <wanpeng.li@xxxxxxxxxxx> writes:

> VPID is used to tag address space and avoid a TLB flush. Currently L0 use 
> the same VPID to run L1 and all its guests. KVM flushes VPID when switching 
> between L1 and L2. 
>
> This patch advertises VPID to the L1 hypervisor, then address space of L1 and 
> L2 can be separately treated and avoid TLB flush when swithing between L1 and 
> L2. This patch gets ~3x performance improvement for lmbench 8p/64k ctxsw.

TLB flush does context invalidation and while that should result in
some improvement, I never expected a 3x improvement for any workload!
Interesting :)

> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
>  arch/x86/kvm/vmx.c | 39 ++++++++++++++++++++++++++++++++-------
>  1 file changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index da1590e..06bc31e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1157,6 +1157,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
>  	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
>  }
>  
> +static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
> +{
> +	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
> +}
> +
>  static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
>  {
>  	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
> @@ -2471,6 +2476,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>  		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>  		SECONDARY_EXEC_RDTSCP |
>  		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> +		SECONDARY_EXEC_ENABLE_VPID |
>  		SECONDARY_EXEC_APIC_REGISTER_VIRT |
>  		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
>  		SECONDARY_EXEC_WBINVD_EXITING |
> @@ -4160,7 +4166,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
>  	int vpid;
>  
>  	vmx->vpid = 0;
> -	if (!enable_vpid)
> +	if (!enable_vpid || is_guest_mode(&vmx->vcpu))
>  		return;
>  	spin_lock(&vmx_vpid_lock);
>  	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> @@ -6738,6 +6744,14 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
>  	}
>  	vmcs12 = kmap(page);
>  	vmcs12->launch_state = 0;
> +	if (enable_vpid) {
> +		if (nested_cpu_has_vpid(vmcs12)) {
> +			spin_lock(&vmx_vpid_lock);
> +			if (vmcs12->virtual_processor_id != 0)
> +				__clear_bit(vmcs12->virtual_processor_id, vmx_vpid_bitmap);
> +			spin_unlock(&vmx_vpid_lock);
> +		}
> +	}
>  	kunmap(page);
>  	nested_release_page(page);

I don't think this is enough, we should also check for set "nested" bits
in free_vpid() and clear them. There should be some sort of a mapping between the
nested guest vpid and the actual vpid so that we can just clear those bits.

> @@ -9189,6 +9203,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exec_control;
> +	int vpid;
>  
>  	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>  	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -9438,13 +9453,21 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	else
>  		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
>  
> +

Empty space here.

>  	if (enable_vpid) {
> -		/*
> -		 * Trivially support vpid by letting L2s share their parent
> -		 * L1's vpid. TODO: move to a more elaborate solution, giving
> -		 * each L2 its own vpid and exposing the vpid feature to L1.
> -		 */
> -		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> +		if (nested_cpu_has_vpid(vmcs12)) {
> +			if (vmcs12->virtual_processor_id == 0) {

Ok, so if we advertise vpid to the nested hypervisor, isn't it going to
attempt writing this field when setting up ? Atleast
that's what Linux does, no ?

> +				spin_lock(&vmx_vpid_lock);
> +				vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> +				if (vpid < VMX_NR_VPIDS)
> +					__set_bit(vpid, vmx_vpid_bitmap);
> +				spin_unlock(&vmx_vpid_lock);
> +				vmcs_write16(VIRTUAL_PROCESSOR_ID, vpid);
> +			} else
> +				vmcs_write16(VIRTUAL_PROCESSOR_ID, vmcs12->virtual_processor_id);
> +		} else
> +			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> +

I guess L1 shouldn't know what vpid L0 chose to run L2. If L1 vmreads,
it should get what it expects for the value of vpid, not the one L0 chose.

>  		vmx_flush_tlb(vcpu);
>  	}

So, this isn't removed ? I thought it's not needed anymore ?

> @@ -9973,6 +9996,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>  		vmcs12_save_pending_event(vcpu, vmcs12);
>  	}
>  
> +	if (nested_cpu_has_vpid(vmcs12))
> +		vmcs12->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID);
>  	/*
>  	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
>  	 * preserved above and would only end up incorrectly in L1.
Date: Mon, 14 Sep 2015 21:37:52 +0530
In-Reply-To: <BLU436-SMTP172159ECF89EA5C21830024805D0@xxxxxxx> (Wanpeng Li's
	message of "Mon, 14 Sep 2015 20:52:23 +0800")
Message-ID: <jpgh9mxt25j.fsf@xxxxxxxxxxxxxxxxxxxxx>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html