On Fri, 2024-05-17 at 10:39 -0700, Sean Christopherson wrote:
> Initialize a vCPU's capabilities based on the guest CPUID provided by
> userspace instead of simply zeroing the entire array. This is the first
> step toward using cpu_caps to query *all* CPUID-based guest capabilities,
> i.e. will allow converting all usage of guest_cpuid_has() to
> guest_cpu_cap_has().
>
> Zeroing the array was the logical choice when using cpu_caps was opt-in,
> e.g. "unsupported" was generally a safer default, and the whole point of
> governed features is that KVM would need to check host and guest support,
> i.e. making everything unsupported by default didn't require more code.
>
> But requiring KVM to manually "enable" every CPUID-based feature in
> cpu_caps would require an absurd amount of boilerplate code.
>
> Follow existing CPUID/kvm_cpu_caps nomenclature where possible, e.g. for
> the change() and clear() APIs. Replace check_and_set() with constrain()
> to try and capture that KVM is constraining userspace's desired guest
> feature set based on KVM's capabilities.
>
> This is intended to be a gigantic nop, i.e. should not have any impact on
> guest or KVM functionality.
>
> This is also an intermediate step; a future commit will also incorporate
> KVM support into the vCPU's cpu_caps before converting guest_cpuid_has()
> to guest_cpu_cap_has().
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/kvm/cpuid.c   | 46 ++++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/cpuid.h   | 25 ++++++++++++++++++++---
>  arch/x86/kvm/svm/svm.c | 28 +++++++++++++------------
>  arch/x86/kvm/vmx/vmx.c |  8 +++++---
>  4 files changed, 86 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 89c506cf649b..fd725cbbcce5 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -381,13 +381,56 @@ static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
>  #endif
>  }
>
> +/*
> + * This isn't truly "unsafe", but except for the cpu_caps initialization code,
> + * all register lookups should use __cpuid_entry_get_reg(), which provides
> + * compile-time validation of the input.
> + */
> +static u32 cpuid_get_reg_unsafe(struct kvm_cpuid_entry2 *entry, u32 reg)
> +{
> +        switch (reg) {
> +        case CPUID_EAX:
> +                return entry->eax;
> +        case CPUID_EBX:
> +                return entry->ebx;
> +        case CPUID_ECX:
> +                return entry->ecx;
> +        case CPUID_EDX:
> +                return entry->edx;
> +        default:
> +                WARN_ON_ONCE(1);
> +                return 0;
> +        }
> +}
> +
>  void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>  {
>          struct kvm_lapic *apic = vcpu->arch.apic;
>          struct kvm_cpuid_entry2 *best;
> +        struct kvm_cpuid_entry2 *entry;
>          bool allow_gbpages;
> +        int i;
>
>          memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps));
> +        BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS);
> +
> +        /*
> +         * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> +         * honor userspace's definition for features that don't require KVM or
> +         * hardware management/support (or that KVM simply doesn't care about).
> +         */
> +        for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> +                const struct cpuid_reg cpuid = reverse_cpuid[i];
> +
> +                if (!cpuid.function)
> +                        continue;
> +
> +                entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
> +                if (!entry)
> +                        continue;
> +
> +                vcpu->arch.cpu_caps[i] = cpuid_get_reg_unsafe(entry, cpuid.reg);
> +        }
>
>          kvm_update_cpuid_runtime(vcpu);
>
> @@ -404,8 +447,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>           */
>          allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
>                                        guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
> -        if (allow_gbpages)
> -                guest_cpu_cap_set(vcpu, X86_FEATURE_GBPAGES);
> +        guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
>
>          best = kvm_find_cpuid_entry(vcpu, 1);
>          if (best && apic) {
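To make the new flow concrete for anyone reading along without the rest of
the series: the loop above is a table-driven copy from userspace's CPUID
entries into the per-vCPU cap words, keyed by reverse_cpuid. Below is a
self-contained toy model of that flow; every toy_* name, leaf number, and
register value here is invented for illustration and is not KVM's actual
symbol or data.

/*
 * Toy model of the cpu_caps initialization loop quoted above.  All toy_*
 * names and the table contents are invented stand-ins, not KVM's real
 * reverse_cpuid/kvm_cpuid_entry2 machinery.
 */
#include <stdint.h>
#include <stdio.h>

enum { TOY_EAX, TOY_EBX, TOY_ECX, TOY_EDX };

struct toy_cpuid_entry {        /* one userspace-provided CPUID leaf */
        uint32_t function, index;
        uint32_t eax, ebx, ecx, edx;
};

struct toy_cpuid_reg {          /* which leaf/register a cap word mirrors */
        uint32_t function, index, reg;
};

static const struct toy_cpuid_reg toy_reverse_cpuid[] = {
        { .function = 0x1,        .index = 0, .reg = TOY_EDX },
        { .function = 0x7,        .index = 0, .reg = TOY_EBX },
        { .function = 0x80000001, .index = 0, .reg = TOY_ECX },
};
#define NR_TOY_CAPS (sizeof(toy_reverse_cpuid) / sizeof(toy_reverse_cpuid[0]))

/* Counterpart of cpuid_get_reg_unsafe(): pick one register of an entry. */
static uint32_t toy_get_reg(const struct toy_cpuid_entry *e, uint32_t reg)
{
        switch (reg) {
        case TOY_EAX: return e->eax;
        case TOY_EBX: return e->ebx;
        case TOY_ECX: return e->ecx;
        case TOY_EDX: return e->edx;
        default:      return 0;
        }
}

static const struct toy_cpuid_entry *toy_find_entry(const struct toy_cpuid_entry *e,
                                                    unsigned int n, uint32_t fn,
                                                    uint32_t idx)
{
        for (unsigned int i = 0; i < n; i++)
                if (e[i].function == fn && e[i].index == idx)
                        return &e[i];
        return NULL;            /* userspace didn't define this leaf */
}

int main(void)
{
        /* CPUID as userspace might set it; 0x80000001 is deliberately absent. */
        const struct toy_cpuid_entry guest_cpuid[] = {
                { .function = 0x1, .index = 0, .edx = 0x0fabfbff },
                { .function = 0x7, .index = 0, .ebx = 0x00000129 },
        };
        uint32_t cpu_caps[NR_TOY_CAPS] = { 0 };

        /* The loop from kvm_vcpu_after_set_cpuid(): seed each cap word from
         * the matching CPUID entry, and leave it zero when there is none. */
        for (unsigned int i = 0; i < NR_TOY_CAPS; i++) {
                const struct toy_cpuid_reg *r = &toy_reverse_cpuid[i];
                const struct toy_cpuid_entry *e =
                        toy_find_entry(guest_cpuid, 2, r->function, r->index);

                if (e)
                        cpu_caps[i] = toy_get_reg(e, r->reg);
        }

        for (unsigned int i = 0; i < NR_TOY_CAPS; i++)
                printf("cpu_caps[%u] = 0x%08x\n", i, (unsigned)cpu_caps[i]);
        return 0;
}

Note that cpu_caps[2] stays zero because no 0x80000001 entry was supplied,
mirroring the "if (!entry) continue;" in the real loop: features in leaves
that userspace didn't define remain unsupported.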
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index ad0168d3aec5..c2c2b8aa347b 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -265,11 +265,30 @@ static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
>          vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
>  }
>
> -static __always_inline void guest_cpu_cap_check_and_set(struct kvm_vcpu *vcpu,
> -                                                        unsigned int x86_feature)
> +static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
> +                                                unsigned int x86_feature)
>  {
> -        if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
> +        unsigned int x86_leaf = __feature_leaf(x86_feature);
> +
> +        reverse_cpuid_check(x86_leaf);
> +        vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
> +}
> +
> +static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
> +                                                 unsigned int x86_feature,
> +                                                 bool guest_has_cap)
> +{
> +        if (guest_has_cap)
>                  guest_cpu_cap_set(vcpu, x86_feature);
> +        else
> +                guest_cpu_cap_clear(vcpu, x86_feature);
> +}

Assuming that this code is not deleted in the following patches, I'd prefer
to keep calling this 'guest_cpu_cap_change', because that is what the
function does.

> +
> +static __always_inline void guest_cpu_cap_constrain(struct kvm_vcpu *vcpu,
> +                                                    unsigned int x86_feature)
> +{
> +        if (!kvm_cpu_cap_has(x86_feature))
> +                guest_cpu_cap_clear(vcpu, x86_feature);
>  }
>
>  static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
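Two of the new helpers deserve a closer look: change() forces a bit to
match a caller-computed predicate, while constrain() is strictly one-way,
clearing a bit KVM doesn't support but never setting one. A self-contained
sketch, again with invented cap_*/TOY_* names and arbitrary bit positions
rather than KVM's real feature words:

/*
 * Toy model of the guest_cpu_cap_*() helpers added above.  The cap_*/TOY_*
 * names and bit positions are invented for illustration; the real helpers
 * operate on vcpu->arch.cpu_caps via the X86_FEATURE_* encoding.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CAP_WORDS        2
#define TOY_FEAT(word, bit) ((word) * 32 + (bit))

#define TOY_FEATURE_NRIPS   TOY_FEAT(0, 3)   /* arbitrary positions */
#define TOY_FEATURE_GBPAGES TOY_FEAT(1, 26)

static uint32_t kvm_caps[NR_CAP_WORDS];      /* what KVM + hardware support */
static uint32_t vcpu_caps[NR_CAP_WORDS];     /* per-vCPU guest capabilities */

static bool cap_has(const uint32_t *caps, unsigned int f)
{
        return caps[f / 32] & (1u << (f % 32));
}

static void cap_set(uint32_t *caps, unsigned int f)
{
        caps[f / 32] |= 1u << (f % 32);
}

static void cap_clear(uint32_t *caps, unsigned int f)
{
        caps[f / 32] &= ~(1u << (f % 32));
}

/* change(): force the bit to match a caller-computed predicate. */
static void cap_change(uint32_t *caps, unsigned int f, bool has)
{
        if (has)
                cap_set(caps, f);
        else
                cap_clear(caps, f);
}

/* constrain(): one-way; clears the bit if KVM lacks support, never sets it. */
static void cap_constrain(unsigned int f)
{
        if (!cap_has(kvm_caps, f))
                cap_clear(vcpu_caps, f);
}

int main(void)
{
        /* The init loop already seeded both bits from userspace's CPUID... */
        cap_set(vcpu_caps, TOY_FEATURE_NRIPS);
        cap_set(vcpu_caps, TOY_FEATURE_GBPAGES);

        /* ...but KVM itself only supports GBPAGES. */
        cap_set(kvm_caps, TOY_FEATURE_GBPAGES);

        cap_constrain(TOY_FEATURE_NRIPS);    /* cleared: KVM lacks it */
        cap_constrain(TOY_FEATURE_GBPAGES);  /* kept: both sides agree */

        /* change() mirrors the GBPAGES logic: the bit tracks a predicate. */
        bool allow_gbpages = false;          /* stand-in for the tdp checks */
        cap_change(vcpu_caps, TOY_FEATURE_GBPAGES, allow_gbpages);

        printf("NRIPS=%d GBPAGES=%d\n",
               cap_has(vcpu_caps, TOY_FEATURE_NRIPS),
               cap_has(vcpu_caps, TOY_FEATURE_GBPAGES)); /* both 0 now */
        return 0;
}

That asymmetry is what makes the conversions below a nop: the init loop has
already set each bit iff userspace enumerated the feature, so the old
kvm_cpu_cap_has() && guest_cpuid_has() pair collapses into a single
clear-if-KVM-unsupported check. It also explains the explicit else branch
in the vmx.c XSAVES hunk: under the opt-in scheme, doing nothing left the
bit clear, whereas now the bit was seeded from guest CPUID and has to be
cleared explicitly.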
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 2acd2e3bb1b0..1bc431a7e862 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4339,27 +4339,29 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>           * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
>           * the guest read/write access to the host's XSS.
>           */
> -        if (boot_cpu_has(X86_FEATURE_XSAVE) &&
> -            boot_cpu_has(X86_FEATURE_XSAVES) &&
> -            guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
> -                guest_cpu_cap_set(vcpu, X86_FEATURE_XSAVES);
> +        guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
> +                             boot_cpu_has(X86_FEATURE_XSAVE) &&
> +                             boot_cpu_has(X86_FEATURE_XSAVES) &&
> +                             guest_cpuid_has(vcpu, X86_FEATURE_XSAVE));
>
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_NRIPS);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_LBRV);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_NRIPS);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_TSCRATEMSR);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_LBRV);
>
>          /*
>           * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
>           * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
>           * SVM on Intel is bonkers and extremely unlikely to work).
>           */
> -        if (!guest_cpuid_is_intel(vcpu))
> -                guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
> +        if (guest_cpuid_is_intel(vcpu))
> +                guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
> +        else
> +                guest_cpu_cap_constrain(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
>
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_VGIF);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_VNMI);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_PAUSEFILTER);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_PFTHRESHOLD);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_VGIF);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_VNMI);
>
>          svm_recalc_instruction_intercepts(vcpu, svm);
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 1bc56596d653..d873386e1473 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7838,10 +7838,12 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>           */
>          if (boot_cpu_has(X86_FEATURE_XSAVE) &&
>              guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
> -                guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_XSAVES);
> +                guest_cpu_cap_constrain(vcpu, X86_FEATURE_XSAVES);
> +        else
> +                guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
>
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_VMX);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_LAM);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_VMX);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_LAM);
>
>          vmx_setup_uret_msrs(vmx);
>

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
        Maxim Levitsky