On Fri, 2024-05-17 at 10:39 -0700, Sean Christopherson wrote:
> Initialize a vCPU's capabilities based on the guest CPUID provided by
> userspace instead of simply zeroing the entire array. This is the first
> step toward using cpu_caps to query *all* CPUID-based guest capabilities,
> i.e. will allow converting all usage of guest_cpuid_has() to
> guest_cpu_cap_has().
>
> Zeroing the array was the logical choice when using cpu_caps was opt-in,
> e.g. "unsupported" was generally a safer default, and the whole point of
> governed features is that KVM would need to check host and guest support,
> i.e. making everything unsupported by default didn't require more code.
>
> But requiring KVM to manually "enable" every CPUID-based feature in
> cpu_caps would require an absurd amount of boilerplate code.
>
> Follow existing CPUID/kvm_cpu_caps nomenclature where possible, e.g. for
> the change() and clear() APIs. Replace check_and_set() with constrain()
> to try and capture that KVM is constraining userspace's desired guest
> feature set based on KVM's capabilities.
>
> This is intended to be a gigantic nop, i.e. should not have any impact on
> guest or KVM functionality.
>
> This is also an intermediate step; a future commit will also incorporate
> KVM support into the vCPU's cpu_caps before converting guest_cpuid_has()
> to guest_cpu_cap_has().
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/kvm/cpuid.c   | 46 ++++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/cpuid.h   | 25 ++++++++++++++++++++---
>  arch/x86/kvm/svm/svm.c | 28 +++++++++++++------------
>  arch/x86/kvm/vmx/vmx.c |  8 +++++---
>  4 files changed, 86 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 89c506cf649b..fd725cbbcce5 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -381,13 +381,56 @@ static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
>  #endif
>  }
>
> +/*
> + * This isn't truly "unsafe", but except for the cpu_caps initialization code,
> + * all register lookups should use __cpuid_entry_get_reg(), which provides
> + * compile-time validation of the input.
> + */
> +static u32 cpuid_get_reg_unsafe(struct kvm_cpuid_entry2 *entry, u32 reg)
> +{
> +        switch (reg) {
> +        case CPUID_EAX:
> +                return entry->eax;
> +        case CPUID_EBX:
> +                return entry->ebx;
> +        case CPUID_ECX:
> +                return entry->ecx;
> +        case CPUID_EDX:
> +                return entry->edx;
> +        default:
> +                WARN_ON_ONCE(1);
> +                return 0;
> +        }
> +}
> +
>  void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>  {
>          struct kvm_lapic *apic = vcpu->arch.apic;
>          struct kvm_cpuid_entry2 *best;
> +        struct kvm_cpuid_entry2 *entry;
>          bool allow_gbpages;
> +        int i;
>
>          memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps));
> +        BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS);
> +
> +        /*
> +         * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> +         * honor userspace's definition for features that don't require KVM or
> +         * hardware management/support (or that KVM simply doesn't care about).
> +         */
> +        for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> +                const struct cpuid_reg cpuid = reverse_cpuid[i];
> +
> +                if (!cpuid.function)
> +                        continue;
> +
> +                entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
> +                if (!entry)
> +                        continue;
> +
> +                vcpu->arch.cpu_caps[i] = cpuid_get_reg_unsafe(entry, cpuid.reg);
> +        }
>
>          kvm_update_cpuid_runtime(vcpu);
>
> @@ -404,8 +447,7 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>           */
>          allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
>                                        guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
> -        if (allow_gbpages)
> -                guest_cpu_cap_set(vcpu, X86_FEATURE_GBPAGES);
> +        guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
>
>          best = kvm_find_cpuid_entry(vcpu, 1);
>          if (best && apic) {
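To make the new flow concrete for anyone reading along without the rest of
the series: the loop above is a table-driven copy from userspace's CPUID
entries into the per-vCPU cap words, keyed by reverse_cpuid. Below is a
self-contained toy model of that flow; every toy_* name, leaf number, and
register value here is invented for illustration and is not KVM's actual
symbol or data.

/*
 * Toy model of the cpu_caps initialization loop quoted above.  All toy_*
 * names and the table contents are invented stand-ins, not KVM's real
 * reverse_cpuid/kvm_cpuid_entry2 machinery.
 */
#include <stdint.h>
#include <stdio.h>

enum { TOY_EAX, TOY_EBX, TOY_ECX, TOY_EDX };

struct toy_cpuid_entry {        /* one userspace-provided CPUID leaf */
        uint32_t function, index;
        uint32_t eax, ebx, ecx, edx;
};

struct toy_cpuid_reg {          /* which leaf/register a cap word mirrors */
        uint32_t function, index, reg;
};

static const struct toy_cpuid_reg toy_reverse_cpuid[] = {
        { .function = 0x1,        .index = 0, .reg = TOY_EDX },
        { .function = 0x7,        .index = 0, .reg = TOY_EBX },
        { .function = 0x80000001, .index = 0, .reg = TOY_ECX },
};
#define NR_TOY_CAPS (sizeof(toy_reverse_cpuid) / sizeof(toy_reverse_cpuid[0]))

/* Counterpart of cpuid_get_reg_unsafe(): pick one register of an entry. */
static uint32_t toy_get_reg(const struct toy_cpuid_entry *e, uint32_t reg)
{
        switch (reg) {
        case TOY_EAX: return e->eax;
        case TOY_EBX: return e->ebx;
        case TOY_ECX: return e->ecx;
        case TOY_EDX: return e->edx;
        default:      return 0;
        }
}

static const struct toy_cpuid_entry *toy_find_entry(const struct toy_cpuid_entry *e,
                                                    unsigned int n, uint32_t fn,
                                                    uint32_t idx)
{
        for (unsigned int i = 0; i < n; i++)
                if (e[i].function == fn && e[i].index == idx)
                        return &e[i];
        return NULL;            /* userspace didn't define this leaf */
}

int main(void)
{
        /* CPUID as userspace might set it; 0x80000001 is deliberately absent. */
        const struct toy_cpuid_entry guest_cpuid[] = {
                { .function = 0x1, .index = 0, .edx = 0x0fabfbff },
                { .function = 0x7, .index = 0, .ebx = 0x00000129 },
        };
        uint32_t cpu_caps[NR_TOY_CAPS] = { 0 };

        /* The loop from kvm_vcpu_after_set_cpuid(): seed each cap word from
         * the matching CPUID entry, and leave it zero when there is none. */
        for (unsigned int i = 0; i < NR_TOY_CAPS; i++) {
                const struct toy_cpuid_reg *r = &toy_reverse_cpuid[i];
                const struct toy_cpuid_entry *e =
                        toy_find_entry(guest_cpuid, 2, r->function, r->index);

                if (e)
                        cpu_caps[i] = toy_get_reg(e, r->reg);
        }

        for (unsigned int i = 0; i < NR_TOY_CAPS; i++)
                printf("cpu_caps[%u] = 0x%08x\n", i, (unsigned)cpu_caps[i]);
        return 0;
}

Note that cpu_caps[2] stays zero because no 0x80000001 entry was supplied,
mirroring the "if (!entry) continue;" in the real loop: features in leaves
that userspace didn't define remain unsupported.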
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index ad0168d3aec5..c2c2b8aa347b 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -265,11 +265,30 @@ static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
>          vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
>  }
>
> -static __always_inline void guest_cpu_cap_check_and_set(struct kvm_vcpu *vcpu,
> -                                                        unsigned int x86_feature)
> +static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
> +                                                unsigned int x86_feature)
>  {
> -        if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
> +        unsigned int x86_leaf = __feature_leaf(x86_feature);
> +
> +        reverse_cpuid_check(x86_leaf);
> +        vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
> +}
> +
> +static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
> +                                                 unsigned int x86_feature,
> +                                                 bool guest_has_cap)
> +{
> +        if (guest_has_cap)
>                  guest_cpu_cap_set(vcpu, x86_feature);
> +        else
> +                guest_cpu_cap_clear(vcpu, x86_feature);
> +}

Assuming that this code is not deleted in the following patches, I'd prefer
to keep calling this 'guest_cpu_cap_change', because that is what the
function does.

> +
> +static __always_inline void guest_cpu_cap_constrain(struct kvm_vcpu *vcpu,
> +                                                    unsigned int x86_feature)
> +{
> +        if (!kvm_cpu_cap_has(x86_feature))
> +                guest_cpu_cap_clear(vcpu, x86_feature);
>  }
>
>  static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
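Two of the new helpers deserve a closer look: change() forces a bit to
match a caller-computed predicate, while constrain() is strictly one-way,
clearing a bit KVM doesn't support but never setting one. A self-contained
sketch, again with invented cap_*/TOY_* names and arbitrary bit positions
rather than KVM's real feature words:

/*
 * Toy model of the guest_cpu_cap_*() helpers added above.  The cap_*/TOY_*
 * names and bit positions are invented for illustration; the real helpers
 * operate on vcpu->arch.cpu_caps via the X86_FEATURE_* encoding.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CAP_WORDS        2
#define TOY_FEAT(word, bit) ((word) * 32 + (bit))

#define TOY_FEATURE_NRIPS   TOY_FEAT(0, 3)   /* arbitrary positions */
#define TOY_FEATURE_GBPAGES TOY_FEAT(1, 26)

static uint32_t kvm_caps[NR_CAP_WORDS];      /* what KVM + hardware support */
static uint32_t vcpu_caps[NR_CAP_WORDS];     /* per-vCPU guest capabilities */

static bool cap_has(const uint32_t *caps, unsigned int f)
{
        return caps[f / 32] & (1u << (f % 32));
}

static void cap_set(uint32_t *caps, unsigned int f)
{
        caps[f / 32] |= 1u << (f % 32);
}

static void cap_clear(uint32_t *caps, unsigned int f)
{
        caps[f / 32] &= ~(1u << (f % 32));
}

/* change(): force the bit to match a caller-computed predicate. */
static void cap_change(uint32_t *caps, unsigned int f, bool has)
{
        if (has)
                cap_set(caps, f);
        else
                cap_clear(caps, f);
}

/* constrain(): one-way; clears the bit if KVM lacks support, never sets it. */
static void cap_constrain(unsigned int f)
{
        if (!cap_has(kvm_caps, f))
                cap_clear(vcpu_caps, f);
}

int main(void)
{
        /* The init loop already seeded both bits from userspace's CPUID... */
        cap_set(vcpu_caps, TOY_FEATURE_NRIPS);
        cap_set(vcpu_caps, TOY_FEATURE_GBPAGES);

        /* ...but KVM itself only supports GBPAGES. */
        cap_set(kvm_caps, TOY_FEATURE_GBPAGES);

        cap_constrain(TOY_FEATURE_NRIPS);    /* cleared: KVM lacks it */
        cap_constrain(TOY_FEATURE_GBPAGES);  /* kept: both sides agree */

        /* change() mirrors the GBPAGES logic: the bit tracks a predicate. */
        bool allow_gbpages = false;          /* stand-in for the tdp checks */
        cap_change(vcpu_caps, TOY_FEATURE_GBPAGES, allow_gbpages);

        printf("NRIPS=%d GBPAGES=%d\n",
               cap_has(vcpu_caps, TOY_FEATURE_NRIPS),
               cap_has(vcpu_caps, TOY_FEATURE_GBPAGES)); /* both 0 now */
        return 0;
}

That asymmetry is what makes the conversions below a nop: the init loop has
already set each bit iff userspace enumerated the feature, so the old
kvm_cpu_cap_has() && guest_cpuid_has() pair collapses into a single
clear-if-KVM-unsupported check. It also explains the explicit else branch
in the vmx.c XSAVES hunk: under the opt-in scheme, doing nothing left the
bit clear, whereas now the bit was seeded from guest CPUID and has to be
cleared explicitly.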
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 2acd2e3bb1b0..1bc431a7e862 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4339,27 +4339,29 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>           * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
>           * the guest read/write access to the host's XSS.
>           */
> -        if (boot_cpu_has(X86_FEATURE_XSAVE) &&
> -            boot_cpu_has(X86_FEATURE_XSAVES) &&
> -            guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
> -                guest_cpu_cap_set(vcpu, X86_FEATURE_XSAVES);
> +        guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
> +                             boot_cpu_has(X86_FEATURE_XSAVE) &&
> +                             boot_cpu_has(X86_FEATURE_XSAVES) &&
> +                             guest_cpuid_has(vcpu, X86_FEATURE_XSAVE));
>
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_NRIPS);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_LBRV);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_NRIPS);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_TSCRATEMSR);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_LBRV);
>
>          /*
>           * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
>           * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
>           * SVM on Intel is bonkers and extremely unlikely to work).
>           */
> -        if (!guest_cpuid_is_intel(vcpu))
> -                guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
> +        if (guest_cpuid_is_intel(vcpu))
> +                guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
> +        else
> +                guest_cpu_cap_constrain(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
>
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_VGIF);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_VNMI);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_PAUSEFILTER);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_PFTHRESHOLD);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_VGIF);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_VNMI);
>
>          svm_recalc_instruction_intercepts(vcpu, svm);
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 1bc56596d653..d873386e1473 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7838,10 +7838,12 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>           */
>          if (boot_cpu_has(X86_FEATURE_XSAVE) &&
>              guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
> -                guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_XSAVES);
> +                guest_cpu_cap_constrain(vcpu, X86_FEATURE_XSAVES);
> +        else
> +                guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
>
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_VMX);
> -        guest_cpu_cap_check_and_set(vcpu, X86_FEATURE_LAM);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_VMX);
> +        guest_cpu_cap_constrain(vcpu, X86_FEATURE_LAM);
>
>          vmx_setup_uret_msrs(vmx);
>

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
        Maxim Levitsky