On Thu, Nov 24, 2022, Yang Weijiang wrote:
>         return vmcs_config.cpu_based_2nd_exec_ctrl &
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index b28be793de29..59bdd9873fb5 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2360,6 +2360,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
>                 if (guest_efer != host_efer)
>                         exec_control |= VM_ENTRY_LOAD_IA32_EFER;
>         }
> +
> +       if (cpu_has_vmx_arch_lbr())
> +               exec_control &= ~VM_ENTRY_LOAD_IA32_LBR_CTL;
> +
>         vm_entry_controls_set(vmx, exec_control);
>
>         /*
> @@ -2374,6 +2378,10 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
>                 exec_control |= VM_EXIT_LOAD_IA32_EFER;
>         else
>                 exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
> +
> +       if (cpu_has_vmx_arch_lbr())
> +               exec_control &= ~VM_EXIT_CLEAR_IA32_LBR_CTL;

This is wrong. If KVM doesn't enumerate support to L1, then L1's value needs
to be preserved on entry/exit to/from L1<->L2. I.e. just delete these lines.

>         vm_exit_controls_set(vmx, exec_control);
>
>         /*
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2ab4c33b5008..359da38a19a1 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2599,6 +2599,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>                 { VM_ENTRY_LOAD_IA32_EFER,              VM_EXIT_LOAD_IA32_EFER },
>                 { VM_ENTRY_LOAD_BNDCFGS,                VM_EXIT_CLEAR_BNDCFGS },
>                 { VM_ENTRY_LOAD_IA32_RTIT_CTL,          VM_EXIT_CLEAR_IA32_RTIT_CTL },
> +               { VM_ENTRY_LOAD_IA32_LBR_CTL,           VM_EXIT_CLEAR_IA32_LBR_CTL },
>         };
>
>         memset(vmcs_conf, 0, sizeof(*vmcs_conf));
> @@ -4794,6 +4795,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>         vpid_sync_context(vmx->vpid);
>
>         vmx_update_fb_clear_dis(vcpu, vmx);
> +
> +       if (!init_event && cpu_has_vmx_arch_lbr())
> +               vmcs_write64(GUEST_IA32_LBR_CTL, 0);

This belongs in init_vmcs().

> }
>
> static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
> @@ -6191,6 +6195,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
>             vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
>                 pr_err("PerfGlobCtl = 0x%016llx\n",
>                        vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
> +       if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR) &&

Follow every other field and check the VMCS support, not the cap.

> +           vmentry_ctl & VM_ENTRY_LOAD_IA32_LBR_CTL)
> +               pr_err("ArchLBRCtl = 0x%016llx\n",
> +                      vmcs_read64(GUEST_IA32_LBR_CTL));
>         if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
>                 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
>         pr_err("Interruptibility = %08x ActivityState = %08x\n",

...

> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index a3da84f4ea45..f68c8a53a248 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -493,7 +493,8 @@ static inline u8 vmx_get_rvi(void)
>          VM_ENTRY_LOAD_IA32_EFER |                                      \
>          VM_ENTRY_LOAD_BNDCFGS |                                        \
>          VM_ENTRY_PT_CONCEAL_PIP |                                      \
> -        VM_ENTRY_LOAD_IA32_RTIT_CTL)
> +        VM_ENTRY_LOAD_IA32_RTIT_CTL |                                  \
> +        VM_ENTRY_LOAD_IA32_LBR_CTL)
>
>  #define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS                            \
>         (VM_EXIT_SAVE_DEBUG_CONTROLS |                                  \
> @@ -515,7 +516,8 @@ static inline u8 vmx_get_rvi(void)
>          VM_EXIT_LOAD_IA32_EFER |                                       \
>          VM_EXIT_CLEAR_BNDCFGS |                                        \
>          VM_EXIT_PT_CONCEAL_PIP |                                       \
> -        VM_EXIT_CLEAR_IA32_RTIT_CTL)
> +        VM_EXIT_CLEAR_IA32_RTIT_CTL |                                  \
> +        VM_EXIT_CLEAR_IA32_LBR_CTL)

Enabling these flags by default is wrong. KVM will clear LBR_CTL on VM-Exit
when arch LBRs are supported, and AFAICT, never restore the host's values.
And that will happen regardless of whether or not the guest is using arch
LBRs.
I assume the correct approach is to toggle these fields, but I'll be damned if
I can figure out what the intended behavior of the existing LBR support is.
I'll follow up in the cover letter.
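
E.g. something along these lines, completely untested and only meant to
illustrate the toggling; vmx_update_arch_lbr_ctls() and guest_can_use_arch_lbrs()
are made-up names, and the real condition depends on how the existing LBR
plumbing is supposed to behave:

/*
 * Rough sketch for arch/x86/kvm/vmx/vmx.c: set the arch LBR entry/exit
 * controls only while the guest is actually using arch LBRs, instead of
 * forcing them on via the control masks, so the host's LBR_CTL isn't
 * clobbered on every VM-Exit for guests that never touch the feature.
 */
static void vmx_update_arch_lbr_ctls(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!cpu_has_vmx_arch_lbr())
		return;

	if (guest_can_use_arch_lbrs(vcpu)) {	/* placeholder predicate */
		vm_entry_controls_setbit(vmx, VM_ENTRY_LOAD_IA32_LBR_CTL);
		vm_exit_controls_setbit(vmx, VM_EXIT_CLEAR_IA32_LBR_CTL);
	} else {
		vm_entry_controls_clearbit(vmx, VM_ENTRY_LOAD_IA32_LBR_CTL);
		vm_exit_controls_clearbit(vmx, VM_EXIT_CLEAR_IA32_LBR_CTL);
	}
}

That still wouldn't address restoring the host's value when the guest _is_
using arch LBRs, but at least guests that never touch the feature wouldn't
clobber host state.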