On 8/1/2024 12:58 PM, Mingwei Zhang wrote:
> From: Xiong Zhang <xiong.y.zhang@xxxxxxxxxxxxxxx>
>
> In PMU passthrough mode, there are three requirements to manage
> IA32_PERF_GLOBAL_CTRL:
> - guest IA32_PERF_GLOBAL_CTRL MSR must be saved at vm exit.
> - IA32_PERF_GLOBAL_CTRL MSR must be cleared at vm exit to avoid any
>   counter of running within KVM runloop.
> - guest IA32_PERF_GLOBAL_CTRL MSR must be restored at vm entry.
>
> Introduce vmx_set_perf_global_ctrl() function to auto switching
> IA32_PERF_GLOBAL_CTR and invoke it after the VMM finishes setting up the
> CPUID bits.
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> Signed-off-by: Xiong Zhang <xiong.y.zhang@xxxxxxxxxxxxxxx>
> Tested-by: Yongwei Ma <yongwei.ma@xxxxxxxxx>
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/vmx.h |   1 +
>  arch/x86/kvm/vmx/vmx.c     | 117 +++++++++++++++++++++++++++++++------
>  arch/x86/kvm/vmx/vmx.h     |   3 +-
>  3 files changed, 103 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index d77a31039f24..5ed89a099533 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -106,6 +106,7 @@
>  #define VM_EXIT_CLEAR_BNDCFGS                   0x00800000
>  #define VM_EXIT_PT_CONCEAL_PIP                  0x01000000
>  #define VM_EXIT_CLEAR_IA32_RTIT_CTL             0x02000000
> +#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL      0x40000000
>
>  #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR       0x00036dff
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 339742350b7a..34a420fa98c5 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4394,6 +4394,97 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
>          return pin_based_exec_ctrl;
>  }
>
> +static void vmx_set_perf_global_ctrl(struct vcpu_vmx *vmx)
> +{
> +        u32 vmentry_ctrl = vm_entry_controls_get(vmx);
> +        u32 vmexit_ctrl = vm_exit_controls_get(vmx);
> +        struct vmx_msrs *m;
> +        int i;
> +
> +        if (cpu_has_perf_global_ctrl_bug() ||
> +            !is_passthrough_pmu_enabled(&vmx->vcpu)) {
> +                vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
> +                vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
> +                vmexit_ctrl &= ~VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
> +        }
> +
> +        if (is_passthrough_pmu_enabled(&vmx->vcpu)) {
> +                /*
> +                 * Setup auto restore guest PERF_GLOBAL_CTRL MSR at vm entry.
> +                 */
> +                if (vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) {
> +                        vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, 0);
> +                } else {
> +                        m = &vmx->msr_autoload.guest;
> +                        i = vmx_find_loadstore_msr_slot(m, MSR_CORE_PERF_GLOBAL_CTRL);
> +                        if (i < 0) {
> +                                i = m->nr++;
> +                                vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
> +                        }
> +                        m->val[i].index = MSR_CORE_PERF_GLOBAL_CTRL;
> +                        m->val[i].value = 0;

This function has a lot of duplicated code for initializing/clearing the MSR
autoload/autostore regions; we could add two small helpers to avoid the
duplication, e.g.:

static inline void vmx_init_loadstore_msr(struct vmx_msrs *m, int idx, bool load);
static inline void vmx_clear_loadstore_msr(struct vmx_msrs *m, int idx, bool load);
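Roughly something like this (completely untested sketch, just to illustrate the
idea; I'm passing the VMCS count field explicitly instead of deriving it from a
"load" flag, since the entry-load, exit-load and exit-store regions each mirror
a different count field):

static void vmx_init_loadstore_msr(struct vmx_msrs *m, unsigned long count_field,
                                   u32 msr, u64 value)
{
        /* Reuse an existing slot for this MSR, or grow the region by one. */
        int i = vmx_find_loadstore_msr_slot(m, msr);

        if (i < 0) {
                i = m->nr++;
                vmcs_write32(count_field, m->nr);
        }
        m->val[i].index = msr;
        /* For the autostore region the CPU overwrites this on VM-exit anyway. */
        m->val[i].value = value;
}

static void vmx_clear_loadstore_msr(struct vmx_msrs *m, unsigned long count_field,
                                    u32 msr)
{
        int i = vmx_find_loadstore_msr_slot(m, msr);

        if (i < 0)
                return;

        /* Swap the last entry into the freed slot and shrink the region. */
        m->nr--;
        m->val[i] = m->val[m->nr];
        vmcs_write32(count_field, m->nr);
}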
> +                }
> +                /*
> +                 * Setup auto clear host PERF_GLOBAL_CTRL msr at vm exit.
> +                 */
> +                if (vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) {
> +                        vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0);
> +                } else {
> +                        m = &vmx->msr_autoload.host;
> +                        i = vmx_find_loadstore_msr_slot(m, MSR_CORE_PERF_GLOBAL_CTRL);
> +                        if (i < 0) {
> +                                i = m->nr++;
> +                                vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
> +                        }
> +                        m->val[i].index = MSR_CORE_PERF_GLOBAL_CTRL;
> +                        m->val[i].value = 0;
> +                }
> +                /*
> +                 * Setup auto save guest PERF_GLOBAL_CTRL msr at vm exit
> +                 */
> +                if (!(vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL)) {
> +                        m = &vmx->msr_autostore.guest;
> +                        i = vmx_find_loadstore_msr_slot(m, MSR_CORE_PERF_GLOBAL_CTRL);
> +                        if (i < 0) {
> +                                i = m->nr++;
> +                                vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
> +                        }
> +                        m->val[i].index = MSR_CORE_PERF_GLOBAL_CTRL;
> +                }
> +        } else {
> +                if (!(vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)) {
> +                        m = &vmx->msr_autoload.guest;
> +                        i = vmx_find_loadstore_msr_slot(m, MSR_CORE_PERF_GLOBAL_CTRL);
> +                        if (i >= 0) {
> +                                m->nr--;
> +                                m->val[i] = m->val[m->nr];
> +                                vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
> +                        }
> +                }
> +                if (!(vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)) {
> +                        m = &vmx->msr_autoload.host;
> +                        i = vmx_find_loadstore_msr_slot(m, MSR_CORE_PERF_GLOBAL_CTRL);
> +                        if (i >= 0) {
> +                                m->nr--;
> +                                m->val[i] = m->val[m->nr];
> +                                vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
> +                        }
> +                }
> +                if (!(vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL)) {
> +                        m = &vmx->msr_autostore.guest;
> +                        i = vmx_find_loadstore_msr_slot(m, MSR_CORE_PERF_GLOBAL_CTRL);
> +                        if (i >= 0) {
> +                                m->nr--;
> +                                m->val[i] = m->val[m->nr];
> +                                vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
> +                        }
> +                }
> +        }
> +
> +        vm_entry_controls_set(vmx, vmentry_ctrl);
> +        vm_exit_controls_set(vmx, vmexit_ctrl);
> +}
> +
>  static u32 vmx_vmentry_ctrl(void)
>  {
>          u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
> @@ -4401,17 +4492,10 @@ static u32 vmx_vmentry_ctrl(void)
>          if (vmx_pt_mode_is_system())
>                  vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
>                                    VM_ENTRY_LOAD_IA32_RTIT_CTL);
> -        /*
> -         * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
> -         */
> -        vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
> -                          VM_ENTRY_LOAD_IA32_EFER |
> -                          VM_ENTRY_IA32E_MODE);
> -
> -        if (cpu_has_perf_global_ctrl_bug())
> -                vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
> -
> -        return vmentry_ctrl;
> +        /*
> +         * IA32e mode, and loading of EFER is toggled dynamically.
> +         */
> +        return vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_EFER | VM_ENTRY_IA32E_MODE);
>  }
>
>  static u32 vmx_vmexit_ctrl(void)
> @@ -4429,12 +4513,8 @@ static u32 vmx_vmexit_ctrl(void)
>                  vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
>                                   VM_EXIT_CLEAR_IA32_RTIT_CTL);
>
> -        if (cpu_has_perf_global_ctrl_bug())
> -                vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
> -
> -        /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
> -        return vmexit_ctrl &
> -                ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
> +        /* Loading of EFER is toggled dynamically */
> +        return vmexit_ctrl & ~VM_EXIT_LOAD_IA32_EFER;
>  }
>
>  void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
> @@ -4777,6 +4857,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
>                  vmcs_write64(VM_FUNCTION_CONTROL, 0);
>
>          vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
> +        vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
>          vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
>          vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
>          vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
> @@ -7916,6 +7997,8 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>          else
>                  exec_controls_setbit(vmx, CPU_BASED_RDPMC_EXITING);
>
> +        vmx_set_perf_global_ctrl(vmx);
> +
>          /* Refresh #PF interception to account for MAXPHYADDR changes. */
>          vmx_update_exception_bitmap(vcpu);
>  }
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 7b64e271a931..32e3974c1a2c 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void)
>           VM_EXIT_LOAD_IA32_EFER | \
>           VM_EXIT_CLEAR_BNDCFGS | \
>           VM_EXIT_PT_CONCEAL_PIP | \
> -         VM_EXIT_CLEAR_IA32_RTIT_CTL)
> +         VM_EXIT_CLEAR_IA32_RTIT_CTL | \
> +         VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL)
>
>  #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
>          (PIN_BASED_EXT_INTR_MASK | \
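With helpers along the lines sketched above (again, illustrative only, using the
explicit count-field signature rather than the exact prototypes I suggested),
the cleanup path in vmx_set_perf_global_ctrl() for the
!is_passthrough_pmu_enabled() case would collapse to something like:

        } else {
                if (!(vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL))
                        vmx_clear_loadstore_msr(&vmx->msr_autoload.guest,
                                                VM_ENTRY_MSR_LOAD_COUNT,
                                                MSR_CORE_PERF_GLOBAL_CTRL);
                if (!(vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL))
                        vmx_clear_loadstore_msr(&vmx->msr_autoload.host,
                                                VM_EXIT_MSR_LOAD_COUNT,
                                                MSR_CORE_PERF_GLOBAL_CTRL);
                if (!(vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL))
                        vmx_clear_loadstore_msr(&vmx->msr_autostore.guest,
                                                VM_EXIT_MSR_STORE_COUNT,
                                                MSR_CORE_PERF_GLOBAL_CTRL);
        }

and the three "add" branches would shrink similarly.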