On 7/31/2024 9:58 PM, Mingwei Zhang wrote: > @@ -7295,6 +7299,46 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) > msrs[i].host, false); > } > > +static void save_perf_global_ctrl_in_passthrough_pmu(struct vcpu_vmx *vmx) > +{ > + struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); > + int i; > + > + if (vm_exit_controls_get(vmx) & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL) { > + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); As commented in patch 26, compared with the MSR auto save/store area approach, the exec control approach needs one relatively expensive VMCS read on every VM exit. > + } else { > + i = pmu->global_ctrl_slot_in_autostore; > + pmu->global_ctrl = vmx->msr_autostore.guest.val[i].value; > + } > +} > + > +static void load_perf_global_ctrl_in_passthrough_pmu(struct vcpu_vmx *vmx) > +{ > + struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); > + u64 global_ctrl = pmu->global_ctrl; > + int i; > + > + if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) { > + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, global_ctrl); Ditto for this VMCS write. We could optimize it by introducing a new flag, pmu->global_ctrl_dirty, and updating GUEST_IA32_PERF_GLOBAL_CTRL only when needed, but that would make the code even more complicated. > + } else { > + i = pmu->global_ctrl_slot_in_autoload; > + vmx->msr_autoload.guest.val[i].value = global_ctrl; > + } > +} > + > +static void __atomic_switch_perf_msrs_in_passthrough_pmu(struct vcpu_vmx *vmx) > +{ > + load_perf_global_ctrl_in_passthrough_pmu(vmx); > +} > + > +static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) > +{ > + if (is_passthrough_pmu_enabled(&vmx->vcpu)) > + __atomic_switch_perf_msrs_in_passthrough_pmu(vmx); > + else > + __atomic_switch_perf_msrs(vmx); > +} > + > static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit) > { > struct vcpu_vmx *vmx = to_vmx(vcpu);