On 10/25/2024 4:26 AM, Chen, Zide wrote: > > On 7/31/2024 9:58 PM, Mingwei Zhang wrote: > >> @@ -7295,6 +7299,46 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) >> msrs[i].host, false); >> } >> >> +static void save_perf_global_ctrl_in_passthrough_pmu(struct vcpu_vmx *vmx) >> +{ >> + struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); >> + int i; >> + >> + if (vm_exit_controls_get(vmx) & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL) { >> + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); > As commented in patch 26, compared with MSR auto save/store area > approach, the exec control way needs one relatively expensive VMCS read > on every VM exit. Anyway, let us have an evaluation and let the data speak. > >> + } else { >> + i = pmu->global_ctrl_slot_in_autostore; >> + pmu->global_ctrl = vmx->msr_autostore.guest.val[i].value; >> + } >> +} >> + >> +static void load_perf_global_ctrl_in_passthrough_pmu(struct vcpu_vmx *vmx) >> +{ >> + struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu); >> + u64 global_ctrl = pmu->global_ctrl; >> + int i; >> + >> + if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) { >> + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, global_ctrl); > ditto. > > We may optimize it by introducing a new flag pmu->global_ctrl_dirty and > update GUEST_IA32_PERF_GLOBAL_CTRL only when it's needed. But this > makes the code even more complicated. 
> > >> + } else { >> + i = pmu->global_ctrl_slot_in_autoload; >> + vmx->msr_autoload.guest.val[i].value = global_ctrl; >> + } >> +} >> + >> +static void __atomic_switch_perf_msrs_in_passthrough_pmu(struct vcpu_vmx *vmx) >> +{ >> + load_perf_global_ctrl_in_passthrough_pmu(vmx); >> +} >> + >> +static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) >> +{ >> + if (is_passthrough_pmu_enabled(&vmx->vcpu)) >> + __atomic_switch_perf_msrs_in_passthrough_pmu(vmx); >> + else >> + __atomic_switch_perf_msrs(vmx); >> +} >> + >> static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >