On Tue, Jul 09, 2024, Dapeng Mi wrote: > Return KVM_MSR_RET_INVALID instead of 0 to inject #GP to guest for all > invalid PMU MSRs access > > Currently KVM silently drops the access and doesn't inject #GP for some > invalid PMU MSRs like MSR_P6_PERFCTR0/MSR_P6_PERFCTR1, > MSR_P6_EVNTSEL0/MSR_P6_EVNTSEL1, but KVM still injects #GP for all other > invalid PMU MSRs. This leads to guest see different behavior on invalid > PMU access and may confuse guest. This is by design. I'm not saying it's _good_ design, but it is very much intended. More importantly, it's established behavior, i.e. having KVM inject #GP could break existing setups. > This behavior is introduced by the > 'commit 5753785fa977 ("KVM: do not #GP on perf MSR writes when vPMU is disabled")' > in 2012. This commit seems to want to keep back compatible with weird > behavior of some guests in vPMU disabled case, Ya, because at the time, guest kernels hadn't been taught to play nice with unexpected virtualization setups, i.e. VMs without PMUs. > but strongly suspect if it's still available nowadays. I don't follow this comment. > Since Perfmon v6 starts, the GP counters could become discontinuous on > HW, It's possible that HW doesn't support GP counters 0 and 1. > Considering this situation KVM should inject #GP for all invalid PMU MSRs > access. IIUC, the behavior you want is to inject a #GP if the vCPU has a PMU and the MSR is not valid. We can do that and still maintain backwards compatibility, hopefully without too much ugliness (maybe even an improvement!). This? (completely untested) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5aa7581802f7..b5e95e5f1f32 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4063,9 +4063,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: case MSR_P6_EVNTSEL0 ... 
MSR_P6_EVNTSEL1: - if (kvm_pmu_is_valid_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr_info); - + if (vcpu_to_pmu(vcpu)->version) + goto default_handler; if (data) kvm_pr_unimpl_wrmsr(vcpu, msr, data); break; @@ -4146,6 +4145,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; #endif default: +default_handler: if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); @@ -4251,8 +4251,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: - if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) - return kvm_pmu_get_msr(vcpu, msr_info); + if (vcpu_to_pmu(vcpu)->version) + goto default_handler; msr_info->data = 0; break; case MSR_IA32_UCODE_REV: @@ -4505,6 +4505,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; #endif default: +default_handler: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 96446134c00b..0de606b542ac 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -344,7 +344,8 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, uint8_t nr_counters, uint32_t or_mask) { - const bool pmu_has_fast_mode = !guest_get_pmu_version(); + const u8 pmu_version = guest_get_pmu_version(); + const bool pmu_has_fast_mode = !pmu_version; uint8_t i; for (i = 0; i < nr_possible_counters; i++) { @@ -363,12 +364,13 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters const bool expect_success = i < nr_counters || (or_mask & BIT(i)); /* - * KVM drops writes to 
MSR_P6_PERFCTR[0|1] if the counters are - * unsupported, i.e. doesn't #GP and reads back '0'. + * KVM drops writes to MSR_P6_PERFCTR[0|1] if the vCPU doesn't + * have a PMU, i.e. doesn't #GP and reads back '0'. */ const uint64_t expected_val = expect_success ? test_val : 0; - const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && - msr != MSR_P6_PERFCTR1; + const bool expect_gp = !expect_success && + (pmu_version || + (msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1)); uint32_t rdpmc_idx; uint8_t vector; uint64_t val;