On 2/21/22 11:33, Maxim Levitsky wrote:
For compatibility with userspace the MSR_AMD64_TSC_RATIO can be set
even when the feature is not exposed to the guest, which breaks assumptions
that it has the default value in this case.
Fixes: 5228eb96a487 ("KVM: x86: nSVM: implement nested TSC scaling")
Cc: stable@xxxxxxxxxxxxxxx
Reported-by: Dāvis Mosāns <davispuh@xxxxxxxxx>
Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
It's not clear how QEMU ends up writing MSR_AMD64_TSC_RATIO_DEFAULT
rather than 0, but we clearly have a bug in KVM. It should not allow
writing 0 in the first place if tsc-ratio is not available in the VM.
If QEMU really can get itself in this situation, we cannot fix this
except with KVM_CAP_DISABLE_QUIRKS (a quirk that would accept and ignore
host-initiated writes if the CPUID bit is not available) or perhaps with
a pr_warn_ratelimited and a quick deprecation cycle, until some time
after 6.2.1 is released.
Hmmm... maybe the issue is actually that KVM *returns* 0 from
KVM_GET_MSRS? And in this case, fixing KVM would also prevent QEMU from
sending the bogus KVM_SET_MSRS?
Thanks,
Paolo
---
arch/x86/kvm/svm/nested.c | 10 ++++------
arch/x86/kvm/svm/svm.c | 5 +++--
arch/x86/kvm/svm/svm.h | 1 +
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1218b5a342fc..a2e2436057dc 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -574,14 +574,12 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
vcpu->arch.l1_tsc_offset,
svm->nested.ctl.tsc_offset,
- svm->tsc_ratio_msr);
+ svm_get_l2_tsc_multiplier(vcpu));
svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
- if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
- WARN_ON(!svm->tsc_scaling_enabled);
+ if (svm_get_l2_tsc_multiplier(vcpu) != kvm_default_tsc_scaling_ratio)
nested_svm_update_tsc_ratio_msr(vcpu);
- }
svm->vmcb->control.int_ctl =
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
@@ -867,8 +865,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
- if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
- WARN_ON(!svm->tsc_scaling_enabled);
+ if (svm_get_l2_tsc_multiplier(vcpu) != kvm_default_tsc_scaling_ratio) {
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
}
@@ -1264,6 +1261,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ WARN_ON_ONCE(!svm->tsc_scaling_enabled);
vcpu->arch.tsc_scaling_ratio =
kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
svm->tsc_ratio_msr);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 975be872cd1a..5cf6bb5bfd3e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -911,11 +911,12 @@ static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
return svm->nested.ctl.tsc_offset;
}
-static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
+u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- return svm->tsc_ratio_msr;
+ return svm->tsc_scaling_enabled ? svm->tsc_ratio_msr :
+ kvm_default_tsc_scaling_ratio;
}
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 852b12aee03d..006571dd30df 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -542,6 +542,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
+u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
struct vmcb_control_area *control);