Re: [PATCH] KVM: nVMX/nSVM: Fix bug which sets vcpu->arch.tsc_offset to L1 tsc_offset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 6, 2018 at 2:14 AM, Liran Alon <liran.alon@xxxxxxxxxx> wrote:
> From: Leonid Shatz <leonid.shatz@xxxxxxxxxx>
>
> Since commit e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to
> represent the running guest"), vcpu->arch.tsc_offset meaning was
> changed to always reflect the tsc_offset value set on active VMCS.
> Regardless if vCPU is currently running L1 or L2.
>
> However, above mentioned commit failed to also change
> kvm_vcpu_write_tsc_offset() to set vcpu->arch.tsc_offset correctly.
> This is because vmx_write_tsc_offset() could set the tsc_offset value
> in active VMCS to given offset parameter *plus vmcs12->tsc_offset*.
> However, kvm_vcpu_write_tsc_offset() just sets vcpu->arch.tsc_offset
> to given offset parameter. Without taking into account the possible
> addition of vmcs12->tsc_offset. (Same is true for SVM case).
>
> Fix this issue by changing kvm_x86_ops->write_tsc_offset() to return
> actually set tsc_offset in active VMCS and modify
> kvm_vcpu_write_tsc_offset() to set returned value in
> vcpu->arch.tsc_offset.
> In addition, rename write_tsc_offset() callback to write_l1_tsc_offset()
> to make it clear that it is meant to set L1 TSC offset.

I think write_l1_tsc_offset() is misleading, since the TSC offset
that's actually written in guest mode is L2's TSC offset. Perhaps it
would be more clear to simply rename the argument from 'offset' to
'l1_tsc_offset'?

> Fixes: e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest")
>
> Reviewed-by: Liran Alon <liran.alon@xxxxxxxxxx>
> Reviewed-by: Mihai Carabas <mihai.carabas@xxxxxxxxxx>
> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx>
> Signed-off-by: Leonid Shatz <leonid.shatz@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 ++-
>  arch/x86/kvm/svm.c              |  5 +++--
>  arch/x86/kvm/vmx.c              | 18 +++++++++---------
>  arch/x86/kvm/x86.c              |  6 +++---
>  4 files changed, 17 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 55e51ff7e421..fbda5a917c5b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1094,7 +1094,8 @@ struct kvm_x86_ops {
>         bool (*has_wbinvd_exit)(void);
>
>         u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
> -       void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
> +       /* Returns actual tsc_offset set in active VMCS */

'VMCS' is Intel-centric. Perhaps 'Returns the actual TSC offset set
for the active guest'?

> +       u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
>
>         void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 0e21ccc46792..db788dc5f1e7 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
>         return vcpu->arch.tsc_offset;
>  }
>
> -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> +static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>  {
>         struct vcpu_svm *svm = to_svm(vcpu);
>         u64 g_tsc_offset = 0;
> @@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>         svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
>
>         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
> +       return svm->vmcb->control.tsc_offset;
>  }
>
>  static void avic_init_vmcb(struct vcpu_svm *svm)
> @@ -7149,7 +7150,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>         .has_wbinvd_exit = svm_has_wbinvd_exit,
>
>         .read_l1_tsc_offset = svm_read_l1_tsc_offset,
> -       .write_tsc_offset = svm_write_tsc_offset,
> +       .write_l1_tsc_offset = svm_write_l1_tsc_offset,
>
>         .set_tdp_cr3 = set_tdp_cr3,
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 4555077d69ce..59633175fe6c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3455,8 +3455,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
>  /*
>   * writes 'offset' into guest's timestamp counter offset register
>   */

The comment above needs some clarification, since 'offset' is only
written to the guest's TSC offset field if L1 is active. When L2 is
active, a different value is calculated and written to the guest's TSC
offset field.

> -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
> +static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>  {
> +       u64 active_offset = offset;
>         if (is_guest_mode(vcpu)) {
>                 /*
>                  * We're here if L1 chose not to trap WRMSR to TSC. According
> @@ -3464,17 +3465,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>                  * set for L2 remains unchanged, and still needs to be added
>                  * to the newly set TSC to get L2's TSC.
>                  */
> -               struct vmcs12 *vmcs12;
> -               /* recalculate vmcs02.TSC_OFFSET: */
> -               vmcs12 = get_vmcs12(vcpu);
> -               vmcs_write64(TSC_OFFSET, offset +
> -                       (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
> -                        vmcs12->tsc_offset : 0));
> +               struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +               if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
> +                       active_offset += vmcs12->tsc_offset;
>         } else {
>                 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
>                                            vmcs_read64(TSC_OFFSET), offset);

Why do we only trace for L1?

> -               vmcs_write64(TSC_OFFSET, offset);
>         }
> +
> +       vmcs_write64(TSC_OFFSET, active_offset);
> +       return active_offset;
>  }



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux