On Tue, Nov 6, 2018 at 2:14 AM, Liran Alon <liran.alon@xxxxxxxxxx> wrote: > From: Leonid Shatz <leonid.shatz@xxxxxxxxxx> > > Since commit e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to > represent the running guest"), vcpu->arch.tsc_offset meaning was > changed to always reflect the tsc_offset value set on active VMCS. > Regardless if vCPU is currently running L1 or L2. > > However, above mentioned commit failed to also change > kvm_vcpu_write_tsc_offset() to set vcpu->arch.tsc_offset correctly. > This is because vmx_write_tsc_offset() could set the tsc_offset value > in active VMCS to given offset parameter *plus vmcs12->tsc_offset*. > However, kvm_vcpu_write_tsc_offset() just sets vcpu->arch.tsc_offset > to given offset parameter. Without taking into account the possible > addition of vmcs12->tsc_offset. (Same is true for SVM case). > > Fix this issue by changing kvm_x86_ops->write_tsc_offset() to return > actually set tsc_offset in active VMCS and modify > kvm_vcpu_write_tsc_offset() to set returned value in > vcpu->arch.tsc_offset. > In addition, rename write_tsc_offset() callback to write_l1_tsc_offset() > to make it clear that it is meant to set L1 TSC offset. I think write_l1_tsc_offset() is misleading, since the TSC offset that's actually written in guest mode is L2's TSC offset. Perhaps it would be more clear to simply rename the argument from 'offset' to 'l1_tsc_offset'? > Fixes: e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest") > > Reviewed-by: Liran Alon <liran.alon@xxxxxxxxxx> > Reviewed-by: Mihai Carabas <mihai.carabas@xxxxxxxxxx> > Reviewed-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx> > Signed-off-by: Leonid Shatz <leonid.shatz@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 3 ++- > arch/x86/kvm/svm.c | 5 +++-- > arch/x86/kvm/vmx.c | 18 +++++++++--------- > arch/x86/kvm/x86.c | 6 +++--- > 4 files changed, 17 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 55e51ff7e421..fbda5a917c5b 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1094,7 +1094,8 @@ struct kvm_x86_ops { > bool (*has_wbinvd_exit)(void); > > u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); > - void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); > + /* Returns actual tsc_offset set in active VMCS */ 'VMCS' is Intel-centric. Perhaps 'Returns the actual TSC offset set for the active guest'? > + u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); > > void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 0e21ccc46792..db788dc5f1e7 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) > return vcpu->arch.tsc_offset; > } > > -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) > +static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) > { > struct vcpu_svm *svm = to_svm(vcpu); > u64 g_tsc_offset = 0; > @@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) > svm->vmcb->control.tsc_offset = offset + g_tsc_offset; > > mark_dirty(svm->vmcb, VMCB_INTERCEPTS); > + return svm->vmcb->control.tsc_offset; > } > > static void avic_init_vmcb(struct vcpu_svm *svm) > @@ -7149,7 +7150,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > .has_wbinvd_exit = svm_has_wbinvd_exit, > > .read_l1_tsc_offset = svm_read_l1_tsc_offset, > - .write_tsc_offset = svm_write_tsc_offset, > + .write_l1_tsc_offset = svm_write_l1_tsc_offset, > > .set_tdp_cr3 = set_tdp_cr3, > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 4555077d69ce..59633175fe6c 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -3455,8 +3455,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) > /* > * writes 'offset' into guest's timestamp counter offset register > */ The comment above needs some clarification, since 'offset' is only written to the guest's TSC offset field if L1 is active. When L2 is active, a different value is calculated and written to the guest's TSC offset field. > -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) > +static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) > { > + u64 active_offset = offset; > if (is_guest_mode(vcpu)) { > /* > * We're here if L1 chose not to trap WRMSR to TSC. According > @@ -3464,17 +3465,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) > * set for L2 remains unchanged, and still needs to be added > * to the newly set TSC to get L2's TSC. > */ > - struct vmcs12 *vmcs12; > - /* recalculate vmcs02.TSC_OFFSET: */ > - vmcs12 = get_vmcs12(vcpu); > - vmcs_write64(TSC_OFFSET, offset + > - (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? > - vmcs12->tsc_offset : 0)); > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING)) > + active_offset += vmcs12->tsc_offset; > } else { > trace_kvm_write_tsc_offset(vcpu->vcpu_id, > vmcs_read64(TSC_OFFSET), offset); Why do we only trace for L1? > - vmcs_write64(TSC_OFFSET, offset); > } > + > + vmcs_write64(TSC_OFFSET, active_offset); > + return active_offset; > }