Re: [PATCH] KVM: nVMX/nSVM: Fix bug which sets vcpu->arch.tsc_offset to L1 tsc_offset

On 08/11/18 01:24, Jim Mattson wrote:
> On Tue, Nov 6, 2018 at 2:14 AM, Liran Alon <liran.alon@xxxxxxxxxx> wrote:
>> From: Leonid Shatz <leonid.shatz@xxxxxxxxxx>
>>
>> Since commit e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to
>> represent the running guest"), the meaning of vcpu->arch.tsc_offset
>> changed: it now always reflects the tsc_offset value set in the active
>> VMCS, regardless of whether the vCPU is currently running L1 or L2.
>>
>> However, the above-mentioned commit failed to also change
>> kvm_vcpu_write_tsc_offset() to set vcpu->arch.tsc_offset correctly.
>> This is because vmx_write_tsc_offset() could set the tsc_offset value
>> in the active VMCS to the given offset parameter *plus
>> vmcs12->tsc_offset*, while kvm_vcpu_write_tsc_offset() just sets
>> vcpu->arch.tsc_offset to the given offset parameter, without taking
>> into account the possible addition of vmcs12->tsc_offset. (The same
>> is true for the SVM case.)
>>
>> Fix this issue by changing kvm_x86_ops->write_tsc_offset() to return
>> the tsc_offset actually set in the active VMCS, and by modifying
>> kvm_vcpu_write_tsc_offset() to store the returned value in
>> vcpu->arch.tsc_offset.
>> In addition, rename the write_tsc_offset() callback to
>> write_l1_tsc_offset() to make it clear that it is meant to set the
>> L1 TSC offset.
> 
> I think write_l1_tsc_offset() is misleading, since the TSC offset
> that's actually written in guest mode is L2's TSC offset. Perhaps it
> would be more clear to simply rename the argument from 'offset' to
> 'l1_tsc_offset'?
> 
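(That alternative would presumably keep the callback name and only
rename the parameter, i.e. something along the lines of

	u64 (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 l1_tsc_offset);

rather than renaming the hook itself.)
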
>> Fixes: e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest")
>>
>> Reviewed-by: Liran Alon <liran.alon@xxxxxxxxxx>
>> Reviewed-by: Mihai Carabas <mihai.carabas@xxxxxxxxxx>
>> Reviewed-by: Krish Sadhukhan <krish.sadhukhan@xxxxxxxxxx>
>> Signed-off-by: Leonid Shatz <leonid.shatz@xxxxxxxxxx>
>> ---
>>  arch/x86/include/asm/kvm_host.h |  3 ++-
>>  arch/x86/kvm/svm.c              |  5 +++--
>>  arch/x86/kvm/vmx.c              | 18 +++++++++---------
>>  arch/x86/kvm/x86.c              |  6 +++---
>>  4 files changed, 17 insertions(+), 15 deletions(-)
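
(For reference, the x86.c hunk is not quoted below; presumably it boils
down to kvm_vcpu_write_tsc_offset() caching whatever the callback
returns, along the lines of this sketch -- the exact hunk may differ:

static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
	/* Cache the TSC offset that is actually live in the active VMCS/VMCB. */
	vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
}

i.e. vcpu->arch.tsc_offset always ends up holding the offset of the
currently active VMCS/VMCB, whether that is L1's or L2's.)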
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 55e51ff7e421..fbda5a917c5b 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1094,7 +1094,8 @@ struct kvm_x86_ops {
>>         bool (*has_wbinvd_exit)(void);
>>
>>         u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
>> -       void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
>> +       /* Returns actual tsc_offset set in active VMCS */
> 
> 'VMCS' is Intel-centric. Perhaps 'Returns the actual TSC offset set
> for the active guest'?
> 
>> +       u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
>>
>>         void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 0e21ccc46792..db788dc5f1e7 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
>>         return vcpu->arch.tsc_offset;
>>  }
>>
>> -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>> +static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>>  {
>>         struct vcpu_svm *svm = to_svm(vcpu);
>>         u64 g_tsc_offset = 0;
>> @@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>>         svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
>>
>>         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
>> +       return svm->vmcb->control.tsc_offset;
>>  }
>>
>>  static void avic_init_vmcb(struct vcpu_svm *svm)
>> @@ -7149,7 +7150,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>>         .has_wbinvd_exit = svm_has_wbinvd_exit,
>>
>>         .read_l1_tsc_offset = svm_read_l1_tsc_offset,
>> -       .write_tsc_offset = svm_write_tsc_offset,
>> +       .write_l1_tsc_offset = svm_write_l1_tsc_offset,
>>
>>         .set_tdp_cr3 = set_tdp_cr3,
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 4555077d69ce..59633175fe6c 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -3455,8 +3455,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
>>  /*
>>   * writes 'offset' into guest's timestamp counter offset register
>>   */
> 
> The comment above needs some clarification, since 'offset' is only
> written to the guest's TSC offset field if L1 is active. When L2 is
> active, a different value is calculated and written to the guest's TSC
> offset field.

I just removed the comment; it's more misleading than anything else.

I don't much like the new kvm_x86_ops name, but at least it's
consistent with read_l1_tsc_offset.

With some cleanup and making the trace event available for L2 writes
too, I'm squashing this in:

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2085539149ed..f03c46c183d8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3464,27 +3464,26 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 	return vcpu->arch.tsc_offset;
 }

-/*
- * writes 'offset' into guest's timestamp counter offset register
- */
 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+	u64 old_offset = vcpu->arch.tsc_offset;
 	u64 active_offset = offset;
-	if (is_guest_mode(vcpu)) {
+
+	if (is_guest_mode(vcpu) &&
+	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) {
 		/*
 		 * We're here if L1 chose not to trap WRMSR to TSC. According
 		 * to the spec, this should set L1's TSC; The offset that L1
 		 * set for L2 remains unchanged, and still needs to be added
 		 * to the newly set TSC to get L2's TSC.
 		 */
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
-			active_offset += vmcs12->tsc_offset;
-	} else {
-		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
-					   vmcs_read64(TSC_OFFSET), offset);
+		old_offset -= vmcs12->tsc_offset;
+		active_offset += vmcs12->tsc_offset;
 	}

+	trace_kvm_write_tsc_offset(vcpu->vcpu_id, old_offset, offset);
 	vmcs_write64(TSC_OFFSET, active_offset);
 	return active_offset;
 }
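
To make the L2-active path above concrete, here is a worked example
with made-up numbers (not taken from the patch): vmcs12->tsc_offset is
500 and vmcs02 currently carries a TSC_OFFSET of 1500, so L1's old
offset is 1000; L1 then writes a new offset of 2000.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t vmcs12_tsc_offset = 500;	/* offset L1 programmed for L2 */
	uint64_t arch_tsc_offset   = 1500;	/* current vmcs02 TSC_OFFSET */
	uint64_t new_l1_offset     = 2000;	/* 'offset' argument from L1 */

	/* What the L2-active path above computes: */
	uint64_t old_offset    = arch_tsc_offset - vmcs12_tsc_offset;
	uint64_t active_offset = new_l1_offset + vmcs12_tsc_offset;

	assert(old_offset == 1000);	/* traced as the previous L1 offset */
	assert(active_offset == 2500);	/* written to vmcs02 and returned */
	return 0;
}

So the tracepoint consistently reports L1 offsets (1000 -> 2000), while
the value actually written to vmcs02 and cached in vcpu->arch.tsc_offset
keeps carrying the extra vmcs12->tsc_offset.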

Paolo

>> -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>> +static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>>  {
>> +       u64 active_offset = offset;
>>         if (is_guest_mode(vcpu)) {
>>                 /*
>>                  * We're here if L1 chose not to trap WRMSR to TSC. According
>> @@ -3464,17 +3465,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
>>                  * set for L2 remains unchanged, and still needs to be added
>>                  * to the newly set TSC to get L2's TSC.
>>                  */
>> -               struct vmcs12 *vmcs12;
>> -               /* recalculate vmcs02.TSC_OFFSET: */
>> -               vmcs12 = get_vmcs12(vcpu);
>> -               vmcs_write64(TSC_OFFSET, offset +
>> -                       (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
>> -                        vmcs12->tsc_offset : 0));
>> +               struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> +               if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
>> +                       active_offset += vmcs12->tsc_offset;
>>         } else {
>>                 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
>>                                            vmcs_read64(TSC_OFFSET), offset);
> 
> Why do we only trace for L1?
> 
>> -               vmcs_write64(TSC_OFFSET, offset);
>>         }
>> +
>> +       vmcs_write64(TSC_OFFSET, active_offset);
>> +       return active_offset;
>>  }



