ACK from me, but please check with Joerg on the SVM change. I believe the scaling is done properly, however it won't hurt to have a second opinion. Zach On Tue, Aug 2, 2011 at 5:54 AM, Nadav Har'El <nyh@xxxxxxxxxx> wrote: > KVM assumed in several places that reading the TSC MSR returns the value for > L1. This is incorrect, because when L2 is running, the correct TSC read exit > emulation is to return L2's value. > > We therefore add a new x86_ops function, read_l1_tsc, to use in places that > specifically need to read the L1 TSC, NOT the TSC of the current level of > guest. > > Note that one change, of one line in kvm_arch_vcpu_load, is made redundant > by a different patch sent by Zachary Amsden (and not yet applied): > kvm_arch_vcpu_load() should not read the guest TSC, and if it didn't, of > course we didn't have to change the call of kvm_get_msr() to read_l1_tsc(). > > Signed-off-by: Nadav Har'El <nyh@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/kvm/svm.c | 9 +++++++++ > arch/x86/kvm/vmx.c | 17 +++++++++++++++++ > arch/x86/kvm/x86.c | 8 ++++---- > 4 files changed, 32 insertions(+), 4 deletions(-) > > --- .before/arch/x86/include/asm/kvm_host.h 2011-08-02 15:51:02.000000000 +0300 > +++ .after/arch/x86/include/asm/kvm_host.h 2011-08-02 15:51:02.000000000 +0300 > @@ -636,6 +636,8 @@ struct kvm_x86_ops { > struct x86_instruction_info *info, > enum x86_intercept_stage stage); > > + u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu); > + > const struct trace_print_flags *exit_reasons_str; > }; > > --- .before/arch/x86/kvm/vmx.c 2011-08-02 15:51:02.000000000 +0300 > +++ .after/arch/x86/kvm/vmx.c 2011-08-02 15:51:02.000000000 +0300 > @@ -1748,6 +1748,21 @@ static u64 guest_read_tsc(void) > } > > /* > + * Like guest_read_tsc, but always returns L1's notion of the timestamp > + * counter, even if a nested guest (L2) is currently running. > + */ > +u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) > +{ > + u64 host_tsc, tsc_offset; > + > + rdtscll(host_tsc); > + tsc_offset = is_guest_mode(vcpu) ? > + to_vmx(vcpu)->nested.vmcs01_tsc_offset : > + vmcs_read64(TSC_OFFSET); > + return host_tsc + tsc_offset; > +} > + > +/* > * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ > * ioctl. In this case the call-back should update internal vmx state to make > * the changes effective. > @@ -7059,6 +7074,8 @@ static struct kvm_x86_ops vmx_x86_ops = > .set_tdp_cr3 = vmx_set_cr3, > > .check_intercept = vmx_check_intercept, > + > + .read_l1_tsc = vmx_read_l1_tsc, > }; > > static int __init vmx_init(void) > --- .before/arch/x86/kvm/svm.c 2011-08-02 15:51:02.000000000 +0300 > +++ .after/arch/x86/kvm/svm.c 2011-08-02 15:51:02.000000000 +0300 > @@ -2894,6 +2894,13 @@ static int cr8_write_interception(struct > return 0; > } > > +u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu) > +{ > + struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); > + return vmcb->control.tsc_offset + > + svm_scale_tsc(vcpu, native_read_tsc()); > +} > + > static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) > { > struct vcpu_svm *svm = to_svm(vcpu); > @@ -4243,6 +4250,8 @@ static struct kvm_x86_ops svm_x86_ops = > .set_tdp_cr3 = set_tdp_cr3, > > .check_intercept = svm_check_intercept, > + > + .read_l1_tsc = svm_read_l1_tsc, > }; > > static int __init svm_init(void) > --- .before/arch/x86/kvm/x86.c 2011-08-02 15:51:02.000000000 +0300 > +++ .after/arch/x86/kvm/x86.c 2011-08-02 15:51:02.000000000 +0300 > @@ -1098,7 +1098,7 @@ static int kvm_guest_time_update(struct > > /* Keep irq disabled to prevent changes to the clock */ > local_irq_save(flags); > - kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); > + tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); > kernel_ns = get_kernel_ns(); > this_tsc_khz = vcpu_tsc_khz(v); > if (unlikely(this_tsc_khz == 0)) { > @@ -2215,7 +2215,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu > s64 tsc_delta; > u64 tsc; > > - kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); > + tsc = kvm_x86_ops->read_l1_tsc(vcpu); > tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : > tsc - vcpu->arch.last_guest_tsc; > > @@ -2239,7 +2239,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * > { > kvm_x86_ops->vcpu_put(vcpu); > kvm_put_guest_fpu(vcpu); > - kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); > + vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); > } > > static int is_efer_nx(void) > @@ -5722,7 +5722,7 @@ static int vcpu_enter_guest(struct kvm_v > if (hw_breakpoint_active()) > hw_breakpoint_restore(); > > - kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); > + vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); > > vcpu->mode = OUTSIDE_GUEST_MODE; > smp_wmb(); > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html