On Thu, Nov 15, 2012 at 12:26:37PM -0800, Will Auld wrote:
> CPUID.7.0.EBX[1]=1 indicates IA32_TSC_ADJUST MSR 0x3b is supported
> 
> Basic design is to emulate the MSR by allowing reads and writes to a guest
> vcpu specific location to store the value of the emulated MSR while adding
> the value to the vmcs tsc_offset. In this way the IA32_TSC_ADJUST value will
> be included in all reads to the TSC MSR whether through rdmsr or rdtsc. This
> is of course as long as the "use TSC counter offsetting" VM-execution
> control is enabled as well as the IA32_TSC_ADJUST control.
> 
> However, because hardware will only return the TSC + IA32_TSC_ADJUST + vmcs
> tsc_offset for a guest process when it does an rdtsc (with the correct
> settings), the value of our virtualized IA32_TSC_ADJUST must be stored in
> one of these three locations. The argument against storing it in the actual
> MSR is performance: this is likely to be seldom used, while the save/restore
> would be required on every transition. IA32_TSC_ADJUST was created as a way
> to solve some issues with writing the TSC itself, so that is not an option
> either. The remaining option, defined above as our solution, has the problem
> of returning incorrect vmcs tsc_offset values (unless we intercept and fix,
> not done here) as mentioned above. More problematic, however, is that storing
> the data in vmcs tsc_offset has a different semantic effect on the system
> than using the actual MSR. This is illustrated in the following example:
> the hypervisor sets IA32_TSC_ADJUST, then the guest sets it, and a guest
> process performs an rdtsc. In this case the guest process will get TSC +
> IA32_TSC_ADJUST_hypervisor + vmcs tsc_offset including IA32_TSC_ADJUST_guest.
> While the total system semantics change, the semantics as seen by the guest
> do not, and hence this will not cause a problem.
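
For readers following along, the bookkeeping described above boils down to
roughly the sketch below. This is only an illustration of the arithmetic, not
code from the patch; the structure and names are made up for the example.

/*
 * Illustration only -- not the patch's code. Shows how a guest write to
 * IA32_TSC_ADJUST can be folded into the VMCS tsc_offset so that guest
 * rdtsc observes the adjustment without saving/restoring the real MSR
 * on every VM entry/exit.
 */
struct fake_vcpu {
	s64 ia32_tsc_adjust;	/* value returned on guest RDMSR 0x3b */
	u64 vmcs_tsc_offset;	/* what the CPU adds to TSC on guest rdtsc */
};

static void emulate_wrmsr_tsc_adjust(struct fake_vcpu *v, s64 new_adjust)
{
	s64 delta = new_adjust - v->ia32_tsc_adjust;

	v->vmcs_tsc_offset += delta;	/* guest-visible TSC shifts by delta */
	v->ia32_tsc_adjust = new_adjust;
}
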
> 
> Signed-off-by: Will Auld <will.auld@xxxxxxxxx>
> ---
>  arch/x86/include/asm/cpufeature.h |  1 +
>  arch/x86/include/asm/kvm_host.h   |  4 ++++
>  arch/x86/include/asm/msr-index.h  |  1 +
>  arch/x86/kvm/cpuid.c              |  4 ++++
>  arch/x86/kvm/svm.c                |  6 ++++++
>  arch/x86/kvm/vmx.c                | 17 +++++++++++++++++
>  arch/x86/kvm/x86.c                | 17 +++++++++++++++++
>  7 files changed, 50 insertions(+)
> 
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index 6b7ee5f..e574d81 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -199,6 +199,7 @@
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
>  #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
> +#define X86_FEATURE_TSC_ADJUST	(9*32+ 1) /* TSC adjustment MSR 0x3b */
>  #define X86_FEATURE_BMI1	(9*32+ 3) /* 1st group bit manipulation extensions */
>  #define X86_FEATURE_HLE		(9*32+ 4) /* Hardware Lock Elision */
>  #define X86_FEATURE_AVX2	(9*32+ 5) /* AVX2 instructions */
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index da34027..0a9ac5c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -442,6 +442,8 @@ struct kvm_vcpu_arch {
>  	u32 virtual_tsc_mult;
>  	u32 virtual_tsc_khz;
>  
> +	s64 ia32_tsc_adjust_msr;
> +
>  	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
>  	unsigned nmi_pending; /* NMI queued after currently running handler */
>  	bool nmi_injected;    /* Trying to inject an NMI this entry */
> @@ -693,6 +695,8 @@ struct kvm_x86_ops {
>  	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
>  
>  	u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
> +	bool (*have_ia32_tsc_adjust_msr)(void);
> +	void (*update_ia32_tsc_adjust_msr)(struct kvm_vcpu *vcpu, s64 offset);
>  	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
>  
>  	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 957ec87..6486569 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -231,6 +231,7 @@
>  #define MSR_IA32_EBL_CR_POWERON		0x0000002a
>  #define MSR_EBC_FREQUENCY_ID		0x0000002c
>  #define MSR_IA32_FEATURE_CONTROL	0x0000003a
> +#define MSR_IA32_TSC_ADJUST		0x0000003b
>  
>  #define FEATURE_CONTROL_LOCKED				(1<<0)
>  #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 0595f13..3007fbd 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -320,6 +320,10 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  		if (index == 0) {
>  			entry->ebx &= kvm_supported_word9_x86_features;
>  			cpuid_mask(&entry->ebx, 9);
> +			if (kvm_x86_ops->have_ia32_tsc_adjust_msr()) {
> +				// TSC_ADJUST is emulated
> +				entry->ebx |= F(TSC_ADJUST);
> +			}

Why only Intel hosts? It's possible to emulate this on SVM hosts as well.

You should check for the guest CPUID bit being enabled instead: introduce
guest_cpuid_has_tsc_adjust() and use it in place of
kvm_x86_ops->have_ia32_tsc_adjust_msr(). The vmx_update_ia32_tsc_adjust_msr()
functionality can then move to x86.c.
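
Something along these lines, modeled on the existing guest_cpuid_has_*()
helpers in arch/x86/kvm/cpuid.h; the name and exact form are only a
suggestion, not code from the posted patch:

/* Suggested helper, untested sketch. */
static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 7, 0);
	return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
}
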
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 5ac11f0..815815d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1051,6 +1051,11 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
>  	return target_tsc - tsc;
>  }
>  
> +static bool svm_have_ia32_tsc_adjust_msr(void)
> +{
> +	return false;
> +}
> +
>  static void init_vmcb(struct vcpu_svm *svm)
>  {
>  	struct vmcb_control_area *control = &svm->vmcb->control;
> @@ -4336,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.write_tsc_offset = svm_write_tsc_offset,
>  	.adjust_tsc_offset = svm_adjust_tsc_offset,
>  	.compute_tsc_offset = svm_compute_tsc_offset,
> +	.have_ia32_tsc_adjust_msr = svm_have_ia32_tsc_adjust_msr,
>  	.read_l1_tsc = svm_read_l1_tsc,
>  
>  	.set_tdp_cr3 = set_tdp_cr3,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 819970f..cff8b20 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1900,6 +1900,18 @@ static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
>  	return target_tsc - native_read_tsc();
>  }
>  
> +static bool vmx_have_ia32_tsc_adjust_msr(void)
> +{
> +	return true;
> +}
> +
> +static void vmx_update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
> +{
> +	u64 curr_offset = vmcs_read64(TSC_OFFSET);
> +	vcpu->arch.ia32_tsc_adjust_msr =
> +		offset - (curr_offset - vcpu->arch.ia32_tsc_adjust_msr);
> +}
> +

	ia32_tsc_adjust_msr += offset - curr_offset;

Looks nicer.

>  static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
> @@ -2243,6 +2255,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		}
>  		ret = kvm_set_msr_common(vcpu, msr_info);
>  		break;
> +	case MSR_IA32_TSC_ADJUST:
> +		ret = kvm_set_msr_common(vcpu, msr_info);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!vmx->rdtscp_enabled)
>  			return 1;
> @@ -7341,6 +7356,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.write_tsc_offset = vmx_write_tsc_offset,
>  	.adjust_tsc_offset = vmx_adjust_tsc_offset,
>  	.compute_tsc_offset = vmx_compute_tsc_offset,
> +	.have_ia32_tsc_adjust_msr = vmx_have_ia32_tsc_adjust_msr,
> +	.update_ia32_tsc_adjust_msr = vmx_update_ia32_tsc_adjust_msr,
>  	.read_l1_tsc = vmx_read_l1_tsc,
>  
>  	.set_tdp_cr3 = vmx_set_cr3,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a3aac1c..7d69a18 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -824,6 +824,7 @@ static u32 msrs_to_save[] = {
>  static unsigned num_msrs_to_save;
>  
>  static u32 emulated_msrs[] = {
> +	MSR_IA32_TSC_ADJUST,
>  	MSR_IA32_TSCDEADLINE,
>  	MSR_IA32_MISC_ENABLE,
>  	MSR_IA32_MCG_STATUS,
> @@ -1131,6 +1132,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
>  	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
>  	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
>  
> +	if (kvm_x86_ops->have_ia32_tsc_adjust_msr() && !msr->host_initiated) {
> +		kvm_x86_ops->update_ia32_tsc_adjust_msr(vcpu, offset);
> +	}

No need for braces.

Please write a testcase for kvm-unit-test, see
http://git.kernel.org/?p=virt/kvm/kvm-unit-tests.git;a=summary.
Can start by copying x86/tsc.c to x86/tsc_adjust.c.

Otherwise looks fine.
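
A rough starting point for such a test might look like the outline below. This
is untested and only a sketch; it assumes the usual kvm-unit-tests helpers
(cpuid_indexed/rdmsr/wrmsr/rdtsc from the x86 processor header), so adjust the
names to whatever the tree actually provides.

/* x86/tsc_adjust.c -- untested outline, modeled on x86/tsc.c. */
#include "libcflat.h"
#include "processor.h"

#define IA32_TSC_ADJUST 0x3b

int main(void)
{
	u64 t1, t2, adjust;
	int ok = 1;

	/* CPUID.7.0:EBX[1] advertises IA32_TSC_ADJUST support. */
	if (!(cpuid_indexed(7, 0).b & (1 << 1))) {
		printf("IA32_TSC_ADJUST not supported, skipping\n");
		return 0;
	}

	/* Writing the MSR should shift guest rdtsc by the same amount. */
	t1 = rdtsc();
	wrmsr(IA32_TSC_ADJUST, 0x100000000ull);
	t2 = rdtsc();
	adjust = rdmsr(IA32_TSC_ADJUST);

	ok &= adjust == 0x100000000ull;		/* MSR reads back what we wrote */
	ok &= t2 - t1 >= 0x100000000ull;	/* rdtsc jumped forward by at least that much */

	printf("IA32_TSC_ADJUST %s: readback %lld, rdtsc delta %lld\n",
	       ok ? "ok" : "FAIL", (long long)adjust, (long long)(t2 - t1));
	return ok ? 0 : 1;
}
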