To enable it, you need to set -accel kvm,x-precise-tsc=on, and have a kernel that supports this feature. Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> --- accel/kvm/kvm-all.c | 28 +++++++++ include/sysemu/kvm.h | 1 + target/i386/cpu.h | 1 + target/i386/kvm.c | 140 +++++++++++++++++++++++++++++++++--------- target/i386/machine.c | 19 ++++++ 5 files changed, 161 insertions(+), 28 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index baaa54249d..3829f2e7a3 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -104,6 +104,8 @@ struct KVMState OnOffAuto kernel_irqchip_split; bool sync_mmu; uint64_t manual_dirty_log_protect; + /* Use KVM_GET_TSC_STATE/KVM_SET_TSC_STATE to access IA32_TSC */ + bool precise_tsc; /* The man page (and posix) say ioctl numbers are signed int, but * they're not. Linux, glibc and *BSD all treat ioctl numbers as * unsigned, and treating them as signed here can break things */ @@ -3194,6 +3196,24 @@ bool kvm_kernel_irqchip_split(void) return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; } +bool kvm_has_precise_tsc(void) +{ + return kvm_state && kvm_state->precise_tsc; +} + +static void kvm_set_precise_tsc(Object *obj, + bool value, Error **errp G_GNUC_UNUSED) +{ + KVMState *s = KVM_STATE(obj); + s->precise_tsc = value; +} + +static bool kvm_get_precise_tsc(Object *obj, Error **errp G_GNUC_UNUSED) +{ + KVMState *s = KVM_STATE(obj); + return s->precise_tsc; +} + static void kvm_accel_instance_init(Object *obj) { KVMState *s = KVM_STATE(obj); @@ -3222,6 +3242,14 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "kvm-shadow-mem", "KVM shadow MMU size"); + + object_class_property_add_bool(oc, "x-precise-tsc", + kvm_get_precise_tsc, + kvm_set_precise_tsc); + + object_class_property_set_description(oc, "x-precise-tsc", + "Use precise tsc kvm API"); + } static const TypeInfo kvm_accel_type = { diff --git a/include/sysemu/kvm.h 
b/include/sysemu/kvm.h index bb5d5cf497..14eff2b1c9 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -519,6 +519,7 @@ void kvm_init_irq_routing(KVMState *s); bool kvm_kernel_irqchip_allowed(void); bool kvm_kernel_irqchip_required(void); bool kvm_kernel_irqchip_split(void); +bool kvm_has_precise_tsc(void); /** * kvm_arch_irqchip_create: diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 88e8586f8f..d2230d9735 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1460,6 +1460,7 @@ typedef struct CPUX86State { uint64_t tsc_adjust; uint64_t tsc_deadline; uint64_t tsc_aux; + uint64_t tsc_ns_timestamp; uint64_t xcr0; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index a2934dda02..4adb7d6246 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -121,7 +121,6 @@ static int has_xsave; static int has_xcrs; static int has_pit_state2; static int has_exception_payload; - static bool has_msr_mcg_ext_ctl; static struct kvm_cpuid2 *cpuid_cache; @@ -196,31 +195,112 @@ static int kvm_get_tsc(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data = {}; int ret; if (env->tsc_valid) { return 0; } - memset(&msr_data, 0, sizeof(msr_data)); - msr_data.info.nmsrs = 1; - msr_data.entries[0].index = MSR_IA32_TSC; - env->tsc_valid = !runstate_is_running(); + if (kvm_has_precise_tsc()) { + struct kvm_tsc_state tsc_state; - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); - if (ret < 0) { - return ret; + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_TSC_STATE, &tsc_state); + if (ret < 0) { + return ret; + } + + env->tsc = tsc_state.tsc; + + if (tsc_state.flags & KVM_TSC_STATE_TIMESTAMP_VALID) { + env->tsc_ns_timestamp = tsc_state.nsec; + } + + if (tsc_state.flags & KVM_TSC_STATE_TSC_ADJUST_VALID) { + env->tsc_adjust = tsc_state.tsc_adjust; + } + + } else { + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[2]; + } msr_data = { + .info.nmsrs = 1, 
+ .entries[0].index = MSR_IA32_TSC, + }; + + if (has_msr_tsc_adjust) { + msr_data.info.nmsrs++; + msr_data.entries[1].index = MSR_TSC_ADJUST; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + + assert(ret == msr_data.info.nmsrs); + env->tsc = msr_data.entries[0].data; + if (has_msr_tsc_adjust) { + env->tsc_adjust = msr_data.entries[1].data; + } } - assert(ret == 1); - env->tsc = msr_data.entries[0].data; + env->tsc_valid = !runstate_is_running(); return 0; } +static int kvm_set_tsc(CPUState *cs) +{ + int ret; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + if (kvm_has_precise_tsc()) { + struct kvm_tsc_state tsc_state = { + .tsc = env->tsc, + }; + + if (env->tsc_ns_timestamp) { + tsc_state.nsec = env->tsc_ns_timestamp; + tsc_state.flags |= KVM_TSC_STATE_TIMESTAMP_VALID; + } + + if (has_msr_tsc_adjust) { + tsc_state.tsc_adjust = env->tsc_adjust; + tsc_state.flags |= KVM_TSC_STATE_TSC_ADJUST_VALID; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_TSC_STATE, &tsc_state); + if (ret < 0) { + return ret; + } + + } else { + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[2]; + } msr_data = { + .info.nmsrs = 1, + .entries[0].index = MSR_IA32_TSC, + .entries[0].data = env->tsc, + }; + + if (has_msr_tsc_adjust) { + msr_data.info.nmsrs++; + msr_data.entries[1].index = MSR_TSC_ADJUST; + msr_data.entries[1].data = env->tsc_adjust; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + + assert(ret == msr_data.info.nmsrs); + } + return ret; +} + static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg) { kvm_get_tsc(cpu); @@ -1780,6 +1860,13 @@ int kvm_arch_init_vcpu(CPUState *cs) } } + if (kvm_has_precise_tsc()) { + if (!kvm_check_extension(cs->kvm_state, KVM_CAP_PRECISE_TSC)) { + error_report("kvm: Precise TSC is not supported by the host's KVM"); + return -ENOTSUP; + } + } + if (cpu->vmware_cpuid_freq /* Guests depend on 
0x40000000 to detect this feature, so only expose * it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */ @@ -2756,9 +2843,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (has_msr_tsc_aux) { kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux); } - if (has_msr_tsc_adjust) { - kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust); - } if (has_msr_misc_enable) { kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, env->msr_ia32_misc_enable); @@ -2802,7 +2886,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) * for normal writeback. Limit them to reset or full state updates. */ if (level >= KVM_PUT_RESET_STATE) { - kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc); kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr); if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) { @@ -3142,9 +3225,6 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_tsc_aux) { kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0); } - if (has_msr_tsc_adjust) { - kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0); - } if (has_msr_tsc_deadline) { kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0); } @@ -3178,10 +3258,6 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_virt_ssbd) { kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0); } - if (!env->tsc_valid) { - kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0); - env->tsc_valid = !runstate_is_running(); - } #ifdef TARGET_X86_64 if (lm_capable_kernel) { @@ -3385,9 +3461,6 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_TSC_AUX: env->tsc_aux = msrs[i].data; break; - case MSR_TSC_ADJUST: - env->tsc_adjust = msrs[i].data; - break; case MSR_IA32_TSCDEADLINE: env->tsc_deadline = msrs[i].data; break; @@ -3995,6 +4068,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level) if (ret < 0) { return ret; } + + ret = kvm_set_tsc(cpu); + if (ret < 0) { + return ret; + } } ret = kvm_put_tscdeadline_msr(x86_cpu); @@ -4064,6 +4142,12 @@ int kvm_arch_get_registers(CPUState *cs) if (ret < 0) { goto out; } + + 
ret = kvm_get_tsc(cs); + if (ret < 0) { + goto out; + } + ret = 0; out: cpu_sync_bndcs_hflags(&cpu->env); diff --git a/target/i386/machine.c b/target/i386/machine.c index 233e46bb70..59b1c9be2b 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1359,6 +1359,24 @@ static const VMStateDescription vmstate_msr_tsx_ctrl = { } }; + +static bool tsc_ns_timestamp_needed(void *opaque) +{ + return kvm_has_precise_tsc(); +} + +static const VMStateDescription vmstate_tsc_ns_timestamp = { + .name = "cpu/tsc_ns_timestamp", + .version_id = 1, + .minimum_version_id = 1, + .needed = tsc_ns_timestamp_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.tsc_ns_timestamp, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + + VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -1493,6 +1511,7 @@ VMStateDescription vmstate_x86_cpu = { #endif #ifdef CONFIG_KVM &vmstate_nested_state, + &vmstate_tsc_ns_timestamp, #endif &vmstate_msr_tsx_ctrl, NULL -- 2.26.2