To date, VMM-directed TSC synchronization and migration have been messy. KVM has some baked-in heuristics around TSC writes to infer if the VMM is attempting to synchronize. This is problematic, as it depends on the host writing to the guest's TSC within 1 second of the last write. A much cleaner approach to configuring the guest's views of the TSC is to simply migrate the TSC offset for every vCPU. Offsets are idempotent, and thus are not subject to change depending on when the VMM actually reads the values from KVM. The VMM can then read the TSC once to capture the instant at which the guest's TSCs are paused. Implement the KVM_{GET,SET}_SYSTEM_COUNTER_STATE ioctls and advertise KVM_CAP_SYSTEM_COUNTER_STATE to userspace. Reviewed-by: David Matlack <dmatlack@xxxxxxxxxx> Signed-off-by: Oliver Upton <oupton@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/kvm.h | 8 ++++ arch/x86/kvm/x86.c | 70 +++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55efbacfc244..8768173f614c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1018,6 +1018,7 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_write; u32 last_tsc_khz; + u64 last_tsc_offset; u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0662f644aad9..60ad6b9ebcd6 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -490,4 +490,12 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +/* for KVM_CAP_SYSTEM_COUNTER_STATE */ +struct kvm_system_counter_state { + __u32 flags; + __u32 pad; + __u64 tsc_offset; + __u64 rsvd[6]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 61069995a592..bb3ecb5cd548 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c 
@@ -2332,6 +2332,11 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); +static u64 kvm_vcpu_read_tsc_offset(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.l1_tsc_offset; +} + static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { vcpu->arch.l1_tsc_offset = offset; @@ -2377,6 +2382,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc, kvm->arch.last_tsc_nsec = ns; kvm->arch.last_tsc_write = tsc; kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; + kvm->arch.last_tsc_offset = offset; vcpu->arch.last_guest_tsc = tsc; @@ -2485,6 +2491,44 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) adjust_tsc_offset_guest(vcpu, adjustment); } +static int kvm_vcpu_get_system_counter_state(struct kvm_vcpu *vcpu, + struct kvm_system_counter_state *state) +{ + if (state->flags) + return -EINVAL; + + state->tsc_offset = kvm_vcpu_read_tsc_offset(vcpu); + return 0; +} + +static int kvm_vcpu_set_system_counter_state(struct kvm_vcpu *vcpu, + struct kvm_system_counter_state *state) +{ + struct kvm *kvm = vcpu->kvm; + u64 offset, tsc, ns; + unsigned long flags; + bool matched; + + if (state->flags) + return -EINVAL; + + offset = state->tsc_offset; + + raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); + + matched = (vcpu->arch.virtual_tsc_khz && + kvm->arch.last_tsc_khz == vcpu->arch.virtual_tsc_khz && + kvm->arch.last_tsc_offset == offset); + + tsc = kvm_scale_tsc(vcpu, rdtsc()) + offset; + ns = get_kvmclock_base_ns(); + + __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched); + raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); + + return 0; +} + #ifdef CONFIG_X86_64 static u64 read_tsc(void) @@ -3912,6 +3956,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SGX_ATTRIBUTE: #endif case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM: + case KVM_CAP_SYSTEM_COUNTER_STATE: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -5200,6 
+5245,31 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } #endif + case KVM_GET_SYSTEM_COUNTER_STATE: { + struct kvm_system_counter_state state; + + r = -EFAULT; + if (copy_from_user(&state, argp, sizeof(state))) + goto out; + + r = kvm_vcpu_get_system_counter_state(vcpu, &state); + if (r) + goto out; + if (copy_to_user(argp, &state, sizeof(state))) + r = -EFAULT; + + break; + } + case KVM_SET_SYSTEM_COUNTER_STATE: { + struct kvm_system_counter_state state; + + r = -EFAULT; + if (copy_from_user(&state, argp, sizeof(state))) + goto out; + + r = kvm_vcpu_set_system_counter_state(vcpu, &state); + break; + } default: r = -EINVAL; } -- 2.32.0.rc1.229.g3e70b5a671-goog