On Wed, 2024-04-10 at 09:52 +0000, Jack Allister wrote: > > +static int kvm_vcpu_ioctl_get_clock_guest(struct kvm_vcpu *v, void __user *argp) > +{ > + struct pvclock_vcpu_time_info *vcpu_pvti = &v->arch.hv_clock; > + struct pvclock_vcpu_time_info local_pvti = { 0 }; > + struct kvm_arch *ka = &v->kvm->arch; > + uint64_t host_tsc, guest_tsc; > + bool use_master_clock; > + uint64_t kernel_ns; > + unsigned int seq; > + > + /* > + * CLOCK_MONOTONIC_RAW is not suitable for GET/SET API, > + * see kvm_vcpu_ioctl_set_clock_guest equivalent comment. > + */ > + if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC)) > + return -EINVAL; > + > + /* > + * Querying needs to be performed in a seqcount loop as it's possible > + * another vCPU has triggered an update of the master clock. If so we > + * should store the host TSC & time at this point. > + */ > + do { > + seq = read_seqcount_begin(&ka->pvclock_sc); > + use_master_clock = ka->use_master_clock; > + if (use_master_clock) { > + host_tsc = ka->master_cycle_now; > + kernel_ns = ka->master_kernel_ns; > + } > + } while (read_seqcount_retry(&ka->pvclock_sc, seq)); > + > + if (!use_master_clock) > + return -EINVAL; > + > + /* > + * It's possible that this vCPU doesn't have a HVCLOCK configured > + * but the other vCPUs may. If this is the case calculate based > + * upon the time gathered in the seqcount but do not update the > + * vCPU specific PVTI. If we have one, then use that. > + */ > + if (!vcpu_pvti->tsc_timestamp && !vcpu_pvti->system_time) { || !kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu) Otherwise you may be using out of date information. > + guest_tsc = kvm_read_l1_tsc(v, host_tsc); > + > + local_pvti.tsc_timestamp = guest_tsc; > + local_pvti.system_time = kernel_ns + ka->kvmclock_offset; This is missing the scale information in tsc_to_system_mul and tsc_shift. Is there a reason we can't just call kvm_guest_time_update() from here? (I think we looked at using it for the *SET* function, but did we look at doing so for GET?) > + } else { > + local_pvti = *vcpu_pvti; > + } > + > + if (copy_to_user(argp, &local_pvti, sizeof(local_pvti))) > + return -EFAULT; > + > + return 0; > +} > + > +static int kvm_vcpu_ioctl_set_clock_guest(struct kvm_vcpu *v, void __user *argp) > +{ > + struct pvclock_vcpu_time_info dummy_pvti; > + struct pvclock_vcpu_time_info orig_pvti; > + struct kvm *kvm = v->kvm; > + struct kvm_arch *ka = &kvm->arch; > + uint64_t clock_orig, clock_dummy; > + uint64_t host_tsc, guest_tsc; > + int64_t kernel_ns; > + int64_t correction; > + int rc = 0; > + > + /* > + * If a constant TSC is not provided by the host then KVM will > + * be using CLOCK_MONOTONIC_RAW, correction using this is not > + * precise and as such we can never sync to the precision we > + * are requiring. > + */ > + if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC)) > + return -EINVAL; > + > + if (copy_from_user(&orig_pvti, argp, sizeof(orig_pvti))) > + return -EFAULT; > + > + kvm_hv_request_tsc_page_update(kvm); > + kvm_start_pvclock_update(kvm); > + pvclock_update_vm_gtod_copy(kvm); > + > + if (!ka->use_master_clock) { > + rc = -EINVAL; > + goto out; > + } > + > + /* > + * Sample the kernel time and host TSC at a singular point. > + * We then calculate the guest TSC using this exact point in time. > + * From here we can then determine the delta using the > + * PV time info requested from the user and what we currently have > + * using the fixed point in time. This delta is then used as a > + * correction factor to subtract from the clock offset. > + */ > + if (!kvm_get_time_and_clockread(&kernel_ns, &host_tsc)) { > + rc = -EFAULT; > + goto out; > + } > + > + guest_tsc = kvm_read_l1_tsc(v, host_tsc); > + > + dummy_pvti = orig_pvti; > + dummy_pvti.tsc_timestamp = guest_tsc; > + dummy_pvti.system_time = kernel_ns + ka->kvmclock_offset; > + > + clock_orig = __pvclock_read_cycles(&orig_pvti, guest_tsc); > + clock_dummy = __pvclock_read_cycles(&dummy_pvti, guest_tsc); > In both cases here you're using the scale information directly from userspace... that you forgot to fill in for them earlier. I think we should we have a sanity check on it, to ensure that it matches the TSC frequency of the vCPU? > + correction = clock_orig - clock_dummy; > + ka->kvmclock_offset += correction; > + > +out: > + kvm_end_pvclock_update(kvm); > + return rc; > +} > + > long kvm_arch_vcpu_ioctl(struct file *filp, > unsigned int ioctl, unsigned long arg) > { > @@ -6239,6 +6357,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, > srcu_read_unlock(&vcpu->kvm->srcu, idx); > break; > } > + case KVM_SET_CLOCK_GUEST: > + r = kvm_vcpu_ioctl_set_clock_guest(vcpu, argp); > + break; > + case KVM_GET_CLOCK_GUEST: > + r = kvm_vcpu_ioctl_get_clock_guest(vcpu, argp); > + break; > #ifdef CONFIG_KVM_HYPERV > case KVM_GET_SUPPORTED_HV_CPUID: > r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 2190adbe3002..0d306311e4d6 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1548,4 +1548,7 @@ struct kvm_create_guest_memfd { > __u64 reserved[6]; > }; > > +#define KVM_SET_CLOCK_GUEST _IOW(KVMIO, 0xd5, struct pvclock_vcpu_time_info) > +#define KVM_GET_CLOCK_GUEST _IOR(KVMIO, 0xd6, struct pvclock_vcpu_time_info) > + > #endif /* __LINUX_KVM_H */
Attachment:
smime.p7s
Description: S/MIME cryptographic signature