On 25/01/2017 13:55, Marcelo Tosatti wrote: > On Wed, Jan 25, 2017 at 01:30:49PM +0100, Paolo Bonzini wrote: >> >> >> On 24/01/2017 18:09, Marcelo Tosatti wrote: >>> Add a hypercall to retrieve the host realtime clock >>> and the TSC value used to calculate that clock read. >>> >>> Used to implement clock synchronization between >>> host and guest. >>> >>> Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> >>> >>> --- >>> Documentation/virtual/kvm/hypercalls.txt | 33 ++++++++++++++++++++++++ >>> arch/x86/include/uapi/asm/kvm_para.h | 9 ++++++ >>> arch/x86/kvm/x86.c | 41 +++++++++++++++++++++++++++++++ >>> include/uapi/linux/kvm_para.h | 3 ++ >>> 4 files changed, 86 insertions(+) >>> >>> v2: improve documentation (Radim) >>> change hypercall name to KVM_HC_CLOCK_PAIRING (Radim) >>> increase padding size >>> >>> Index: kvm-ptpdriver/arch/x86/include/uapi/asm/kvm_para.h >>> =================================================================== >>> --- kvm-ptpdriver.orig/arch/x86/include/uapi/asm/kvm_para.h 2017-01-13 16:43:11.947240575 -0200 >>> +++ kvm-ptpdriver/arch/x86/include/uapi/asm/kvm_para.h 2017-01-13 16:43:25.038258187 -0200 >>> @@ -50,6 +50,15 @@ >>> __u32 pad[11]; >>> }; >>> >>> +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 >>> +struct kvm_clock_offset { >> >> The struct still has the old name. >> >>> + __s64 sec; >>> + __s64 nsec; >>> + __u64 tsc; >>> + __u32 flags; >>> + __u32 pad[9]; >>> +}; > > That was on purpose: "to pair" is the operation. > > "struct kvm_clock_pairing" makes no sense to me > (because pairing is an "action"). > > (if you'd prefer that name, let me know). A "pairing" is a union of two things, in this case a sec/nsec value and a TSC value. (I had to look it up in the dictionary though :)). >>> #define KVM_STEAL_ALIGNMENT_BITS 5 >>> #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) >>> #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) >>> Index: kvm-ptpdriver/Documentation/virtual/kvm/hypercalls.txt >>> =================================================================== >>> --- kvm-ptpdriver.orig/Documentation/virtual/kvm/hypercalls.txt 2017-01-13 16:43:11.947240575 -0200 >>> +++ kvm-ptpdriver/Documentation/virtual/kvm/hypercalls.txt 2017-01-13 16:43:25.038258187 -0200 >>> @@ -81,3 +81,36 @@ >>> same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, >>> specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) >>> is used in the hypercall for future use. >>> + >>> + >>> +6. KVM_HC_CLOCK_PAIRING >>> +------------------------ >>> +Architecture: x86 >>> +Status: active >>> +Purpose: Hypercall used to synchronize host and guest clocks. >>> +Usage: >>> + >>> +a0: guest physical address where host copies >>> +"struct kvm_clock_offset" structure. >>> + >>> +a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0) >>> +is supported (hosts CLOCK_REALTIME clock). >>> + >>> + struct kvm_clock_offset { >> >> Name of the struct not adjusted. >>> + __s64 sec; >>> + __s64 nsec; >>> + __u64 tsc; >>> + __u32 flags; >>> + __u32 pad[9]; >> >> I'd just leave one element of padding and put the struct on the stack, >> since we have no reason to think we'll need a lot of padding. > > Well you don't know: we could implement it in case of HPET clocksource > hosts. But then the guest wouldn't have a TSC value to use, would it? Also, how would it use more padding? Paolo >>> + }; >>> + >>> + Where: >>> + * sec: seconds from clock_type clock. >>> + * nsec: nanoseconds from clock_type clock. >>> + * tsc: TSC value used to calculate sec/nsec pair >>> + (this hypercall only works when host uses TSC clocksource). >> >> This is already mentioned below, and it's not clear that tsc is a guest >> TSC value. > > Ok fixed. > >>> + * flags: flags, unused (0) at the moment. >>> + >>> +Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, >>> +or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. >>> + >>> Index: kvm-ptpdriver/arch/x86/kvm/x86.c >>> =================================================================== >>> --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c 2017-01-13 16:43:20.898252596 -0200 >>> +++ kvm-ptpdriver/arch/x86/kvm/x86.c 2017-01-13 16:43:25.041258191 -0200 >>> @@ -6119,6 +6119,44 @@ >>> } >>> EXPORT_SYMBOL_GPL(kvm_emulate_halt); >>> >>> +static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, >>> + unsigned long clock_type) >>> +{ >>> + struct kvm_clock_offset *clock_offset; >>> + struct timespec ts; >>> + cycle_t cycle; >>> + int ret; >>> + >>> + if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) >>> + return -KVM_EOPNOTSUPP; >>> + >>> + if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) >>> + return -KVM_EOPNOTSUPP; >> >> Already checked in kvm_get_walltime_and_clockread. > > Fixed. > >>> + clock_offset = kzalloc(sizeof(struct kvm_clock_offset), GFP_KERNEL); >>> + if (clock_offset == NULL) >>> + return -KVM_ENOMEM; >>> + >>> + if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) { >>> + kfree(clock_offset); >>> + return -KVM_EOPNOTSUPP; >>> + } >>> + clock_offset->sec = ts.tv_sec; >>> + clock_offset->nsec = ts.tv_nsec; >>> + clock_offset->tsc = kvm_read_l1_tsc(vcpu, cycle); >>> + clock_offset->flags = 0; >>> + >>> + ret = 0; >>> + if (kvm_write_guest(vcpu->kvm, paddr, clock_offset, >>> + sizeof(struct kvm_clock_offset))) >>> + ret = -KVM_EFAULT; >>> + >>> + kfree(clock_offset); >>> + >>> + return ret; >>> +} >>> + >>> /* >>> * kvm_pv_kick_cpu_op: Kick a vcpu. >>> * >>> @@ -6183,6 +6221,9 @@ >>> kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); >>> ret = 0; >>> break; >>> + case KVM_HC_CLOCK_PAIRING: >>> + ret = kvm_pv_clock_pairing(vcpu, a0, a1); >>> + break; >>> default: >>> ret = -KVM_ENOSYS; >>> break; >>> Index: kvm-ptpdriver/include/uapi/linux/kvm_para.h >>> =================================================================== >>> --- kvm-ptpdriver.orig/include/uapi/linux/kvm_para.h 2017-01-13 16:43:11.947240575 -0200 >>> +++ kvm-ptpdriver/include/uapi/linux/kvm_para.h 2017-01-13 16:43:25.042258192 -0200 >>> @@ -14,6 +14,8 @@ >>> #define KVM_EFAULT EFAULT >>> #define KVM_E2BIG E2BIG >>> #define KVM_EPERM EPERM >>> +#define KVM_EOPNOTSUPP EOPNOTSUPP >>> +#define KVM_ENOMEM ENOMEM >> >> This file could potentially be used across multiple architectures (it's >> not in arch/x86/include/uapi/asm), but EOPNOTSUPP is not the same on >> all systems. Better use "95" which is the value it has on x86. > > Fixed. > >> All this gives: >> >> diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt >> index 55eb3e1ca494..cd35de84999f 100644 >> --- a/Documentation/virtual/kvm/hypercalls.txt >> +++ b/Documentation/virtual/kvm/hypercalls.txt >> @@ -94,9 +94,9 @@ a0: guest physical address where host copies >> "struct kvm_clock_offset" structure. >> >> a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0) >> -is supported (hosts CLOCK_REALTIME clock). >> +is supported (corresponding to the host's CLOCK_REALTIME clock). >> >> - struct kvm_clock_offset { >> + struct kvm_clock_pairing { >> __s64 sec; >> __s64 nsec; >> __u64 tsc; >> @@ -107,10 +107,12 @@ is supported (hosts CLOCK_REALTIME clock). >> Where: >> * sec: seconds from clock_type clock. >> * nsec: nanoseconds from clock_type clock. >> - * tsc: TSC value used to calculate sec/nsec pair >> - (this hypercall only works when host uses TSC clocksource). >> + * tsc: guest TSC value used to calculate sec/nsec pair >> * flags: flags, unused (0) at the moment. >> >> +The hypercall lets a guest compute a precise timestamp across >> +host and guest. The guest can use the returned TSC value to >> +compute the CLOCK_REALTIME for its clock, at the same instant. >> + >> Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, >> or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. >> - >> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h >> index ce7b411e28bc..3641d8b7dba9 100644 >> --- a/arch/x86/include/uapi/asm/kvm_para.h >> +++ b/arch/x86/include/uapi/asm/kvm_para.h >> @@ -51,12 +51,12 @@ struct kvm_steal_time { >> }; >> >> #define KVM_CLOCK_PAIRING_WALLCLOCK 0 >> -struct kvm_clock_offset { >> +struct kvm_clock_pairing { >> __s64 sec; >> __s64 nsec; >> __u64 tsc; >> __u32 flags; >> - __u32 pad[9]; >> + __u32 pad[1]; >> }; >> >> #define KVM_STEAL_ALIGNMENT_BITS 5 >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 4ecc3145ea4b..09e5d31dac98 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -6149,9 +6149,9 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) >> EXPORT_SYMBOL_GPL(kvm_emulate_halt); >> >> static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, >> - unsigned long clock_type) >> + unsigned long clock_type) >> { >> - struct kvm_clock_offset *clock_offset; >> + struct kvm_clock_pairing clock_pairing; >> struct timespec ts; >> cycle_t cycle; >> int ret; >> @@ -6159,30 +6159,19 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, >> if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) >> return -KVM_EOPNOTSUPP; >> >> - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) >> - return -KVM_EOPNOTSUPP; >> - >> - clock_offset = kzalloc(sizeof(struct kvm_clock_offset), GFP_KERNEL); >> - if (clock_offset == NULL) >> - return -KVM_ENOMEM; >> - >> - if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) { >> - kfree(clock_offset); >> + if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) >> return -KVM_EOPNOTSUPP; >> - } >> >> - clock_offset->sec = ts.tv_sec; >> - clock_offset->nsec = ts.tv_nsec; >> - clock_offset->tsc = kvm_read_l1_tsc(vcpu, cycle); >> - clock_offset->flags = 0; >> + clock_pairing.sec = ts.tv_sec; >> + clock_pairing.nsec = ts.tv_nsec; >> + clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); >> + clock_pairing.flags = 0; >> >> ret = 0; >> - if (kvm_write_guest(vcpu->kvm, paddr, clock_offset, >> - sizeof(struct kvm_clock_offset))) >> + if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, >> + sizeof(struct kvm_clock_pairing))) >> ret = -KVM_EFAULT; >> >> - kfree(clock_offset); >> - >> return ret; >> } >> >> diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h >> index 68da60e7f519..c72128527377 100644 >> --- a/include/uapi/linux/kvm_para.h >> +++ b/include/uapi/linux/kvm_para.h >> @@ -14,7 +14,7 @@ >> #define KVM_EFAULT EFAULT >> #define KVM_E2BIG E2BIG >> #define KVM_EPERM EPERM >> -#define KVM_EOPNOTSUPP EOPNOTSUPP >> +#define KVM_EOPNOTSUPP 95 >> #define KVM_ENOMEM ENOMEM >> >> #define KVM_HC_VAPIC_POLL_IRQ 1 >> >> Okay? > > Just not the padding please, the rest is fine. > >> >> Paolo >> >>> #define KVM_HC_VAPIC_POLL_IRQ 1 >>> #define KVM_HC_MMU_OP 2 >>> @@ -23,6 +25,7 @@ >>> #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 >>> #define KVM_HC_MIPS_EXIT_VM 7 >>> #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 >>> +#define KVM_HC_CLOCK_PAIRING 9 >>> >>> /* >>> * hypercalls use architecture specific >>> >>> >>>