On Thu, Aug 09, 2018 at 11:43:03AM -0300, Marcelo Tosatti wrote: > Commit 0bc48bea36d178aea9d switches the order of operations to avoid the > conversion > > TSC (without frequency correction) -> > system_timestamp (with frequency correction), > > which might cause a time jump. > > However, it leaves any other masterclock update unsafe, which includes, > at the moment: > > * HV_X64_MSR_REFERENCE_TSC MSR write. > * TSC writes. > * Host suspend/resume. > > Avoid the time jump issue by using the frequency-uncorrected > CLOCK_MONOTONIC_RAW clock. > > It is the guest's timekeeping software's responsibility > to track and correct a reference clock such as UTC. > > Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> > > --- > arch/x86/kvm/x86.c | 81 ++++++++++++++++++++++++++++++++++++++--------------- > 1 file changed, 59 insertions(+), 22 deletions(-) > > Index: linux-2.6.git/arch/x86/kvm/x86.c > =================================================================== > --- linux-2.6.git.orig/arch/x86/kvm/x86.c 2018-08-09 10:32:51.151942374 -0300 > +++ linux-2.6.git/arch/x86/kvm/x86.c 2018-08-09 10:33:27.323993052 -0300 > @@ -1240,20 +1240,25 @@ > } > > #ifdef CONFIG_X86_64 > +struct pvclock_clock { > + int vclock_mode; > + u64 cycle_last; > + u64 mask; > + u32 mult; > + u32 shift; > +}; > + > struct pvclock_gtod_data { > seqcount_t seq; > > - struct { /* extract of a clocksource struct */ > - int vclock_mode; > - u64 cycle_last; > - u64 mask; > - u32 mult; > - u32 shift; > - } clock; > + struct pvclock_clock clock; /* extract of a clocksource struct */ > + struct pvclock_clock raw_clock; /* extract of a clocksource struct */ > > + u64 boot_ns_raw; > u64 boot_ns; > u64 nsec_base; > u64 wall_time_sec; > + u64 monotonic_raw_nsec; > }; > > static struct pvclock_gtod_data pvclock_gtod_data; > @@ -1261,10 +1266,20 @@ > static void update_pvclock_gtod(struct timekeeper *tk) > { > struct pvclock_gtod_data *vdata = &pvclock_gtod_data; > - u64 boot_ns; > + u64 boot_ns, boot_ns_raw; > > 
boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); > > + /* > + * FIXME: tk->offs_boot should be converted to CLOCK_MONOTONIC_RAW > + * interval (that is, without frequency adjustment for that interval). > + * > + * Lack of this fix can cause system_timestamp to not be equal to > + * CLOCK_MONOTONIC_RAW (which happens if the host uses > + * suspend/resume). > + */ > + boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot)); > + > write_seqcount_begin(&vdata->seq); > > /* copy pvclock gtod data */ > @@ -1274,11 +1289,20 @@ > vdata->clock.mult = tk->tkr_mono.mult; > vdata->clock.shift = tk->tkr_mono.shift; > > + vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode; > + vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last; > + vdata->raw_clock.mask = tk->tkr_raw.mask; > + vdata->raw_clock.mult = tk->tkr_raw.mult; > + vdata->raw_clock.shift = tk->tkr_raw.shift; > + > vdata->boot_ns = boot_ns; > vdata->nsec_base = tk->tkr_mono.xtime_nsec; > > vdata->wall_time_sec = tk->xtime_sec; > > + vdata->boot_ns_raw = boot_ns_raw; > + vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec; > + > write_seqcount_end(&vdata->seq); > } > #endif > @@ -1293,6 +1317,20 @@ > kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); > } > > +/* > +guest_walltime = kvm_get_wallclock + kvmclock_read(). > +FIX: > +E' so' colocar em kvm_get_wallclock, o tempo de boot do host MAIS > +quanto SERIA corrigido no system_timestamp. > +E' so' salvar a diferenca entre system_timestampMONOTONIC > +e system_timestampMONOTONIC_RAW quando se atualiza > +o masterclock. > + > +Entao por isso que se utiliza "boot offset" > +(para adicionar tempo de suspend), portanto > +tambem queremos isso no caso do RAW. > +*/ Ouch, leftover Portuguese comment. Resending.