* Marcelo Tosatti (mtosatti@xxxxxxxxxx) wrote: > This patch, relative to pre-copy migration codepath, > measures the time between vm_stop() and pre_save(), > which includes copying the remaining RAM to destination, > and advances the clock by that amount. > > In a VM with 5 seconds downtime, this reduces the guest > clock difference on destination from 5s to 0.2s. > > Tested with Linux and Windows 2012 R2 guests with -cpu XXX,+hv-time. One thing that bothers me is that it's only this clock that's getting corrected; doesn't it cause things to get upset when one clock moves and the others dont? Shouldn't the pause delay be recorded somewhere architecturally independent and then be a thing that kvm-clock happens to use and other clocks might as well? Dave > Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> > > --- > > v2: use subsection (Juan Quintela) > fix older machine types support > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index 0f75dd3..a2a02ac 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -22,9 +22,11 @@ > #include "kvm_i386.h" > #include "hw/sysbus.h" > #include "hw/kvm/clock.h" > +#include "migration/migration.h" > > #include <linux/kvm.h> > #include <linux/kvm_para.h> > +#include <time.h> > > #define TYPE_KVM_CLOCK "kvmclock" > #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK) > @@ -35,7 +37,13 @@ typedef struct KVMClockState { > /*< public >*/ > > uint64_t clock; > + uint64_t ns; > bool clock_valid; > + > + uint64_t advance_clock; > + struct timespec t_aftervmstop; > + > + bool adv_clock_enabled; > } KVMClockState; > > struct pvclock_vcpu_time_info { > @@ -100,6 +108,11 @@ static void kvmclock_vm_state_change(void *opaque, int running, > s->clock = time_at_migration; > } > > + if (s->advance_clock && s->clock + s->advance_clock > s->clock) { > + s->clock += s->advance_clock; > + s->advance_clock = 0; > + } > + > data.clock = s->clock; > ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); > if (ret < 0) { > @@ -135,6 +148,18 @@ static void kvmclock_vm_state_change(void *opaque, int running, > abort(); > } > s->clock = data.clock; > + /* > + * Transition from VM-running to VM-stopped via migration? > + * Record when the VM was stopped. > + */ > + > + if (state == RUN_STATE_FINISH_MIGRATE && > + !migration_in_postcopy(migrate_get_current())) { > + clock_gettime(CLOCK_MONOTONIC, &s->t_aftervmstop); > + } else { > + s->t_aftervmstop.tv_sec = 0; > + s->t_aftervmstop.tv_nsec = 0; > + } > > /* > * If the VM is stopped, declare the clock state valid to > @@ -152,6 +177,77 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) > qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); > } > > +static uint64_t clock_delta(struct timespec *before, struct timespec *after) > +{ > + if (before->tv_sec > after->tv_sec || > + (before->tv_sec == after->tv_sec && > + before->tv_nsec > after->tv_nsec)) { > + fprintf(stderr, "clock_delta failed: before=(%ld sec, %ld nsec)," > + "after=(%ld sec, %ld nsec)\n", before->tv_sec, > + before->tv_nsec, after->tv_sec, after->tv_nsec); > + abort(); > + } > + > + return (after->tv_sec - before->tv_sec) * 1000000000ULL + > + after->tv_nsec - before->tv_nsec; > +} > + > +static void kvmclock_pre_save(void *opaque) > +{ > + KVMClockState *s = opaque; > + struct timespec now; > + uint64_t ns; > + > + if (s->t_aftervmstop.tv_sec == 0) { > + return; > + } > + > + clock_gettime(CLOCK_MONOTONIC, &now); > + > + ns = clock_delta(&s->t_aftervmstop, &now); > + > + /* > + * Linux guests can overflow if time jumps > + * forward in large increments. > + * Cap maximum adjustment to 10 minutes. > + */ > + ns = MIN(ns, 600000000000ULL); > + > + if (s->clock + ns > s->clock) { > + s->ns = ns; > + } > +} > + > +static int kvmclock_post_load(void *opaque, int version_id) > +{ > + KVMClockState *s = opaque; > + > + /* save the value from incoming migration */ > + s->advance_clock = s->ns; > + > + return 0; > +} > + > +static bool kvmclock_ns_needed(void *opaque) > +{ > + KVMClockState *s = opaque; > + > + return s->adv_clock_enabled; > +} > + > +static const VMStateDescription kvmclock_advance_ns = { > + .name = "kvmclock/advance_ns", > + .version_id = 1, > + .minimum_version_id = 1, > + .needed = kvmclock_ns_needed, > + .pre_save = kvmclock_pre_save, > + .post_load = kvmclock_post_load, > + .fields = (VMStateField[]) { > + VMSTATE_UINT64(ns, KVMClockState), > + VMSTATE_END_OF_LIST() > + } > +}; > + > static const VMStateDescription kvmclock_vmsd = { > .name = "kvmclock", > .version_id = 1, > @@ -159,15 +255,25 @@ static const VMStateDescription kvmclock_vmsd = { > .fields = (VMStateField[]) { > VMSTATE_UINT64(clock, KVMClockState), > VMSTATE_END_OF_LIST() > + }, > + .subsections = (const VMStateDescription * []) { > + &kvmclock_advance_ns, > + NULL > } > }; > > +static Property kvmclock_properties[] = { > + DEFINE_PROP_BOOL("advance_clock", KVMClockState, adv_clock_enabled, true), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > static void kvmclock_class_init(ObjectClass *klass, void *data) > { > DeviceClass *dc = DEVICE_CLASS(klass); > > dc->realize = kvmclock_realize; > dc->vmsd = &kvmclock_vmsd; > + dc->props = kvmclock_properties; > } > > static const TypeInfo kvmclock_info = { > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 98dc772..243352e 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -370,6 +370,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); > #define PC_COMPAT_2_7 \ > HW_COMPAT_2_7 \ > {\ > + .driver = "kvmclock",\ > + .property = "advance_clock",\ > + .value = "off",\ > + },\ > + {\ > .driver = TYPE_X86_CPU,\ > .property = "l3-cache",\ > .value = "off",\ > -- Dr. David Alan Gilbert / dgilbert@xxxxxxxxxx / Manchester, UK -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html