On Tue, Dec 28, 2010 at 07:38:18PM -1000, Zachary Amsden wrote: > During a host suspend, TSC may go backwards, which KVM interprets > as an unstable TSC. Technically, KVM should not be marking the > TSC unstable, which causes the TSC clocksource to go bad, but > should be adjusting the TSC offsets in such a case. > > Signed-off-by: Zachary Amsden <zamsden@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/x86.c | 66 +++++++++++++++++++++++++++++++++++--- > 2 files changed, 61 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 9e6fe39..63a82b0 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -386,6 +386,7 @@ struct kvm_vcpu_arch { > u64 last_kernel_ns; > u64 last_tsc_nsec; > u64 last_tsc_write; > + u64 tsc_offset_adjustment; > bool tsc_catchup; > > bool nmi_pending; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index b9118f4..b509c01 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2042,12 +2042,20 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > } > > kvm_x86_ops->vcpu_load(vcpu, cpu); > + > + /* Apply any externally detected TSC adjustments (due to suspend) */ > + if (unlikely(vcpu->arch.tsc_offset_adjustment)) { > + kvm_x86_ops->adjust_tsc_offset(vcpu, > + vcpu->arch.tsc_offset_adjustment); > + vcpu->arch.tsc_offset_adjustment = 0; > + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); > + } > if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { > /* Make sure TSC doesn't go backwards */ > s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : > native_read_tsc() - vcpu->arch.last_host_tsc; > if (tsc_delta < 0) > - mark_tsc_unstable("KVM discovered backwards TSC"); > + WARN_ON(!check_tsc_unstable()); > if (check_tsc_unstable()) { > kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); > vcpu->arch.tsc_catchup = 1; > @@ -5778,14 +5786,60 @@ int kvm_arch_hardware_enable(void *garbage) > { > struct kvm *kvm; > struct kvm_vcpu *vcpu; > - int i; > + int i, ret; > + u64 local_tsc; > + u64 max_tsc = 0; > + bool stable, backwards_tsc = false; > > kvm_shared_msr_cpu_online(); > - list_for_each_entry(kvm, &vm_list, vm_list) > - kvm_for_each_vcpu(i, vcpu, kvm) > - if (vcpu->cpu == smp_processor_id()) > + local_tsc = native_read_tsc(); > + stable = !check_tsc_unstable(); > + ret = kvm_x86_ops->hardware_enable(garbage); > + if (ret) > + return ret; > + > + list_for_each_entry(kvm, &vm_list, vm_list) { > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (!stable && vcpu->cpu == smp_processor_id()) > kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); > - return kvm_x86_ops->hardware_enable(garbage); > + if (stable && vcpu->arch.last_host_tsc > local_tsc) { > + backwards_tsc = true; > + if (vcpu->arch.last_host_tsc > max_tsc) > + max_tsc = vcpu->arch.last_host_tsc; > + } > + } > + } > + > + /* > + * Sometimes, reliable TSCs go backwards. This happens > + * on platforms that reset TSC during suspend or hibernate > + * actions, but maintain synchronization. We must compensate. > + * Unfortunately, we can't bring the TSCs fully up to date > + * with real time. The reason is that we aren't yet far > + * enough into CPU bringup that we know how much real time > + * has actually elapsed; our helper function, get_kernel_ns() > + * will be using boot variables that haven't been updated yet. > + * We simply find the maximum observed TSC above, then record > + * the adjustment to TSC in each VCPU. When the VCPU later > + * gets loaded, the adjustment will be applied. Note that we > + * accumulate adjustments, in case multiple suspend cycles > + * happen before the VCPU gets a chance to run again. > + * > + * Note that unreliable TSCs will be compensated already by > + * the logic in vcpu_load, which sets the TSC to catchup mode. > + * This will catchup all VCPUs to real time, but cannot > + * guarantee synchronization. > + */ > + if (backwards_tsc) { > + u64 delta_cyc = max_tsc - local_tsc; > + list_for_each_entry(kvm, &vm_list, vm_list) > + kvm_for_each_vcpu(i, vcpu, kvm) { > + vcpu->arch.tsc_offset_adjustment += delta_cyc; > + vcpu->arch.last_host_tsc = 0; > + } > + } > + > + return 0; > } Wouldnt it be simpler to use cyc2ns_offset (or something equivalent)? In any case, you forgot to compare smp_processor_id. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html