On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: > On 08.01.2014 10:40, Vadim Rozenfeld wrote: > > On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: > >> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: > >>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: > >>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: > >>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: > >>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: > >>>>>>> Signed-off: Peter Lieven <pl@xxxxxxx> > >>>>>>> Signed-off: Gleb Natapov <gleb@xxxxxxxxxx> > >>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@xxxxxxxxxx> > >>>>>>> > >>>>>>> v1 -> v2 > >>>>>>> 1. mark TSC page dirty as suggested by > >>>>>>> Eric Northup <digitaleric@xxxxxxxxxx> and Gleb > >>>>>>> 2. disable local irq when calling get_kernel_ns, > >>>>>>> as it was done by Peter Lieven <pl@xxxxxxxxx> > >>>>>>> 3. move check for TSC page enable from second patch > >>>>>>> to this one. > >>>>>>> > >>>>>>> --- > >>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ > >>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ > >>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- > >>>>>>> include/uapi/linux/kvm.h | 1 + > >>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) > >>>>>>> > >>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > >>>>>>> index ae5d783..2fd0753 100644 > >>>>>>> --- a/arch/x86/include/asm/kvm_host.h > >>>>>>> +++ b/arch/x86/include/asm/kvm_host.h > >>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { > >>>>>>> /* fields used by HYPER-V emulation */ > >>>>>>> u64 hv_guest_os_id; > >>>>>>> u64 hv_hypercall; > >>>>>>> + u64 hv_ref_count; > >>>>>>> + u64 hv_tsc_page; > >>>>>>> > >>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT > >>>>>>> int audit_point; > >>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>> index b8f1c01..462efe7 100644 > >>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h > >>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h > >>>>>>> @@ -28,6 +28,9 @@ > >>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ > >>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) > >>>>>>> > >>>>>>> +/* A partition's reference time stamp counter (TSC) page */ > >>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > >>>>>>> + > >>>>>>> /* > >>>>>>> * There is a single feature flag that signifies the presence of the MSR > >>>>>>> * that can be used to retrieve both the local APIC Timer frequency as > >>>>>>> @@ -198,6 +201,9 @@ > >>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ > >>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) > >>>>>>> > >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > >>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > >>>>>>> + > >>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 > >>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 > >>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 > >>>>>>> @@ -210,4 +216,11 @@ > >>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 > >>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 > >>>>>>> > >>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { > >>>>>>> + __u32 tsc_sequence; > >>>>>>> + __u32 res1; > >>>>>>> + __u64 tsc_scale; > >>>>>>> + __s64 tsc_offset; > >>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; > >>>>>>> + > >>>>>>> #endif > >>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > >>>>>>> index 21ef1ba..5e4e495a 100644 > >>>>>>> --- a/arch/x86/kvm/x86.c > >>>>>>> +++ b/arch/x86/kvm/x86.c > >>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); > >>>>>>> static u32 msrs_to_save[] = { > >>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > >>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > >>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > >>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, > >>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > >>>>>>> MSR_KVM_PV_EOI_EN, > >>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > >>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) > >>>>>>> switch (msr) { > >>>>>>> case HV_X64_MSR_GUEST_OS_ID: > >>>>>>> case HV_X64_MSR_HYPERCALL: > >>>>>>> + case HV_X64_MSR_REFERENCE_TSC: > >>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: > >>>>>>> r = true; > >>>>>>> break; > >>>>>>> } > >>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) > >>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) > >>>>>>> return 1; > >>>>>>> kvm->arch.hv_hypercall = data; > >>>>>>> + local_irq_disable(); > >>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; > >>>>>>> + local_irq_enable(); > >>>>>> > >>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock > >>>>>> starts counting? > >>>>>> > >>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover > >>>>>> the name is weird, better name would be "hv_ref_start_time". > >>>>> > >>>>> Just add kvmclock_offset when reading the values (otherwise you have a > >>>>> "stale copy" of kvmclock_offset in hv_ref_count). > >>>>> > >>>> > >>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. > >>>> > >>>> I was debugging some weird clockjump issues and I think the problem is that after live migration > >>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the > >>>> hypercall was set up this can lead to series jumps. > >>>> > >>>> So I would suggest to completely drop kvm->arch.hv_ref_count. > >>>> > >>>> And use simply this in get_msr_hyperv_pw(). > >>>> > >>>> case HV_X64_MSR_TIME_REF_COUNT: { > >>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); > >>>> break; > >>>> } > >>>> > >>> > >>> Agreed. It should work as long as we rely on kvmclock_offset. > >>> Vadim. > >> > >> I think we can rely on kvmclock_offset. Had you had a chance to do further testing > >> during the weekend? > > > > Yes, and still testing. > > > > There is still some inconsistency in the value returned by > > QueryPerformanceCounter during migration. > > With my proposal? > Right. Please see below QTC vs. NTP time stamp before and after migration: C:\timedrift\objchk_win7_x86\i386>timedrift.exe quantum = 15 QPC NTP Delta 1311 1307 4 2621 2618 3 3932 3931 1 ....... ....... 38018 38014 4 Unable to wait for NTP reply from the SNTP server, GetLastError returns 7279088 142210 40113 102097 143458 41357 102101 ....... ....... 156500 54615 101885 NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 mSec) Vadim. > Peter > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html