Am 12.01.2014 um 13:08 schrieb Vadim Rozenfeld <vrozenfe@xxxxxxxxxx>: > On Wed, 2014-01-08 at 23:20 +0100, Peter Lieven wrote: >> Am 08.01.2014 21:08, schrieb Vadim Rozenfeld: >>> On Wed, 2014-01-08 at 15:54 +0100, Peter Lieven wrote: >>>> On 08.01.2014 13:12, Vadim Rozenfeld wrote: >>>>> On Wed, 2014-01-08 at 12:48 +0100, Peter Lieven wrote: >>>>>> On 08.01.2014 11:44, Vadim Rozenfeld wrote: >>>>>>> On Wed, 2014-01-08 at 11:15 +0100, Peter Lieven wrote: >>>>>>>> On 08.01.2014 10:40, Vadim Rozenfeld wrote: >>>>>>>>> On Tue, 2014-01-07 at 18:52 +0100, Peter Lieven wrote: >>>>>>>>>> Am 07.01.2014 10:36, schrieb Vadim Rozenfeld: >>>>>>>>>>> On Thu, 2014-01-02 at 17:52 +0100, Peter Lieven wrote: >>>>>>>>>>>> Am 11.12.2013 19:59, schrieb Marcelo Tosatti: >>>>>>>>>>>>> On Wed, Dec 11, 2013 at 04:53:05PM -0200, Marcelo Tosatti wrote: >>>>>>>>>>>>>> On Sun, Dec 08, 2013 at 10:33:38PM +1100, Vadim Rozenfeld wrote: >>>>>>>>>>>>>>> Signed-off: Peter Lieven <pl@xxxxxxx> >>>>>>>>>>>>>>> Signed-off: Gleb Natapov <gleb@xxxxxxxxxx> >>>>>>>>>>>>>>> Signed-off: Vadim Rozenfeld <vrozenfe@xxxxxxxxxx> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> v1 -> v2 >>>>>>>>>>>>>>> 1. mark TSC page dirty as suggested by >>>>>>>>>>>>>>> Eric Northup <digitaleric@xxxxxxxxxx> and Gleb >>>>>>>>>>>>>>> 2. disable local irq when calling get_kernel_ns, >>>>>>>>>>>>>>> as it was done by Peter Lieven <pl@xxxxxxxxx> >>>>>>>>>>>>>>> 3. move check for TSC page enable from second patch >>>>>>>>>>>>>>> to this one. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> --- >>>>>>>>>>>>>>> arch/x86/include/asm/kvm_host.h | 2 ++ >>>>>>>>>>>>>>> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ >>>>>>>>>>>>>>> arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++++++++++++++- >>>>>>>>>>>>>>> include/uapi/linux/kvm.h | 1 + >>>>>>>>>>>>>>> 4 files changed, 54 insertions(+), 1 deletion(-) >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>>>> index ae5d783..2fd0753 100644 >>>>>>>>>>>>>>> --- a/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>>>> +++ b/arch/x86/include/asm/kvm_host.h >>>>>>>>>>>>>>> @@ -605,6 +605,8 @@ struct kvm_arch { >>>>>>>>>>>>>>> /* fields used by HYPER-V emulation */ >>>>>>>>>>>>>>> u64 hv_guest_os_id; >>>>>>>>>>>>>>> u64 hv_hypercall; >>>>>>>>>>>>>>> + u64 hv_ref_count; >>>>>>>>>>>>>>> + u64 hv_tsc_page; >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> #ifdef CONFIG_KVM_MMU_AUDIT >>>>>>>>>>>>>>> int audit_point; >>>>>>>>>>>>>>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>>>> index b8f1c01..462efe7 100644 >>>>>>>>>>>>>>> --- a/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>>>> +++ b/arch/x86/include/uapi/asm/hyperv.h >>>>>>>>>>>>>>> @@ -28,6 +28,9 @@ >>>>>>>>>>>>>>> /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ >>>>>>>>>>>>>>> #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> +/* A partition's reference time stamp counter (TSC) page */ >>>>>>>>>>>>>>> +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 >>>>>>>>>>>>>>> + >>>>>>>>>>>>>>> /* >>>>>>>>>>>>>>> * There is a single feature flag that signifies the presence of the MSR >>>>>>>>>>>>>>> * that can be used to retrieve both the local APIC Timer frequency as >>>>>>>>>>>>>>> @@ -198,6 +201,9 @@ >>>>>>>>>>>>>>> #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ >>>>>>>>>>>>>>> (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 >>>>>>>>>>>>>>> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 >>>>>>>>>>>>>>> + >>>>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C0 0 >>>>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C1 1 >>>>>>>>>>>>>>> #define HV_PROCESSOR_POWER_STATE_C2 2 >>>>>>>>>>>>>>> @@ -210,4 +216,11 @@ >>>>>>>>>>>>>>> #define HV_STATUS_INVALID_ALIGNMENT 4 >>>>>>>>>>>>>>> #define HV_STATUS_INSUFFICIENT_BUFFERS 19 >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> +typedef struct _HV_REFERENCE_TSC_PAGE { >>>>>>>>>>>>>>> + __u32 tsc_sequence; >>>>>>>>>>>>>>> + __u32 res1; >>>>>>>>>>>>>>> + __u64 tsc_scale; >>>>>>>>>>>>>>> + __s64 tsc_offset; >>>>>>>>>>>>>>> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; >>>>>>>>>>>>>>> + >>>>>>>>>>>>>>> #endif >>>>>>>>>>>>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >>>>>>>>>>>>>>> index 21ef1ba..5e4e495a 100644 >>>>>>>>>>>>>>> --- a/arch/x86/kvm/x86.c >>>>>>>>>>>>>>> +++ b/arch/x86/kvm/x86.c >>>>>>>>>>>>>>> @@ -840,7 +840,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); >>>>>>>>>>>>>>> static u32 msrs_to_save[] = { >>>>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, >>>>>>>>>>>>>>> MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, >>>>>>>>>>>>>>> - HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, >>>>>>>>>>>>>>> + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, >>>>>>>>>>>>>>> HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, >>>>>>>>>>>>>>> MSR_KVM_PV_EOI_EN, >>>>>>>>>>>>>>> MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, >>>>>>>>>>>>>>> @@ -1826,6 +1826,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) >>>>>>>>>>>>>>> switch (msr) { >>>>>>>>>>>>>>> case HV_X64_MSR_GUEST_OS_ID: >>>>>>>>>>>>>>> case HV_X64_MSR_HYPERCALL: >>>>>>>>>>>>>>> + case HV_X64_MSR_REFERENCE_TSC: >>>>>>>>>>>>>>> + case HV_X64_MSR_TIME_REF_COUNT: >>>>>>>>>>>>>>> r = true; >>>>>>>>>>>>>>> break; >>>>>>>>>>>>>>> } >>>>>>>>>>>>>>> @@ -1865,6 +1867,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) >>>>>>>>>>>>>>> if (__copy_to_user((void __user *)addr, instructions, 4)) >>>>>>>>>>>>>>> return 1; >>>>>>>>>>>>>>> kvm->arch.hv_hypercall = data; >>>>>>>>>>>>>>> + local_irq_disable(); >>>>>>>>>>>>>>> + kvm->arch.hv_ref_count = get_kernel_ns() + kvm->arch.kvmclock_offset; >>>>>>>>>>>>>>> + local_irq_enable(); >>>>>>>>>>>>>> >>>>>>>>>>>>>> Where does the docs say that HV_X64_MSR_HYPERCALL is the where the clock >>>>>>>>>>>>>> starts counting? >>>>>>>>>>>>>> >>>>>>>>>>>>>> No need to store kvmclock_offset in hv_ref_count? (moreover >>>>>>>>>>>>>> the name is weird, better name would be "hv_ref_start_time". >>>>>>>>>>>>> >>>>>>>>>>>>> Just add kvmclock_offset when reading the values (otherwise you have a >>>>>>>>>>>>> "stale copy" of kvmclock_offset in hv_ref_count). >>>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>> After some experiments I think we do no need kvm->arch.hv_ref_count at all. >>>>>>>>>>>> >>>>>>>>>>>> I was debugging some weird clockjump issues and I think the problem is that after live migration >>>>>>>>>>>> kvm->arch.hv_ref_count is initialized to 0. Depending on the uptime of the vServer when the >>>>>>>>>>>> hypercall was set up this can lead to series jumps. >>>>>>>>>>>> >>>>>>>>>>>> So I would suggest to completely drop kvm->arch.hv_ref_count. >>>>>>>>>>>> >>>>>>>>>>>> And use simply this in get_msr_hyperv_pw(). >>>>>>>>>>>> >>>>>>>>>>>> case HV_X64_MSR_TIME_REF_COUNT: { >>>>>>>>>>>> data = div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); >>>>>>>>>>>> break; >>>>>>>>>>>> } >>>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> Agreed. It should work as long as we rely on kvmclock_offset. >>>>>>>>>>> Vadim. >>>>>>>>>> >>>>>>>>>> I think we can rely on kvmclock_offset. Had you had a chance to do further testing >>>>>>>>>> during the weekend? >>>>>>>>> >>>>>>>>> Yes, and still testing. > > It seems to be working fine. However, I would to keep hv_ref_count as is > for the following reason: > " A partition's reference time counter is initialized to zero when the > partition is created." > http://msdn.microsoft.com/en-us/library/windows/hardware/ff542637% > 28v=vs.85%29.aspx > > Technical, hypercall page initialization event (HV_X64_MSR_HYPERCALL MSR > write handler) can be treated as such event. In this case hv_ref_count > is no more than just an offset, which should be subtracted from the > current time ( get_kernel_ns() + kvm->arch.kvmclock_offset ) when > handling HV_X64_MSR_TIME_REF_COUNT MSR read request. my 2 cents: a) if you treat the start of the vm as partition create event get_kernel_ns() + kvm->arch.kvmclock_offset is exactly the vm life time in nanoseconds. b) afaik currently the value of hv_ref_count is not restored after a live migration on the destination. this needs to be implemented for proper function. if we go for dropping this offset completely we can drop the requirement of restoring hv_ref_count as well. Peter > > Best regards, > Vadim. > >>>>>>>>> >>>>>>>>> There is still some inconsistency in the value returned by >>>>>>>>> QueryPerformanceCounter during migration. >>>>>>>> >>>>>>>> With my proposal? >>>>>>>> >>>>>>> >>>>>>> Right. >>>>>>> Please see below QTC vs. NTP time stamp >>>>>>> before and after migration: >>>>>>> >>>>>>> C:\timedrift\objchk_win7_x86\i386>timedrift.exe >>>>>>> quantum = 15 >>>>>>> QPC NTP Delta >>>>>>> 1311 1307 4 >>>>>>> 2621 2618 3 >>>>>>> 3932 3931 1 >>>>>>> ....... >>>>>>> ....... >>>>>>> 38018 38014 4 >>>>>>> Unable to wait for NTP reply from the SNTP server, GetLastError returns >>>>>>> 7279088 >>>>>>> 142210 40113 102097 >>>>>>> 143458 41357 102101 >>>>>>> ....... >>>>>>> ....... >>>>>>> 156500 54615 101885 >>>>>>> >>>>>>> NTP time changed from 38014 to 40113 ((40113 - 38014) / 100 = 21 mSec) >>>>>>> while QPC jumped from 38018 to 142210 ((142210 - 38014) / 100 = 1042 >>>>>>> mSec) >>>>>> >>>>>> How long was the downtime when you migrated the vServer? >>>>> I use Win7-32 for testing. >>>>>> >>>>>> I have done similar tests and have not seen this... >>>>>> Can you show the relevant parts of your patch. Remember I use the REFERENCE_COUTNER not iTSC. >>>>> Yes, I also test it for reference counter. >>>> >>>> Can you use these parameters for testing: >>>> >>>> -cpu qemu64,hv_relaxed,hv_vapic,hv_spinlocks=4096,hv_refcnt >>>> -rtc base=localtime -vga std -usb -usbdevice tablet -no-hpet >>> >>> My settings are: >>> -cpu qemu64, >>> +x2apic,family=0xf,hv_vapic,hv_spinlocks=0xfff,hv_relaxed,hv_tsc -m 1G >>> -smp 4 >> >> do you also see the jump if you leave the hv_tsc out and just use hv_refcnt? >> >> Peter > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html