hv_clock_test() is reported to be flaky: https://bugzilla.kernel.org/show_bug.cgi?id=217516 The test tries measuring the divergence between MSR based clock and TSC page over one second and then expects delta to stay within the measured range over another two seconds. This works well for a completely idle system, but if tasks get scheduled out, rescheduled to a different CPU, etc., the test fails. Widening the expected range helps to a certain extent but even when the expected delta is "max_delta * 1024" sporadic failures still occur. Rewrite the test completely to make it stable. Check two things: - MSR based and TSC page clocksources never go backwards. - MSR based clocksource read between two TSC page reads stays within the interval. Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> --- x86/hyperv_clock.c | 55 +++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c index d0993bb75ac7..9061da8c6d2c 100644 --- a/x86/hyperv_clock.c +++ b/x86/hyperv_clock.c @@ -64,40 +64,51 @@ uint64_t loops[MAX_CPU]; static void hv_clock_test(void *data) { int i = (long)data; - uint64_t t = rdmsr(HV_X64_MSR_TIME_REF_COUNT); - uint64_t end = t + 3 * TICKS_PER_SEC; - uint64_t msr_sample = t + TICKS_PER_SEC; - int min_delta = 123456, max_delta = -123456; + uint64_t t_msr_prev = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + uint64_t t_page_prev = hv_clock_read(); + uint64_t end = t_page_prev + TICKS_PER_SEC; bool got_drift = false; - bool got_warp = false; + bool got_warp_msr = false; + bool got_warp_page = false; ok[i] = true; do { - uint64_t now = hv_clock_read(); - int delta = rdmsr(HV_X64_MSR_TIME_REF_COUNT) - now; - - min_delta = delta < min_delta ? delta : min_delta; - if (t < msr_sample) { - max_delta = delta > max_delta ? delta: max_delta; - } else if (delta < 0 || delta > max_delta * 3 / 2) { - printf("suspecting drift on CPU %d? 
delta = %d, acceptable [0, %d)\n", i, - delta, max_delta); + uint64_t t_page_1, t_page_2, t_msr; + + t_page_1 = hv_clock_read(); + barrier(); + t_msr = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + barrier(); + t_page_2 = hv_clock_read(); + + if (!got_drift && (t_msr < t_page_1 || t_msr > t_page_2)) { + printf("drift on CPU %d, MSR value = %ld, acceptable [%ld, %ld]\n", i, + t_msr, t_page_1, t_page_2); ok[i] = false; got_drift = true; - max_delta *= 2; } - if (now < t && !got_warp) { - printf("warp on CPU %d!\n", i); + if (!got_warp_msr && t_msr < t_msr_prev) { + printf("warp on CPU %d, MSR value = %ld prev MSR value = %ld!\n", i, + t_msr, t_msr_prev); ok[i] = false; - got_warp = true; + got_warp_msr = true; break; } - t = now; - } while(t < end); - if (!got_drift) - printf("delta on CPU %d was %d...%d\n", i, min_delta, max_delta); + if (!got_warp_page && t_page_1 < t_page_prev) { + printf("warp on CPU %d, TSC page value = %ld prev TSC page value = %ld!\n", i, + t_page_1, t_page_prev); + ok[i] = false; + got_warp_page = true; + break; + } + + t_page_prev = t_page_1; + t_msr_prev = t_msr; + + } while(t_page_prev < end); + barrier(); } -- 2.44.0