The paravirtualized clock used in KVM and Xen uses a version field to allow the guest to see when the shared data structure is inconsistent. The code reads the version field twice (before and after the data structure is copied) and checks whether they haven't changed and that the lowest bit is not set. As the second access is not synchronized, the compiler could generate code that accesses version more than two times and you end up with inconsistent data. An example using pvclock_get_time_values: host starts updating data, sets src->version to 1 guest reads src->version (1) and stores it into dst->version. guest copies inconsistent data guest reads src->version (1) and computes xor with dst->version. host finishes updating data and sets src->version to 2 guest reads src->version (2) and checks whether lower bit is not set. while loop exits with inconsistent data! AFAICS the compiler is allowed to optimize the given code this way. Signed-off-by: Julian Stecklina <jsteckli@xxxxxxxxxxxxxxxxxxxx> --- arch/x86/kernel/pvclock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 42eb330..f62b41c 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -55,6 +55,8 @@ static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, struct pvclock_vcpu_time_info *src) { + u32 nversion; + do { dst->version = src->version; rmb(); /* fetch version before data */ @@ -64,7 +66,8 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, dst->tsc_shift = src->tsc_shift; dst->flags = src->flags; rmb(); /* test version after fetching data */ - } while ((src->version & 1) || (dst->version != src->version)); + nversion = ACCESS_ONCE(src->version); + } while ((nversion & 1) || (dst->version != nversion)); return dst->version; } @@ -135,7 +138,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, struct pvclock_vcpu_time_info *vcpu_time, struct timespec *ts) { - u32 version; + u32 version, nversion; u64 delta; struct timespec now; @@ -146,7 +149,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, now.tv_sec = wall_clock->sec; now.tv_nsec = wall_clock->nsec; rmb(); /* fetch time before checking version */ - } while ((wall_clock->version & 1) || (version != wall_clock->version)); + nversion = ACCESS_ONCE(wall_clock->version); + } while ((nversion & 1) || (version != nversion)); delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; -- 1.8.4.2 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html