[PATCH kvm-unit-tests 2/2] x86:kvmclock: sync with kernel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Bring the kvmclock code in sync with the kernel (including the
not-yet-merged fixes for missing barriers from Minfei Huang
<mnghuan@xxxxxxxxx> and the read_begin/read_retry rework from Paolo
Bonzini <pbonzini@xxxxxxxxxx>).

This gets rid of excessive memory barriers and speeds up kvmclock reads
by up to 50%.  (Another 2x speedup is possible if lfence (aka rmb()) is
used instead of mfence (aka mb()) in rdtsc_ordered(); however, that is
only supposed to be allowed on Intel CPUs, and we don't yet have
infrastructure like the kernel's alternatives mechanism.)

Signed-off-by: Roman Kagan <rkagan@xxxxxxxxxxxxx>
---
 x86/kvmclock.c | 68 ++++++++++++++++++++++++++++++++--------------------------
 x86/kvmclock.h | 16 --------------
 2 files changed, 38 insertions(+), 46 deletions(-)

diff --git a/x86/kvmclock.c b/x86/kvmclock.c
index 208d43c..bad0784 100644
--- a/x86/kvmclock.c
+++ b/x86/kvmclock.c
@@ -142,51 +142,59 @@ void set_normalized_timespec(struct timespec *ts, long sec, s64 nsec)
 	ts->tv_nsec = nsec;
 }
 
-static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+static inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
 {
-	u64 delta = rdtsc() - shadow->tsc_timestamp;
-	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+	unsigned version = src->version & ~1;
+	/* Make sure that the version is read before the data. */
+	smp_rmb();
+	return version;
 }
 
-/*
- * Reads a consistent set of time-base values from hypervisor,
- * into a shadow data area.
- */
-static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
-					struct pvclock_vcpu_time_info *src)
+static inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+			unsigned version)
 {
-	do {
-		dst->version = src->version;
-		rmb();		/* fetch version before data */
-		dst->tsc_timestamp     = src->tsc_timestamp;
-		dst->system_timestamp  = src->system_time;
-		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-		dst->tsc_shift         = src->tsc_shift;
-		dst->flags             = src->flags;
-		rmb();		/* test version after fetching data */
-	} while ((src->version & 1) || (dst->version != src->version));
-
-	return dst->version;
+	/* Make sure that the version is re-read after the data. */
+	smp_rmb();
+	return version != src->version;
+}
+
+static inline u64 rdtsc_ordered()
+{
+	/*
+	 * FIXME: on Intel CPUs rmb() (aka lfence) is sufficient, which would
+	 * give up to a 2x speedup
+	 */
+	mb();
+	return rdtsc();
+}
+
+static inline
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
+{
+	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+	cycle_t offset = scale_delta(delta, src->tsc_to_system_mul,
+					     src->tsc_shift);
+	return src->system_time + offset;
 }
 
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 {
-	struct pvclock_shadow_time shadow;
 	unsigned version;
-	cycle_t ret, offset;
+	cycle_t ret;
 	u64 last;
+	u8 flags;
 
 	do {
-		version = pvclock_get_time_values(&shadow, src);
-		mb();
-		offset = pvclock_get_nsec_offset(&shadow);
-		ret = shadow.system_timestamp + offset;
-		mb();
-	} while (version != src->version);
+		version = pvclock_read_begin(src);
+		ret = __pvclock_read_cycles(src);
+		flags = src->flags;
+	} while (pvclock_read_retry(src, version));
 
 	if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) ||
             ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
-             (shadow.flags & PVCLOCK_TSC_STABLE_BIT)))
+             (flags & PVCLOCK_TSC_STABLE_BIT)))
                 return ret;
 
 	/*
diff --git a/x86/kvmclock.h b/x86/kvmclock.h
index ab7dc0c..dff6802 100644
--- a/x86/kvmclock.h
+++ b/x86/kvmclock.h
@@ -30,22 +30,6 @@ struct pvclock_wall_clock {
 	u32   nsec;
 } __attribute__((__packed__));
 
-/*
- * These are perodically updated
- *    xen: magic shared_info page
- *    kvm: gpa registered via msr
- * and then copied here.
- */
-struct pvclock_shadow_time {
-	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
-	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
-	u32 tsc_to_nsec_mul;
-	int tsc_shift;
-	u32 version;
-	u8  flags;
-};
-
-
 struct timespec {
         long   tv_sec;
         long   tv_nsec;
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux