[PATCH v3 2/6] KVM: x86: switch to masterclock update using timekeeper functionality

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



It is reasonable to switch KVM to a simpler, more effective
and conceptually correct scheme of obtaining the data needed
to calculate the kvm masterclock values.

With the current scheme, KVM has to keep an up-to-date copy of
some timekeeper data in order to provide a guest using kvmclock
with the necessary information.

This is not:
    - simple
        KVM has to carry a lot of code to do that; instead, KVM could use
        a timekeeper function to get all the data it needs
    - effective
        the copy of the data used for the time calculation is updated
        every time it changes, although this is not necessary since
        the updated time data is needed only at certain moments
        (e.g. masterclock updating); instead, KVM can request this data
        directly from the timekeeper at the moments when it's really needed
    - conceptually correct
        doing work (calculating the time data) that another part
        of the system (the timekeeper) has been designed for and is able
        to do is not the proper way; instead, delegate the work to that part

This patch switches KVM to using the improved timekeeper function for
the kvm masterclock time data.

Removing the leftovers of the old scheme is left to the following patches.

Signed-off-by: Denis Plotnikov <dplotnikov@xxxxxxxxxxxxx>
---
 arch/x86/kernel/kvmclock.c  | 14 ++++++++++++--
 arch/x86/kernel/tsc.c       |  6 ++++++
 arch/x86/kvm/x86.c          | 26 ++++++++++++++++++--------
 include/linux/clocksource.h |  3 +++
 include/linux/timekeeping.h |  2 ++
 kernel/time/timekeeping.c   | 21 +++++++++++++++++++--
 6 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f5cfc5d..52156d9 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -82,7 +82,7 @@ static int kvm_set_wallclock(const struct timespec *now)
 	return -1;
 }
 
-static u64 kvm_clock_read(void)
+static inline u64 __kvm_clock_read(u64 *cycles)
 {
 	struct pvclock_vcpu_time_info *src;
 	u64 ret;
@@ -91,10 +91,14 @@ static u64 kvm_clock_read(void)
 	preempt_disable_notrace();
 	cpu = smp_processor_id();
 	src = &hv_clock[cpu].pvti;
-	ret = pvclock_clocksource_read(src, NULL);
+	ret = pvclock_clocksource_read(src, cycles);
 	preempt_enable_notrace();
 	return ret;
 }
+static u64 kvm_clock_read(void)
+{
+	return __kvm_clock_read(NULL);
+}
 
 static u64 kvm_clock_get_cycles(struct clocksource *cs)
 {
@@ -177,9 +181,15 @@ bool kvm_check_and_clear_guest_paused(void)
 	return ret;
 }
 
+static void kvm_clock_read_with_cycles(u64 *cycles, u64 *cycles_stamp)
+{
+	*cycles = __kvm_clock_read(cycles_stamp);
+}
+
 struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_get_cycles,
+	.read_with_cycles = kvm_clock_read_with_cycles,
 	.rating = 400,
 	.mask = CLOCKSOURCE_MASK(64),
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 796d96b..5d655af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1015,6 +1015,11 @@ static u64 read_tsc(struct clocksource *cs)
 	return (u64)rdtsc_ordered();
 }
 
+static bool is_tsc_stable(void)
+{
+	return !tsc_unstable;
+}
+
 static void tsc_cs_mark_unstable(struct clocksource *cs)
 {
 	if (tsc_unstable)
@@ -1043,6 +1048,7 @@ static struct clocksource clocksource_tsc = {
 	.name                   = "tsc",
 	.rating                 = 300,
 	.read                   = read_tsc,
+	.is_stable		= is_tsc_stable,
 	.mask                   = CLOCKSOURCE_MASK(64),
 	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
 				  CLOCK_SOURCE_MUST_VERIFY,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c97c82..496e731 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1643,22 +1643,32 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
 {
-	/* checked again under seqlock below */
-	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
-		return false;
+	struct system_time_snapshot systime_snapshot;
+
+	ktime_get_snapshot(&systime_snapshot);
+
+	if (systime_snapshot.cs_stable) {
+		*kernel_ns = ktime_to_ns(systime_snapshot.boot);
+		*cycle_now = systime_snapshot.cycles;
+	}
 
-	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+	return systime_snapshot.cs_stable;
 }
 
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_walltime_and_clockread(struct timespec *ts,
 					   u64 *cycle_now)
 {
-	/* checked again under seqlock below */
-	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
-		return false;
+	struct system_time_snapshot systime_snapshot;
+
+	ktime_get_snapshot(&systime_snapshot);
+
+	if (systime_snapshot.cs_stable) {
+		*ts = ktime_to_timespec(systime_snapshot.real);
+		*cycle_now = systime_snapshot.cycles;
+	}
 
-	return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+	return systime_snapshot.cs_stable;
 }
 #endif
 
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index a78cb18..f849b91 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -49,6 +49,7 @@ struct module;
  *				The ideal clocksource. A must-use where
  *				available.
  * @read:		returns a cycle value, passes clocksource as argument
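+ * @read_with_cycles:	optional; stores the clocksource value in @cycles and the accompanying cycle stamp in @cycles_stamp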
  * @enable:		optional function to enable the clocksource
  * @disable:		optional function to disable the clocksource
  * @mask:		bitmask for two's complement
@@ -78,6 +79,8 @@ struct module;
  */
 struct clocksource {
 	u64 (*read)(struct clocksource *cs);
+	void (*read_with_cycles)(u64 *cycles, u64 *cycles_stamp);
+	bool (*is_stable)(void);
 	u64 mask;
 	u32 mult;
 	u32 shift;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index ddc229f..21917fa 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -290,8 +290,10 @@ struct system_time_snapshot {
 	u64		cycles;
 	ktime_t		real;
 	ktime_t		raw;
+	ktime_t		boot;
 	unsigned int	clock_was_set_seq;
 	u8		cs_was_changed_seq;
+	bool		cs_stable;
 };
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cedafa0..a2bfc12 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -953,27 +953,44 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 	unsigned long seq;
 	ktime_t base_raw;
 	ktime_t base_real;
+	ktime_t base_boot;
 	u64 nsec_raw;
 	u64 nsec_real;
 	u64 now;
+	struct clocksource *clock;
 
 	WARN_ON_ONCE(timekeeping_suspended);
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tk_clock_read(&tk->tkr_mono);
+		clock = tk->tkr_mono.clock;
+
+		if (clock->is_stable)
+			systime_snapshot->cs_stable = clock->is_stable();
+		else
+			systime_snapshot->cs_stable = false;
+
+		if (clock->read_with_cycles) {
+			clock->read_with_cycles(
+				&now, &systime_snapshot->cycles);
+		} else {
+			now = tk_clock_read(&tk->tkr_mono);
+			systime_snapshot->cycles = now;
+		}
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
 				      tk_core.timekeeper.offs_real);
 		base_raw = tk->tkr_raw.base;
+		base_boot = ktime_add(tk->tkr_mono.base,
+				      tk_core.timekeeper.offs_boot);
 		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
 		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	systime_snapshot->cycles = now;
 	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
 	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
+	systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
 }
 EXPORT_SYMBOL_GPL(ktime_get_snapshot);
 
-- 
2.7.4




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux