[PATCH RFC 3/7] KVM: timer: synchronize tsc-deadline timestamp for guest

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Ben Luo <bn0418@xxxxxxxxx>

In general, a KVM guest programs its tsc-deadline timestamp into
the MSR_IA32_TSC_DEADLINE MSR. This causes a VM-exit, and KVM
then handles the timer for the guest.

The tsc-deadline timestamp is mostly recorded in a shared page,
which results in fewer VM-exits. We introduce a kthread that
periodically scans the shared page and synchronizes the timer
settings for the guest on a dedicated CPU.

Signed-off-by: Yang Zhang <yang.zhang.wz@xxxxxxxxx>
Signed-off-by: Quan Xu <quan.xu0@xxxxxxxxx>
Signed-off-by: Ben Luo <bn0418@xxxxxxxxx>
---
 arch/x86/kvm/lapic.c |  138 ++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.h |    5 ++
 2 files changed, 143 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 55c9ba3..20a23bb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -36,6 +36,10 @@
 #include <asm/delay.h>
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
+#include <linux/ktime.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mmu_context.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -70,6 +74,12 @@
 #define APIC_BROADCAST			0xFF
 #define X2APIC_BROADCAST		0xFFFFFFFFul
 
+static struct hrtimer pv_sync_timer;	/* periodic wakeup for the worker */
+static long pv_timer_period_ns = PVTIMER_PERIOD_NS;	/* sync period, ns */
+static struct task_struct *pv_timer_polling_worker;	/* NULL: no worker */
+
+module_param(pv_timer_period_ns, long, 0644);
+
 static inline int apic_test_vector(int vec, void *bitmap)
 {
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -2542,8 +2552,130 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	}
 }
 
+static enum hrtimer_restart pv_sync_timer_callback(struct hrtimer *timer)
+{
+	hrtimer_forward_now(timer, ns_to_ktime(pv_timer_period_ns));
+	wake_up_process(pv_timer_polling_worker);
+	/* timer forwarded and worker woken above; keep the timer running */
+	return HRTIMER_RESTART;
+}
+
+void kvm_apic_sync_pv_timer(void *data)
+{
+	struct kvm_vcpu *vcpu = data;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	unsigned long flags, this_tsc_khz = vcpu->arch.virtual_tsc_khz;
+	u64 guest_tsc, expire_tsc;
+	long rem_tsc;
+
+	if (!lapic_in_kernel(vcpu) || !pv_timer_enabled(vcpu))
+		return;
+	/* publish next_sync_tsc, then consume the guest's expire_tsc */
+	local_irq_save(flags);
+	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	rem_tsc = ktime_to_ns(hrtimer_get_remaining(&pv_sync_timer))
+			* this_tsc_khz;
+	if (rem_tsc <= 0)
+		rem_tsc += pv_timer_period_ns * this_tsc_khz;
+	do_div(rem_tsc, 1000000L);
+	/* NOTE(review): do_div() expects a u64 dividend; rem_tsc is long */
+	/*
+	 * rem_tsc is ns * khz / 10^6: time left on pv_sync_timer in TSC ticks.
+	 * Barrier before next_sync_tsc (guest_tsc + rem_tsc) is written.
+	 */
+	smp_wmb();
+	kvm_xchg_guest_cached(vcpu->kvm, &vcpu->arch.pv_timer.data,
+		offsetof(struct pvtimer_vcpu_event_info, next_sync_tsc),
+		guest_tsc + rem_tsc, 8);
+
+	/* make sure next_sync_tsc is visible before expire_tsc is exchanged */
+	smp_wmb();
+	/* xchg expire_tsc with 0; the result is used as the guest's deadline */
+	expire_tsc = kvm_xchg_guest_cached(vcpu->kvm, &vcpu->arch.pv_timer.data,
+			offsetof(struct pvtimer_vcpu_event_info, expire_tsc),
+			0UL, 8);
+
+	/* make sure expire_tsc is visible */
+	smp_wmb();
+	/* expire_tsc == 0: nothing to arm or deliver */
+	if (expire_tsc) {
+		if (expire_tsc > guest_tsc)
+			/*
+			 * As we bind this thread to a dedicated CPU through
+			 * IPI, the timer is registered on that dedicated
+			 * CPU here.
+			 */
+			kvm_set_lapic_tscdeadline_msr(apic->vcpu, expire_tsc);
+		else
+			/* deliver immediately if expired */
+			kvm_apic_local_deliver(apic, APIC_LVTT);
+	}
+	local_irq_restore(flags);
+}
+
+static int pv_timer_polling(void *arg)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i;
+	mm_segment_t oldfs = get_fs();
+
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (kthread_should_stop()) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+		/* one pass: sync every vCPU of every VM while holding kvm_lock */
+		spin_lock(&kvm_lock);
+		__set_current_state(TASK_RUNNING);
+		list_for_each_entry(kvm, &vm_list, vm_list) {
+			set_fs(USER_DS);
+			use_mm(kvm->mm);
+			kvm_for_each_vcpu(i, vcpu, kvm) {
+				kvm_apic_sync_pv_timer(vcpu);
+			}
+			unuse_mm(kvm->mm);
+			set_fs(oldfs);
+		}
+
+		spin_unlock(&kvm_lock);
+		/* NOTE(review): state is TASK_RUNNING here; does this sleep? */
+		schedule();
+	}
+
+	return 0;
+}
+
+static void kvm_pv_timer_init(void)
+{
+	ktime_t ktime = ktime_set(0, pv_timer_period_ns);
+
+	hrtimer_init(&pv_sync_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+	pv_sync_timer.function = &pv_sync_timer_callback;
+	/* NOTE(review): init uses REL_PINNED, hrtimer_start() below uses REL */
+	/* kthread that runs pv_timer_polling(); woken further down */
+	pv_timer_polling_worker = kthread_create(pv_timer_polling, NULL,
+						"pv_timer_polling_worker/%d",
+						PVTIMER_SYNC_CPU);
+	if (IS_ERR(pv_timer_polling_worker)) {
+		pr_warn_once("kvm: failed to create thread for pv_timer\n");
+		pv_timer_polling_worker = NULL;
+		hrtimer_cancel(&pv_sync_timer);
+		/* NOTE(review): the timer has not been started on this path */
+		return;
+	}
+	/* pin the worker to PVTIMER_SYNC_CPU, then start it and the timer */
+	kthread_bind(pv_timer_polling_worker, PVTIMER_SYNC_CPU);
+	wake_up_process(pv_timer_polling_worker);
+	hrtimer_start(&pv_sync_timer, ktime, HRTIMER_MODE_REL);
+}
+
 void kvm_lapic_init(void)
 {
+	kvm_pv_timer_init();
+
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
 	jump_label_rate_limit(&apic_sw_disabled, HZ);
@@ -2551,6 +2683,12 @@ void kvm_lapic_init(void)
 
 void kvm_lapic_exit(void)
 {
+	if (pv_timer_polling_worker) {	/* NULL if the kthread was not created */
+		hrtimer_cancel(&pv_sync_timer);	/* stop further wakeups first */
+		kthread_stop(pv_timer_polling_worker);
+		pv_timer_polling_worker = NULL;
+	}
+
 	static_key_deferred_flush(&apic_hw_disabled);
 	static_key_deferred_flush(&apic_sw_disabled);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 539a738..4588d59 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -16,6 +16,9 @@
 #define APIC_BUS_CYCLE_NS       1
 #define APIC_BUS_FREQUENCY      (1000000000ULL / APIC_BUS_CYCLE_NS)
 
+#define PVTIMER_SYNC_CPU	(NR_CPUS - 1) /* dedicated CPU: last CPU id */
+#define PVTIMER_PERIOD_NS	250000L /* pvtimer default period: 250 us */
+
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
@@ -213,6 +216,8 @@ static inline bool pv_timer_enabled(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pv_timer.msr_val & KVM_MSR_ENABLED;
 }
 
+void kvm_apic_sync_pv_timer(void *data);	/* data: struct kvm_vcpu * */
+
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void wait_lapic_expire(struct kvm_vcpu *vcpu);
-- 
1.7.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux