Fwd: [KVM TSC emulation 9/9] Add software TSC emulation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





-------- Original Message --------
Subject: 	[KVM TSC emulation 9/9] Add software TSC emulation
Date: 	Mon, 20 Jun 2011 16:59:37 -0700
From: 	Zachary Amsden <zamsden@xxxxxxxxxx>
To: Avi Kivity <avi@xxxxxxxxxx>, Marcelo Tosatti <mtosatti@xxxxxxxxxx>, Glauber Costa <glommer@xxxxxxxxxx>, Frank Arnold <farnold@xxxxxxxxxx>, Joerg Roedel <joerg.roedel@xxxxxxx>, Jan Kiszka <jan.kiszka@xxxxxxxxxxx>, linux-kvm@xxxxxxxxxxxxxxx, linux-kernel@xxxxxxxxxxxxxxx, Zachary Amsden <zamsden@xxxxxxxxx>, Avi Kivity <avi@xxxxxxxxxx>, Marcelo Tosatti <mtosatti@xxxxxxxxxx>, Glauber Costa <glommer@xxxxxxxxxx>, Frank Arnold <farnold@xxxxxxxxxx>, Joerg Roedel <joerg.roedel@xxxxxxx>, Jan Kiszka <jan.kiszka@xxxxxxxxxxx>, linux-kvm@xxxxxxxxxxxxxxx CC: Zachary Amsden <zamsden@xxxxxxxxxx>, Zachary Amsden <zamsden@xxxxxxxxx>



When hardware assistance is unavailable to scale the TSC, or it is
not possible to keep in sync, add a software virtualization mode
where the TSC is trapped and thus guaranteed to always have perfect
synchronization.

Currently this behavior defaults to on; how and when the decision to
use trapping is made is likely to be a matter of debate.  For now,
just make it possible.

Signed-off-by: Zachary Amsden<zamsden@xxxxxxxxxx>
---
 arch/x86/kvm/svm.c |   26 +++++++++++++++++++++++++-
 arch/x86/kvm/vmx.c |   28 +++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c |   34 +++++++++++++++++++++++-----------
 arch/x86/kvm/x86.h |    5 +++++
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dcab00e..fc4583d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -185,6 +185,7 @@ module_param(nested, int, S_IRUGO);

 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
+static void svm_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap);

 static int nested_svm_exit_handled(struct vcpu_svm *svm);
 static int nested_svm_intercept(struct vcpu_svm *svm);
@@ -912,13 +913,18 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 	u64 khz;

 	/* Guest TSC same frequency as host TSC? */
-	if (!scale) {
+	if (!scale&&  !check_tsc_unstable()) {
 		svm->tsc_ratio = TSC_RATIO_DEFAULT;
 		return;
 	}

 	/* TSC scaling supported? */
 	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+		if (kvm_software_tsc) {
+			pr_debug("kvm: using TSC trapping\n");
+			svm_set_tsc_trapping(vcpu, true);
+			return;
+		}
 		if (user_tsc_khz>  tsc_khz) {
 			vcpu->arch.tsc_catchup = 1;
 			vcpu->arch.tsc_always_catchup = 1;
@@ -1184,6 +1190,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	svm->vmcb_pa = page_to_pfn(page)<<  PAGE_SHIFT;
 	svm->asid_generation = 0;
 	init_vmcb(svm);
+	kvm_set_tsc_khz(&svm->vcpu, kvm_max_tsc_khz);
 	kvm_write_tsc(&svm->vcpu, 0);

 	err = fx_init(&svm->vcpu);
@@ -1303,6 +1310,15 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
 	clr_intercept(svm, INTERCEPT_VINTR);
 }

+static void svm_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	if (trap)
+		set_intercept(svm, INTERCEPT_RDTSC);
+	else
+		clr_intercept(svm, INTERCEPT_RDTSC);
+}
+
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
 {
 	struct vmcb_save_area *save =&to_svm(vcpu)->vmcb->save;
@@ -2732,6 +2748,13 @@ static int task_switch_interception(struct vcpu_svm *svm)
 	return 1;
 }

+static int rdtsc_interception(struct vcpu_svm *svm)
+{
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
+	kvm_read_tsc(&svm->vcpu);
+	return 1;
+}
+
 static int cpuid_interception(struct vcpu_svm *svm)
 {
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
@@ -3178,6 +3201,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_SMI]				= nop_on_interception,
 	[SVM_EXIT_INIT]				= nop_on_interception,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
+	[SVM_EXIT_RDTSC]			= rdtsc_interception,
 	[SVM_EXIT_CPUID]			= cpuid_interception,
 	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 780fe12..65066b4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -606,6 +606,7 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
+static void vmx_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap);

 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1756,9 +1757,14 @@ static u64 guest_read_tsc(void)
  */
 static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 {
-	if (!scale)
+	if (!scale&&  !check_tsc_unstable())
 		return;

+	if (kvm_software_tsc) {
+		pr_debug("kvm: using TSC trapping\n");
+		vmx_set_tsc_trapping(vcpu, true);
+		return;
+	}
 	if (user_tsc_khz>  tsc_khz) {
 		vcpu->arch.tsc_catchup = 1;
 		vcpu->arch.tsc_always_catchup = 1;
@@ -3695,6 +3701,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);

+	kvm_set_tsc_khz(&vmx->vcpu, kvm_max_tsc_khz);
 	kvm_write_tsc(&vmx->vcpu, 0);

 	return 0;
@@ -3997,6 +4004,18 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }

+static void vmx_set_tsc_trapping(struct kvm_vcpu *vcpu, bool trap)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	if (trap)
+		cpu_based_vm_exec_control |= CPU_BASED_RDTSC_EXITING;
+	else
+		cpu_based_vm_exec_control&= ~CPU_BASED_RDTSC_EXITING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
@@ -4497,6 +4516,12 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
 	return 1;
 }

+static int handle_rdtsc(struct kvm_vcpu *vcpu)
+{
+	kvm_read_tsc(vcpu);
+	return 1;
+}
+
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
 	skip_emulated_instruction(vcpu);
@@ -5421,6 +5446,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_HLT]                     = handle_halt,
 	[EXIT_REASON_INVD]		      = handle_invd,
 	[EXIT_REASON_INVLPG]		      = handle_invlpg,
+	[EXIT_REASON_RDTSC]		      = handle_rdtsc,
 	[EXIT_REASON_VMCALL]                  = handle_vmcall,
 	[EXIT_REASON_VMCLEAR]	              = handle_vmclear,
 	[EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 09e67fb..1a07796 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -99,6 +99,10 @@ EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
 static u32 tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

+int kvm_software_tsc = 1;
+module_param_named(software_tsc_emulation, kvm_software_tsc, bool, 0644);
+EXPORT_SYMBOL_GPL(kvm_software_tsc);
+
 #define KVM_NR_SHARED_MSRS 16

 struct kvm_shared_msrs_global {
@@ -993,7 +997,8 @@ static inline u64 get_kernel_ns(void)
 }

 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
-unsigned long max_tsc_khz;
+unsigned long kvm_max_tsc_khz;
+EXPORT_SYMBOL_GPL(kvm_max_tsc_khz);

 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
@@ -1001,7 +1006,7 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 				   vcpu->arch.virtual_tsc_shift);
 }

-static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
+void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 {
 	u32 thresh_lo, thresh_hi;
 	int use_scaling = 0;
@@ -1026,6 +1031,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 	}
 	kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);	
 }
+EXPORT_SYMBOL_GPL(kvm_set_tsc_khz);

 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 {
@@ -1117,6 +1123,18 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)

 EXPORT_SYMBOL_GPL(kvm_write_tsc);

+void kvm_read_tsc(struct kvm_vcpu *vcpu)
+{
+	u64 tsc;
+	s64 kernel_ns = get_kernel_ns();
+
+	tsc = compute_guest_tsc(vcpu, kernel_ns);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)tsc);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, tsc>>  32);
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_read_tsc);
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
 	unsigned long flags;
@@ -4931,7 +4949,7 @@ static void kvm_timer_init(void)
 {
 	int cpu;

-	max_tsc_khz = tsc_khz;
+	kvm_max_tsc_khz = tsc_khz;
 	register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_CPU_FREQ
@@ -4940,13 +4958,13 @@ static void kvm_timer_init(void)
 		cpu = get_cpu();
 		cpufreq_get_policy(&policy, cpu);
 		if (policy.cpuinfo.max_freq)
-			max_tsc_khz = policy.cpuinfo.max_freq;
+			kvm_max_tsc_khz = policy.cpuinfo.max_freq;
 		put_cpu();
 #endif
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
 	}
-	pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
+	pr_debug("kvm: max_tsc_khz = %ld\n", kvm_max_tsc_khz);
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
@@ -6194,10 +6212,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 						unsigned int id)
 {
-	if (check_tsc_unstable()&&  atomic_read(&kvm->online_vcpus) != 0)
-		printk_once(KERN_WARNING
-		"kvm: SMP vm created on host with unstable TSC; "
-		"guest TSC will not be reliable\n");
 	return kvm_x86_ops->vcpu_create(kvm, id);
 }

@@ -6385,8 +6399,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.pio_data = page_address(page);

-	kvm_set_tsc_khz(vcpu, max_tsc_khz);
-
 	r = kvm_mmu_create(vcpu);
 	if (r<  0)
 		goto fail_free_pio_data;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 256da82..94780df 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -80,6 +80,10 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);

 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
+void kvm_read_tsc(struct kvm_vcpu *vcpu);
+void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz);
+extern int kvm_software_tsc;
+extern unsigned long kvm_max_tsc_khz;

 int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
@@ -89,4 +93,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);

+
 #endif
--
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux