>From 7b12021e1d1b79797b49e41cc0a7be05a6180d9a Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@xxxxxxxxx> Date: Tue, 13 Sep 2011 21:52:54 +0800 Subject: [PATCH] KVM: emulate lapic tsc deadline timer for guest This patch emulate lapic tsc deadline timer for guest: Enumerate tsc deadline timer capability by CPUID; Enable tsc deadline timer mode by lapic MMIO; Start tsc deadline timer by WRMSR; Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> --- arch/x86/include/asm/apicdef.h | 2 + arch/x86/include/asm/cpufeature.h | 3 + arch/x86/include/asm/kvm_host.h | 2 + arch/x86/include/asm/msr-index.h | 2 + arch/x86/kvm/kvm_timer.h | 2 + arch/x86/kvm/lapic.c | 122 ++++++++++++++++++++++++++++++------- arch/x86/kvm/lapic.h | 3 + arch/x86/kvm/x86.c | 20 ++++++- 8 files changed, 132 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 34595d5..3925d80 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -100,7 +100,9 @@ #define APIC_TIMER_BASE_CLKIN 0x0 #define APIC_TIMER_BASE_TMBASE 0x1 #define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) #define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) #define APIC_LVT_MASKED (1 << 16) #define APIC_LVT_LEVEL_TRIGGER (1 << 15) #define APIC_LVT_REMOTE_IRR (1 << 14) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4258aac..8a26e48 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -120,6 +120,7 @@ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ #define X86_FEATURE_AES (4*32+25) /* AES instructions */ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ @@ -284,6 +285,8 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_tsc_deadline_timer \ + boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 307e3cf..2ce6529 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -671,6 +671,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; +extern u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); + /* control of guest tsc rate supported? */ extern bool kvm_has_tsc_control; /* minimum supported tsc_khz for guests */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d52609a..a6962d9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -229,6 +229,8 @@ #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define MSR_IA32_TSCDEADLINE 0x000006e0 + #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 64bc6ea..497dbaa 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h @@ -2,6 +2,8 @@ struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ + u32 timer_mode_mask; + u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ bool reinject; struct kvm_timer_ops *t_ops; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2b2255b..925d4b9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -135,9 +135,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; } +static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) +{ + return ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); +} + static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; + return ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); +} + +static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) +{ + return ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) == + APIC_LVT_TIMER_TSCDEADLINE); } static inline int apic_lvt_nmi_mode(u32 lvt_val) @@ -166,7 +180,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) } static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { - LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ + LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ LVT_MASK | APIC_MODE_MASK, /* LVTPC */ LINT_MASK, LINT_MASK, /* LVT0-1 */ @@ -570,6 +584,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; case APIC_TMCCT: /* Timer CCR */ + if (apic_lvtt_tscdeadline(apic)) + return 0; + val = apic_get_tmcct(apic); break; @@ -664,29 +681,32 @@ static void update_divide_count(struct kvm_lapic *apic) static void start_apic_timer(struct kvm_lapic *apic) { - ktime_t now = apic->lapic_timer.timer.base->get_time(); - - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * - APIC_BUS_CYCLE_NS * apic->divide_count; + ktime_t now; atomic_set(&apic->lapic_timer.pending, 0); - if (!apic->lapic_timer.period) - return; - /* - * Do not allow the guest to program periodic timers with small - * interval, since the hrtimers are not throttled by the host - * scheduler. - */ - if (apic_lvtt_period(apic)) { - if (apic->lapic_timer.period < NSEC_PER_MSEC/2) - apic->lapic_timer.period = NSEC_PER_MSEC/2; - } + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { + /* lapic timer in oneshot or peroidic mode */ + now = apic->lapic_timer.timer.base->get_time(); + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) + * APIC_BUS_CYCLE_NS * apic->divide_count; + + if (!apic->lapic_timer.period) + return; + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. + */ + if (apic_lvtt_period(apic)) { + if (apic->lapic_timer.period < NSEC_PER_MSEC/2) + apic->lapic_timer.period = NSEC_PER_MSEC/2; + } - hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, apic->lapic_timer.period), - HRTIMER_MODE_ABS); + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, apic->lapic_timer.period), + HRTIMER_MODE_ABS); - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " "timer initial count 0x%x, period %lldns, " "expire @ 0x%016" PRIx64 ".\n", __func__, @@ -695,6 +715,28 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->lapic_timer.period, ktime_to_ns(ktime_add_ns(now, apic->lapic_timer.period))); + } else if (apic_lvtt_tscdeadline(apic)) { + /* lapic timer in tsc deadline mode */ + u64 guest_tsc, guest_tsc_delta, ns = 0; + struct kvm_vcpu *vcpu = apic->vcpu; + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); + unsigned long flags; + + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) + return; + + local_irq_save(flags); + + now = apic->lapic_timer.timer.base->get_time(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; + if (likely(guest_tsc_delta > 0)) + ns = guest_tsc_delta * 1000000UL / this_tsc_khz; + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + + local_irq_restore(flags); + } } static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) @@ -782,7 +824,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT0: apic_manage_nmi_watchdog(apic, val); - case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: @@ -796,7 +837,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; + case APIC_LVTT: + if ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) != + (val & apic->lapic_timer.timer_mode_mask)) + hrtimer_cancel(&apic->lapic_timer.timer); + + if (!apic_sw_enabled(apic)) + val |= APIC_LVT_MASKED; + val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); + apic_set_reg(apic, APIC_LVTT, val); + break; + case APIC_TMICT: + if (apic_lvtt_tscdeadline(apic)) + break; + hrtimer_cancel(&apic->lapic_timer.timer); apic_set_reg(apic, APIC_TMICT, val); start_apic_timer(apic); @@ -883,6 +939,28 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) *---------------------------------------------------------------------- */ +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + return 0; + + return apic->lapic_timer.tscdeadline; +} + +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + return; + + hrtimer_cancel(&apic->lapic_timer.timer); + apic->lapic_timer.tscdeadline = data; + start_apic_timer(apic); +} + void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 52c9e6b..10c6ee6 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -41,6 +41,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu); bool kvm_apic_present(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); + void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6cb353c..a73c059 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -610,6 +610,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) best->ecx |= bit(X86_FEATURE_OSXSAVE); } + + /* + * When cpu has tsc deadline timer capacibility, use bit 17/18 + * as timer mode mask. Otherwise only use bit 17. + */ + if (cpu_has_tsc_deadline_timer && best->function == 0x1) { + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3 << 17); + } else + vcpu->arch.apic->lapic_timer.timer_mode_mask = (1 << 17); } int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -819,7 +829,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_TSCDEADLINE, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; @@ -1000,7 +1010,7 @@ static inline int kvm_tsc_changes_freq(void) return ret; } -static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) { if (vcpu->arch.virtual_tsc_khz) return vcpu->arch.virtual_tsc_khz; @@ -1564,6 +1574,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: return kvm_x2apic_msr_write(vcpu, msr, data); + case MSR_IA32_TSCDEADLINE: + kvm_set_lapic_tscdeadline_msr(vcpu, data); + break; case MSR_IA32_MISC_ENABLE: vcpu->arch.ia32_misc_enable_msr = data; break; @@ -1891,6 +1904,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: return kvm_x2apic_msr_read(vcpu, msr, pdata); break; + case MSR_IA32_TSCDEADLINE: + data = kvm_get_lapic_tscdeadline_msr(vcpu); + break; case MSR_IA32_MISC_ENABLE: data = vcpu->arch.ia32_misc_enable_msr; break; -- 1.6.5.6
Attachment:
kvm-lapic-tsc-deadline-timer.patch
Description: kvm-lapic-tsc-deadline-timer.patch