On Tue, Sep 13, 2011 at 10:36:51PM +0800, Liu, Jinsong wrote: > >From 7b12021e1d1b79797b49e41cc0a7be05a6180d9a Mon Sep 17 00:00:00 2001 > From: Liu, Jinsong <jinsong.liu@xxxxxxxxx> > Date: Tue, 13 Sep 2011 21:52:54 +0800 > Subject: [PATCH] KVM: emulate lapic tsc deadline timer for guest > > This patch emulate lapic tsc deadline timer for guest: > Enumerate tsc deadline timer capability by CPUID; > Enable tsc deadline timer mode by lapic MMIO; > Start tsc deadline timer by WRMSR; > > Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx> > --- > arch/x86/include/asm/apicdef.h | 2 + > arch/x86/include/asm/cpufeature.h | 3 + > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/include/asm/msr-index.h | 2 + > arch/x86/kvm/kvm_timer.h | 2 + > arch/x86/kvm/lapic.c | 122 ++++++++++++++++++++++++++++++------- > arch/x86/kvm/lapic.h | 3 + > arch/x86/kvm/x86.c | 20 ++++++- > 8 files changed, 132 insertions(+), 24 deletions(-) > > diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h > index 34595d5..3925d80 100644 > --- a/arch/x86/include/asm/apicdef.h > +++ b/arch/x86/include/asm/apicdef.h > @@ -100,7 +100,9 @@ > #define APIC_TIMER_BASE_CLKIN 0x0 > #define APIC_TIMER_BASE_TMBASE 0x1 > #define APIC_TIMER_BASE_DIV 0x2 > +#define APIC_LVT_TIMER_ONESHOT (0 << 17) > #define APIC_LVT_TIMER_PERIODIC (1 << 17) > +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) > #define APIC_LVT_MASKED (1 << 16) > #define APIC_LVT_LEVEL_TRIGGER (1 << 15) > #define APIC_LVT_REMOTE_IRR (1 << 14) Please have a separate, introductory patch for definitions that are not KVM specific. > +++ b/arch/x86/include/asm/kvm_host.h > @@ -671,6 +671,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); > > extern bool tdp_enabled; > > +extern u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); > + No need for extern. > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 2b2255b..925d4b9 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -135,9 +135,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) > return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; > } > > +static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) > +{ > + return ((apic_get_reg(apic, APIC_LVTT) & > + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); > +} > + > static inline int apic_lvtt_period(struct kvm_lapic *apic) > { > - return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; > + return ((apic_get_reg(apic, APIC_LVTT) & > + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); > +} > + > +static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) > +{ > + return ((apic_get_reg(apic, APIC_LVTT) & > + apic->lapic_timer.timer_mode_mask) == > + APIC_LVT_TIMER_TSCDEADLINE); > } > > static inline int apic_lvt_nmi_mode(u32 lvt_val) > @@ -166,7 +180,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) > } > > static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { > - LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ > + LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ > LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ > LVT_MASK | APIC_MODE_MASK, /* LVTPC */ > LINT_MASK, LINT_MASK, /* LVT0-1 */ > @@ -570,6 +584,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) > break; > > case APIC_TMCCT: /* Timer CCR */ > + if (apic_lvtt_tscdeadline(apic)) > + return 0; > + > val = apic_get_tmcct(apic); > break; > > @@ -664,29 +681,32 @@ static void update_divide_count(struct kvm_lapic *apic) > > static void start_apic_timer(struct kvm_lapic *apic) > { > - ktime_t now = apic->lapic_timer.timer.base->get_time(); > - > - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * > - APIC_BUS_CYCLE_NS * apic->divide_count; > + ktime_t now; > atomic_set(&apic->lapic_timer.pending, 0); > > - if (!apic->lapic_timer.period) > - return; > - /* > - * Do not allow the guest to program periodic timers with small > - * interval, since the hrtimers are not throttled by the host > - * scheduler. > - */ > - if (apic_lvtt_period(apic)) { > - if (apic->lapic_timer.period < NSEC_PER_MSEC/2) > - apic->lapic_timer.period = NSEC_PER_MSEC/2; > - } > + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { > + /* lapic timer in oneshot or peroidic mode */ > + now = apic->lapic_timer.timer.base->get_time(); > + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) > + * APIC_BUS_CYCLE_NS * apic->divide_count; > + > + if (!apic->lapic_timer.period) > + return; > + /* > + * Do not allow the guest to program periodic timers with small > + * interval, since the hrtimers are not throttled by the host > + * scheduler. > + */ > + if (apic_lvtt_period(apic)) { > + if (apic->lapic_timer.period < NSEC_PER_MSEC/2) > + apic->lapic_timer.period = NSEC_PER_MSEC/2; > + } > > - hrtimer_start(&apic->lapic_timer.timer, > - ktime_add_ns(now, apic->lapic_timer.period), > - HRTIMER_MODE_ABS); > + hrtimer_start(&apic->lapic_timer.timer, > + ktime_add_ns(now, apic->lapic_timer.period), > + HRTIMER_MODE_ABS); > > - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > PRIx64 ", " > "timer initial count 0x%x, period %lldns, " > "expire @ 0x%016" PRIx64 ".\n", __func__, > @@ -695,6 +715,28 @@ static void start_apic_timer(struct kvm_lapic *apic) > apic->lapic_timer.period, > ktime_to_ns(ktime_add_ns(now, > apic->lapic_timer.period))); > + } else if (apic_lvtt_tscdeadline(apic)) { > + /* lapic timer in tsc deadline mode */ > + u64 guest_tsc, guest_tsc_delta, ns = 0; > + struct kvm_vcpu *vcpu = apic->vcpu; > + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); > + unsigned long flags; > + > + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) > + return; > + > + local_irq_save(flags); > + > + now = apic->lapic_timer.timer.base->get_time(); > + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); Use kvm_x86_ops->read_l1_tsc(vcpu) instead of direct MSR read (to avoid reading L2 guest TSC in case of nested virt). > + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; if (guest_tsc <= tscdeadline), the timer should start immediately. > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6cb353c..a73c059 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -610,6 +610,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) > if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) > best->ecx |= bit(X86_FEATURE_OSXSAVE); > } > + > + /* > + * When cpu has tsc deadline timer capacibility, use bit 17/18 > + * as timer mode mask. Otherwise only use bit 17. > + */ > + if (cpu_has_tsc_deadline_timer && best->function == 0x1) { > + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); > + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3 << 17); > + } else > + vcpu->arch.apic->lapic_timer.timer_mode_mask = (1 << 17); > } The deadline timer is entirely emulated, whether the host CPU supports it or not is irrelevant. Why was this changed from previous submissions? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html