On 08/17/2011 07:19 AM, Liu, Jinsong wrote:
From a9670ddff84080c56183e2d678189e100f891174 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong<jinsong.liu@xxxxxxxxx> Date: Wed, 17 Aug 2011 11:36:28 +0800 Subject: [PATCH] KVM: emulate lapic tsc deadline timer for hvm
kvm doesn't have hvm.
This patch emulate lapic tsc deadline timer for hvm: Enumerate tsc deadline timer capacibility by CPUID; Enable tsc deadline timer mode by LAPIC MMIO; Start tsc deadline timer by MSR;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4258aac..28bcf48 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -120,6 +120,7 @@ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ #define X86_FEATURE_AES (4*32+25) /* AES instructions */ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 307e3cf..28f7128 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -635,6 +635,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); + u64 (*guest_to_host_tsc)(u64 guest_tsc); };
Please put this near the other tsc functions. Add a comment explaining what value is returned under nested virtualization.
Please add the svm callback implementation.
--- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -229,6 +229,8 @@ #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define MSR_IA32_TSCDEADLINE 0x000006e0 + #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b
Need to add to msrs_to_save so live migration works.
@@ -665,28 +682,30 @@ static void update_divide_count(struct kvm_lapic *apic) static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->lapic_timer.timer.base->get_time(); - - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * - APIC_BUS_CYCLE_NS * apic->divide_count; atomic_set(&apic->lapic_timer.pending, 0); - if (!apic->lapic_timer.period) - return; - /* - * Do not allow the guest to program periodic timers with small - * interval, since the hrtimers are not throttled by the host - * scheduler. - */ - if (apic_lvtt_period(apic)) { - if (apic->lapic_timer.period< NSEC_PER_MSEC/2) - apic->lapic_timer.period = NSEC_PER_MSEC/2; - } + /* lapic timer in oneshot or peroidic mode */ + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) + * APIC_BUS_CYCLE_NS * apic->divide_count; - hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, apic->lapic_timer.period), - HRTIMER_MODE_ABS); + if (!apic->lapic_timer.period) + return; + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. + */ + if (apic_lvtt_period(apic)) { + if (apic->lapic_timer.period< NSEC_PER_MSEC/2) + apic->lapic_timer.period = NSEC_PER_MSEC/2; + } - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, apic->lapic_timer.period), + HRTIMER_MODE_ABS); + + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " "timer initial count 0x%x, period %lldns, " "expire @ 0x%016" PRIx64 ".\n", __func__, @@ -695,6 +714,26 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->lapic_timer.period, ktime_to_ns(ktime_add_ns(now, apic->lapic_timer.period))); + } + + /* lapic timer in tsc deadline mode */ + if (apic_lvtt_tscdeadline(apic)) {
'else if' is slightly better, since it shows the reader the options are mutually exclusive.
+ u64 tsc_now, tsc_target, tsc_delta, nsec; + + if (!apic->lapic_timer.tscdeadline) + return; + + tsc_target = kvm_x86_ops-> + guest_to_host_tsc(apic->lapic_timer.tscdeadline); + rdtscll(tsc_now); + tsc_delta = tsc_target - tsc_now;
This only works if we have a constant tsc, that's not true for large multiboard machines. Need to do this with irqs disabled as well (reading both 'now' and 'tsc_now' in the same critical section).
+ if (tsc_delta< 0) + tsc_delta = 0; + + nsec = tsc_delta * 1000000L / tsc_khz; + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, nsec), HRTIMER_MODE_ABS); + } } @@ -883,6 +936,28 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) *---------------------------------------------------------------------- */ +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + return 0;
Why?
+ + return apic->lapic_timer.tscdeadline; +} + +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + return; + + hrtimer_cancel(&apic->lapic_timer.timer); + apic->lapic_timer.tscdeadline = data; + start_apic_timer(apic);
Shouldn't the msr value be updated even if we're outside tsc-deadline mode?
+} +
/* * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ * ioctl. In this case the call-back should update internal vmx state to make @@ -6270,6 +6278,16 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) } } } + + /* + * Emulate Intel lapic tsc deadline timer even if host not support it. + * Open CPUID.1.ECX[24] and use bit17/18 as timer mode mask. + */ + best = kvm_find_cpuid_entry(vcpu, 1, 0); + if (best) { + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3<< 17); + } }
Should be in common code; there is nothing vmx specific about it (although it is Intel specific at present).
-- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html