Register steal time within KVM. Every time we sample the steal time information, we update a local variable that records the last value read. We then account the difference. Signed-off-by: Glauber Costa <glommer@xxxxxxxxxx> CC: Rik van Riel <riel@xxxxxxxxxx> CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx> CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CC: Avi Kivity <avi@xxxxxxxxxx> --- arch/x86/include/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 61 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/kvmclock.c | 2 + 3 files changed, 64 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 8ba33ed..8210122 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -89,6 +89,7 @@ struct kvm_vcpu_pv_apf_data { extern void kvmclock_init(void); extern int kvm_register_clock(char *txt); +extern int kvm_register_steal_time(void); /* This instruction is vmcall. On non-VT architectures, it will generate a diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 33c07b0..30c0fa7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -58,6 +58,8 @@ struct kvm_para_state { static DEFINE_PER_CPU(struct kvm_para_state, para_state); static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_steal_time, steal_time); +static DEFINE_PER_CPU(u64, steal_info); static struct kvm_para_state *kvm_para_state(void) { @@ -489,18 +491,21 @@ static void __init kvm_smp_prepare_boot_cpu(void) #ifdef CONFIG_KVM_CLOCK WARN_ON(kvm_register_clock("primary cpu clock")); #endif + WARN_ON(kvm_register_steal_time()); kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); } static void __cpuinit kvm_guest_cpu_online(void *dummy) { + WARN_ON(kvm_register_steal_time()); kvm_guest_cpu_init(); } static void kvm_guest_cpu_offline(void *dummy) { kvm_pv_disable_apf(NULL); + native_write_msr(MSR_KVM_STEAL_TIME, 0, 0); 
apf_task_wake_all(); } @@ -534,6 +539,59 @@ static void __init kvm_apf_trap_init(void) set_intr_gate(14, &async_page_fault); } +static u64 kvm_account_steal_time(void) +{ + u64 delta = 0; + u64 *last_steal_info, this_steal_info; + struct kvm_steal_time *src; + int version; + + src = &get_cpu_var(steal_time); + do { + version = src->version; + rmb(); + this_steal_info = src->steal; + rmb(); + } while ((src->version & 1) || (version != src->version)); + put_cpu_var(steal_time); + + last_steal_info = &get_cpu_var(steal_info); + + if (likely(*last_steal_info)) + delta = this_steal_info - *last_steal_info; + *last_steal_info = this_steal_info; + + put_cpu_var(steal_info); + + /* + * using nanoseconds introduces noise, which accumulates easily + * leading to big steal time values. We want, however, to keep the + * interface nanosecond-based for future-proofness. The hypervisor may + * adopt a similar strategy, but we can't rely on that. + */ + delta /= NSEC_PER_MSEC; + delta *= NSEC_PER_MSEC; + + return delta; +} + + +int kvm_register_steal_time(void) +{ + int cpu = smp_processor_id(); + int low, high, ret; + + if (!hypervisor_steal_time) + return 0; + + low = (int)__pa(&per_cpu(steal_time, cpu)) | 1; + high = ((u64)__pa(&per_cpu(steal_time, cpu)) >> 32); + ret = native_write_msr_safe(MSR_KVM_STEAL_TIME, low, high); + printk(KERN_INFO "kvm-stealtime: cpu %d, msr %x:%x\n", + cpu, high, low); + return ret; +} + void __init kvm_guest_init(void) { int i; @@ -548,6 +606,9 @@ void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) x86_init.irqs.trap_init = kvm_apf_trap_init; + if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) + hypervisor_steal_time = kvm_account_steal_time; + #ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; register_cpu_notifier(&kvm_cpu_notifier); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f98d3ea..08661c6 100644 --- a/arch/x86/kernel/kvmclock.c +++ 
b/arch/x86/kernel/kvmclock.c @@ -164,6 +164,7 @@ static void __cpuinit kvm_setup_secondary_clock(void) static void kvm_crash_shutdown(struct pt_regs *regs) { native_write_msr(msr_kvm_system_time, 0, 0); + native_write_msr(MSR_KVM_STEAL_TIME, 0, 0); native_machine_crash_shutdown(regs); } #endif @@ -171,6 +172,7 @@ static void kvm_crash_shutdown(struct pt_regs *regs) static void kvm_shutdown(void) { native_write_msr(msr_kvm_system_time, 0, 0); + native_write_msr(MSR_KVM_STEAL_TIME, 0, 0); native_machine_shutdown(); } -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html