sched_clock() should time the vcpu run time.

Subtract stolen time from realtime pvclock.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: kvm/arch/x86/kernel/kvmclock.c
===================================================================
--- kvm.orig/arch/x86/kernel/kvmclock.c
+++ kvm/arch/x86/kernel/kvmclock.c
@@ -38,7 +38,16 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+struct time_info {
+	struct pvclock_vcpu_time_info hv_clock;
+	struct kvm_vcpu_runtime_info run_info;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct time_info, time_info);
+
+#define hv_clock time_info.hv_clock
+#define run_info time_info.run_info
+
 static struct pvclock_wall_clock wall_clock;
 
 /*
@@ -84,6 +93,40 @@ static cycle_t kvm_clock_get_cycles(stru
 	return kvm_clock_read();
 }
 
+cycle_t kvm_runtime_read(struct pvclock_vcpu_time_info *src,
+			 struct kvm_vcpu_runtime_info *rinfo)
+{
+	struct pvclock_shadow_time shadow;
+	unsigned version;
+	cycle_t ret, offset;
+	unsigned long long stolen;
+
+	do {
+		version = pvclock_get_time_values(&shadow, src);
+		barrier();
+		offset = pvclock_get_nsec_offset(&shadow);
+		stolen = rinfo->stolen_time;
+		ret = shadow.system_timestamp + offset - stolen;
+		barrier();
+	} while (version != src->version);
+
+	return ret;
+}
+
+static cycle_t kvm_clock_read_unstolen(void)
+{
+	struct pvclock_vcpu_time_info *src;
+	struct kvm_vcpu_runtime_info *rinfo;
+	cycle_t ret;
+
+	src = &get_cpu_var(hv_clock);
+	rinfo = &get_cpu_var(run_info);
+	ret = kvm_runtime_read(src, rinfo);
+	put_cpu_var(run_info);
+	put_cpu_var(hv_clock);
+	return ret;
+}
+
 /*
  * If we don't do that, there is the possibility that the guest
  * will calibrate under heavy load - thus, getting a lower lpj -
@@ -133,14 +176,30 @@ static int kvm_register_clock(char *txt)
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
+static int kvm_register_run_info(char *txt)
+{
+	int cpu = smp_processor_id();
+	int low, high;
+
+	low = (int) __pa(&per_cpu(run_info, cpu)) | 1;
+	high = ((u64)__pa(&per_cpu(run_info, cpu)) >> 32);
+	printk(KERN_INFO "kvm-runtime-info: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
+	return native_write_msr_safe(MSR_KVM_RUN_TIME, low, high);
+}
+
 #ifdef CONFIG_X86_LOCAL_APIC
 static void __cpuinit kvm_setup_secondary_clock(void)
 {
+	char *txt = "secondary cpu clock";
+
 	/*
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock("secondary cpu clock"));
+	WARN_ON(kvm_register_clock(txt));
+	if (kvm_para_has_feature(KVM_FEATURE_RUNTIME_INFO))
+		kvm_register_run_info(txt);
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
@@ -149,7 +208,11 @@ static void __cpuinit kvm_setup_secondar
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
-	WARN_ON(kvm_register_clock("primary cpu clock"));
+	char *txt = "primary cpu clock";
+
+	WARN_ON(kvm_register_clock(txt));
+	if (kvm_para_has_feature(KVM_FEATURE_RUNTIME_INFO))
+		kvm_register_run_info(txt);
 	native_smp_prepare_boot_cpu();
 }
 #endif
@@ -204,4 +267,6 @@ void __init kvmclock_init(void)
 		pv_info.paravirt_enabled = 1;
 		pv_info.name = "KVM";
 	}
+	if (kvm_para_has_feature(KVM_FEATURE_RUNTIME_INFO))
+		pv_time_ops.sched_clock = kvm_clock_read_unstolen;
 }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html