On Mon, Jun 13, 2011 at 07:31:33PM -0400, Glauber Costa wrote: > To implement steal time, we need the hypervisor to pass the guest information > about how much time was spent running other processes outside the VM. > This is per-vcpu, and using the kvmclock structure for that is an abuse > we decided not to make. > > In this patchset, I am introducing a new msr, MSR_KVM_STEAL_TIME, that > holds the memory area address containing information about steal time. > > This patch contains the hypervisor part for it. I am keeping it separate from > the headers to facilitate backports for people who want to backport the kernel > part but not the hypervisor, or the other way around. > > Signed-off-by: Glauber Costa <glommer@xxxxxxxxxx> > CC: Rik van Riel <riel@xxxxxxxxxx> > CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx> > CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > CC: Avi Kivity <avi@xxxxxxxxxx> > CC: Anthony Liguori <aliguori@xxxxxxxxxx> > CC: Eric B Munson <emunson@xxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 8 +++++ > arch/x86/include/asm/kvm_para.h | 4 ++ > arch/x86/kvm/x86.c | 60 +++++++++++++++++++++++++++++++++++++-- > 3 files changed, 69 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index fc38eca..5dce014 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -388,6 +388,14 @@ struct kvm_vcpu_arch { > unsigned int hw_tsc_khz; > unsigned int time_offset; > struct page *time_page; > + > + struct { > + u64 msr_val; > + gpa_t stime; > + struct kvm_steal_time steal; > + u64 this_time_out; > + } st; > + > u64 last_guest_tsc; > u64 last_kernel_ns; > u64 last_tsc_nsec; > diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h > index ac306c4..0341e61 100644 > --- a/arch/x86/include/asm/kvm_para.h > +++ b/arch/x86/include/asm/kvm_para.h > @@ -45,6 +45,10 @@ struct kvm_steal_time { > __u32 pad[6]; > }; > > +#define 
KVM_STEAL_ALIGNMENT_BITS 5 > +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) > +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) > + > #define KVM_MAX_MMU_OP_BATCH 32 > > #define KVM_ASYNC_PF_ENABLED (1 << 0) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6645634..10fe028 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -797,12 +797,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); > * kvm-specific. Those are put in the beginning of the list. > */ > > -#define KVM_SAVE_MSRS_BEGIN 8 > +#define KVM_SAVE_MSRS_BEGIN 9 > static u32 msrs_to_save[] = { > MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, > MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, > HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, > - HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, > + HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, > MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, > MSR_STAR, > #ifdef CONFIG_X86_64 > @@ -1480,6 +1480,34 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) > } > } > > +static void record_steal_time(struct kvm_vcpu *vcpu) > +{ > + u64 delta; > + > + if (vcpu->arch.st.stime && vcpu->arch.st.this_time_out) { > + > + if (unlikely(kvm_read_guest(vcpu->kvm, vcpu->arch.st.stime, > + &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) { > + > + vcpu->arch.st.stime = 0; > + return; > + } > + > + delta = (get_kernel_ns() - vcpu->arch.st.this_time_out); > + > + vcpu->arch.st.steal.steal += delta; > + vcpu->arch.st.steal.version += 2; > + > + if (unlikely(kvm_write_guest(vcpu->kvm, vcpu->arch.st.stime, Why not use kvm_write_guest_cached() here and introduce kvm_read_guest_cached() for the read above? 
> + &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) { > + > + vcpu->arch.st.stime = 0; > + return; > + } > + } > + > +} > + > int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) > { > switch (msr) { > @@ -1562,6 +1590,23 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (kvm_pv_enable_async_pf(vcpu, data)) > return 1; > break; > + case MSR_KVM_STEAL_TIME: > + vcpu->arch.st.msr_val = data; > + > + if (!(data & KVM_MSR_ENABLED)) { > + vcpu->arch.st.stime = 0; > + break; > + } > + > + if (data & KVM_STEAL_RESERVED_MASK) > + return 1; > + > + vcpu->arch.st.this_time_out = get_kernel_ns(); > + vcpu->arch.st.stime = data & KVM_STEAL_VALID_BITS; > + record_steal_time(vcpu); > + > + break; > + > case MSR_IA32_MCG_CTL: > case MSR_IA32_MCG_STATUS: > case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: > @@ -1847,6 +1892,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) > case MSR_KVM_ASYNC_PF_EN: > data = vcpu->arch.apf.msr_val; > break; > + case MSR_KVM_STEAL_TIME: > + data = vcpu->arch.st.msr_val; > + break; > case MSR_IA32_P5_MC_ADDR: > case MSR_IA32_P5_MC_TYPE: > case MSR_IA32_MCG_CAP: > @@ -2158,6 +2206,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > kvm_migrate_timers(vcpu); > vcpu->cpu = cpu; > } > + > + record_steal_time(vcpu); > } > > void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) > @@ -2165,6 +2215,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) > kvm_x86_ops->vcpu_put(vcpu); > kvm_put_guest_fpu(vcpu); > kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); > + vcpu->arch.st.this_time_out = get_kernel_ns(); > } > Shouldn't we call record_steal_time(vcpu) just before entering the guest, and set vcpu->arch.st.this_time_out = get_kernel_ns() just after exiting it? vcpu_(load|put) are called for each vcpu ioctl, not only VCPU_RUN. 
> static int is_efer_nx(void) > @@ -2477,7 +2528,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > (1 << KVM_FEATURE_NOP_IO_DELAY) | > (1 << KVM_FEATURE_CLOCKSOURCE2) | > (1 << KVM_FEATURE_ASYNC_PF) | > - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); > + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | > + (1 << KVM_FEATURE_STEAL_TIME); > entry->ebx = 0; > entry->ecx = 0; > entry->edx = 0; > @@ -6200,6 +6252,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) > > kvmclock_reset(vcpu); > > + vcpu->arch.st.stime = 0; > + > kvm_clear_async_pf_completion_queue(vcpu); > kvm_async_pf_hash_reset(vcpu); > vcpu->arch.apf.halted = false; > -- > 1.7.3.4 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html