Report stolen time (run_delay field from schedstat) to guests via pvclock. Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: kvm/arch/x86/include/asm/kvm_para.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm_para.h +++ kvm/arch/x86/include/asm/kvm_para.h @@ -15,9 +15,11 @@ #define KVM_FEATURE_CLOCKSOURCE 0 #define KVM_FEATURE_NOP_IO_DELAY 1 #define KVM_FEATURE_MMU_OP 2 +#define KVM_FEATURE_RUNTIME_INFO 3 #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 +#define MSR_KVM_RUN_TIME 0x13 #define KVM_MAX_MMU_OP_BATCH 32 @@ -50,6 +52,11 @@ struct kvm_mmu_op_release_pt { #ifdef __KERNEL__ #include <asm/processor.h> +struct kvm_vcpu_runtime_info { + u64 stolen_time; /* time spent starving */ + u64 reserved[3]; /* for future use */ +}; + extern void kvmclock_init(void); Index: kvm/arch/x86/include/asm/kvm_host.h =================================================================== --- kvm.orig/arch/x86/include/asm/kvm_host.h +++ kvm/arch/x86/include/asm/kvm_host.h @@ -354,6 +354,10 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; + bool stolen_time_enable; + struct kvm_vcpu_runtime_info stolen_time; + unsigned int stolen_time_offset; + bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; Index: kvm/arch/x86/kvm/x86.c =================================================================== --- kvm.orig/arch/x86/kvm/x86.c +++ kvm/arch/x86/kvm/x86.c @@ -507,9 +507,9 @@ static inline u32 bit(int bitno) * kvm-specific. Those are put in the beginning of the list. */ -#define KVM_SAVE_MSRS_BEGIN 2 +#define KVM_SAVE_MSRS_BEGIN 3 static u32 msrs_to_save[] = { - MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_RUN_TIME, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_K6_STAR, #ifdef CONFIG_X86_64 @@ -679,6 +679,7 @@ static void kvm_write_guest_time(struct struct kvm_vcpu_arch *vcpu = &v->arch; void *shared_kaddr; unsigned long this_tsc_khz; + struct task_struct *task = current; if ((!vcpu->time_page)) return; @@ -700,6 +701,9 @@ static void kvm_write_guest_time(struct vcpu->hv_clock.system_time = ts.tv_nsec + (NSEC_PER_SEC * (u64)ts.tv_sec); + + vcpu->stolen_time.stolen_time = task->sched_info.run_delay; + /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate @@ -712,6 +716,10 @@ static void kvm_write_guest_time(struct memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, sizeof(vcpu->hv_clock)); + if (vcpu->stolen_time_enable) + memcpy(shared_kaddr + vcpu->stolen_time_offset, + &vcpu->stolen_time, sizeof(vcpu->stolen_time)); + kunmap_atomic(shared_kaddr, KM_USER0); mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); @@ -937,6 +945,35 @@ int kvm_set_msr_common(struct kvm_vcpu * kvm_request_guest_time_update(vcpu); break; } + case MSR_KVM_RUN_TIME: { + struct page *page; + unsigned int stolen_time_offset; + + if (!vcpu->arch.time_page) + return 1; + + /* we verify if the enable bit is set... */ + if (!(data & 1)) + break; + + /* ...but clean it before doing the actual write */ + stolen_time_offset = data & ~(PAGE_MASK | 1); + + /* that it matches the hvclock page */ + page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); + if (is_error_page(page)) { + kvm_release_page_clean(page); + return 1; + } + if (page != vcpu->arch.time_page) { + kvm_release_page_clean(page); + return 1; + } + kvm_release_page_clean(page); + vcpu->arch.stolen_time_offset = stolen_time_offset; + vcpu->arch.stolen_time_enable = 1; + break; + } case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: @@ -1246,6 +1283,7 @@ int kvm_dev_ioctl_check_extension(long e case KVM_CAP_PIT2: case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: + case KVM_CAP_PVCLOCK_RUNTIME: r = 1; break; case KVM_CAP_COALESCED_MMIO: Index: kvm/arch/x86/kvm/Kconfig =================================================================== --- kvm.orig/arch/x86/kvm/Kconfig +++ kvm/arch/x86/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE + select SCHEDSTATS ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent Index: kvm/include/linux/kvm.h =================================================================== --- kvm.orig/include/linux/kvm.h +++ kvm/include/linux/kvm.h @@ -436,6 +436,7 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#define KVM_CAP_PVCLOCK_RUNTIME 38 #ifdef KVM_CAP_IRQ_ROUTING -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html