Allow a guest to register a second location for the VCPU time info structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW). This is intended to allow the guest kernel to map this information into a usermode accessible page, so that usermode can efficiently calculate system time from the TSC without having to make a syscall. Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: vsyscall/arch/x86/include/asm/kvm_para.h =================================================================== --- vsyscall.orig/arch/x86/include/asm/kvm_para.h +++ vsyscall/arch/x86/include/asm/kvm_para.h @@ -23,6 +23,7 @@ #define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_PV_EOI 6 +#define KVM_FEATURE_USERSPACE_CLOCKSOURCE 7 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -39,6 +40,7 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_USERSPACE_TIME 0x4b564d05 struct kvm_steal_time { __u64 steal; Index: vsyscall/Documentation/virtual/kvm/msr.txt =================================================================== --- vsyscall.orig/Documentation/virtual/kvm/msr.txt +++ vsyscall/Documentation/virtual/kvm/msr.txt @@ -125,6 +125,22 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid leaf prior to usage. +MSR_KVM_USERSPACE_TIME: 0x4b564d05 + +Allow a guest to register a second location for the VCPU time info +structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW). +This is intended to allow the guest kernel to map this information +into a usermode accessible page, so that usermode can efficiently +calculate system time from the TSC without having to make a syscall. + +Relationship with master copy (MSR_KVM_SYSTEM_TIME_NEW): + +- This MSR must be enabled only when the master is enabled. 
+- Disabling updates to the master automatically disables +updates for this copy. + +Availability of this MSR must be checked via bit 7 in 0x40000001 cpuid +leaf prior to usage. MSR_KVM_WALL_CLOCK: 0x11 Index: vsyscall/arch/x86/include/asm/kvm_host.h =================================================================== --- vsyscall.orig/arch/x86/include/asm/kvm_host.h +++ vsyscall/arch/x86/include/asm/kvm_host.h @@ -415,10 +415,13 @@ struct kvm_vcpu_arch { int (*complete_userspace_io)(struct kvm_vcpu *vcpu); gpa_t time; + gpa_t uspace_time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; unsigned int time_offset; + unsigned int uspace_time_offset; struct page *time_page; + struct page *uspace_time_page; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; Index: vsyscall/arch/x86/kvm/x86.c =================================================================== --- vsyscall.orig/arch/x86/kvm/x86.c +++ vsyscall/arch/x86/kvm/x86.c @@ -809,13 +809,13 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * kvm-specific. Those are put in the beginning of the list. 
*/ -#define KVM_SAVE_MSRS_BEGIN 10 +#define KVM_SAVE_MSRS_BEGIN 11 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, - MSR_KVM_PV_EOI_EN, + MSR_KVM_PV_EOI_EN, MSR_KVM_USERSPACE_TIME, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_STAR, #ifdef CONFIG_X86_64 @@ -1135,16 +1135,43 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu EXPORT_SYMBOL_GPL(kvm_write_tsc); +static void kvm_write_pvtime(struct kvm_vcpu *v, struct page *page, + unsigned int offset_in_page, gpa_t gpa) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + void *shared_kaddr; + struct pvclock_vcpu_time_info *guest_hv_clock; + u8 pvclock_flags; + + shared_kaddr = kmap_atomic(page); + + guest_hv_clock = shared_kaddr + offset_in_page; + + /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ + pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); + + if (vcpu->pvclock_set_guest_stopped_request) { + pvclock_flags |= PVCLOCK_GUEST_STOPPED; + vcpu->pvclock_set_guest_stopped_request = false; + } + + vcpu->hv_clock.flags = pvclock_flags; + + memcpy(shared_kaddr + offset_in_page, &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); + + kunmap_atomic(shared_kaddr); + + mark_page_dirty(v->kvm, gpa >> PAGE_SHIFT); +} + static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags; struct kvm_vcpu_arch *vcpu = &v->arch; - void *shared_kaddr; unsigned long this_tsc_khz; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp; - struct pvclock_vcpu_time_info *guest_hv_clock; - u8 pvclock_flags; /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -1235,26 +1262,11 @@ static int kvm_guest_time_update(struct */ vcpu->hv_clock.version += 2; - shared_kaddr = kmap_atomic(vcpu->time_page); - - guest_hv_clock = shared_kaddr + vcpu->time_offset; - - /* retain PVCLOCK_GUEST_STOPPED if set in 
guest copy */ - pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); + kvm_write_pvtime(v, vcpu->time_page, vcpu->time_offset, vcpu->time); + if (vcpu->uspace_time_page) + kvm_write_pvtime(v, vcpu->uspace_time_page, + vcpu->uspace_time_offset, vcpu->uspace_time); - if (vcpu->pvclock_set_guest_stopped_request) { - pvclock_flags |= PVCLOCK_GUEST_STOPPED; - vcpu->pvclock_set_guest_stopped_request = false; - } - - vcpu->hv_clock.flags = pvclock_flags; - - memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - kunmap_atomic(shared_kaddr); - - mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); return 0; } @@ -1549,6 +1561,15 @@ static void kvmclock_reset(struct kvm_vc } } +static void kvmclock_uspace_reset(struct kvm_vcpu *vcpu) +{ + vcpu->arch.uspace_time = 0; + if (vcpu->arch.uspace_time_page) { + kvm_release_page_dirty(vcpu->arch.uspace_time_page); + vcpu->arch.uspace_time_page = NULL; + } +} + static void accumulate_steal_time(struct kvm_vcpu *vcpu) { u64 delta; @@ -1639,6 +1660,31 @@ int kvm_set_msr_common(struct kvm_vcpu * vcpu->kvm->arch.wall_clock = data; kvm_write_wall_clock(vcpu->kvm, data); break; + case MSR_KVM_USERSPACE_TIME: { + kvmclock_uspace_reset(vcpu); + + if (!vcpu->arch.time_page && (data & 1)) + return 1; + + vcpu->arch.uspace_time = data; + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + + /* we verify if the enable bit is set... 
*/ + if (!(data & 1)) + break; + + /* ...but clean it before doing the actual write */ + vcpu->arch.uspace_time_offset = data & ~(PAGE_MASK | 1); + + vcpu->arch.uspace_time_page = gfn_to_page(vcpu->kvm, + data >> PAGE_SHIFT); + + if (is_error_page(vcpu->arch.uspace_time_page)) { + kvm_release_page_clean(vcpu->arch.uspace_time_page); + vcpu->arch.uspace_time_page = NULL; + } + break; + } case MSR_KVM_SYSTEM_TIME_NEW: case MSR_KVM_SYSTEM_TIME: { kvmclock_reset(vcpu); @@ -1647,8 +1693,10 @@ int kvm_set_msr_common(struct kvm_vcpu * kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... */ - if (!(data & 1)) + if (!(data & 1)) { + kvmclock_uspace_reset(vcpu); break; + } /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); @@ -1656,8 +1704,10 @@ int kvm_set_msr_common(struct kvm_vcpu * vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - if (is_error_page(vcpu->arch.time_page)) + if (is_error_page(vcpu->arch.time_page)) { vcpu->arch.time_page = NULL; + kvmclock_uspace_reset(vcpu); + } break; } @@ -2010,6 +2060,9 @@ int kvm_get_msr_common(struct kvm_vcpu * case MSR_KVM_SYSTEM_TIME_NEW: data = vcpu->arch.time; break; + case MSR_KVM_USERSPACE_TIME: + data = vcpu->arch.uspace_time; + break; case MSR_KVM_ASYNC_PF_EN: data = vcpu->arch.apf.msr_val; break; @@ -2195,6 +2248,7 @@ int kvm_dev_ioctl_check_extension(long e case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: case KVM_CAP_IRQFD_RESAMPLE: + case KVM_CAP_USERSPACE_CLOCKSOURCE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -6017,6 +6071,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu * void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + kvmclock_uspace_reset(vcpu); kvmclock_reset(vcpu); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); Index: vsyscall/arch/x86/kvm/cpuid.c =================================================================== --- vsyscall.orig/arch/x86/kvm/cpuid.c +++ vsyscall/arch/x86/kvm/cpuid.c @@ -411,7 +411,9 @@ 
static int do_cpuid_ent(struct kvm_cpuid (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); + (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | + (1 << KVM_FEATURE_USERSPACE_CLOCKSOURCE); + if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); Index: vsyscall/include/uapi/linux/kvm.h =================================================================== --- vsyscall.orig/include/uapi/linux/kvm.h +++ vsyscall/include/uapi/linux/kvm.h @@ -626,6 +626,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_READONLY_MEM 81 #endif #define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_USERSPACE_CLOCKSOURCE 83 #ifdef KVM_CAP_IRQ_ROUTING -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html