[patch 08/16] KVM: x86: introduce facility to support vsyscall pvclock, via MSR

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Allow a guest to register a second location for the VCPU time info
structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
This is intended to allow the guest kernel to map this information
into a usermode accessible page, so that usermode can efficiently
calculate system time from the TSC without having to make a syscall.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: vsyscall/arch/x86/include/asm/kvm_para.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_para.h
+++ vsyscall/arch/x86/include/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
 #define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_USERSPACE_CLOCKSOURCE 7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -39,6 +40,7 @@
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_USERSPACE_TIME      0x4b564d05
 
 struct kvm_steal_time {
 	__u64 steal;
Index: vsyscall/Documentation/virtual/kvm/msr.txt
===================================================================
--- vsyscall.orig/Documentation/virtual/kvm/msr.txt
+++ vsyscall/Documentation/virtual/kvm/msr.txt
@@ -125,6 +125,22 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
 	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
 	leaf prior to usage.
 
+MSR_KVM_USERSPACE_TIME:  0x4b564d05
+
+Allow a guest to register a second location for the VCPU time info
+structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
+This is intended to allow the guest kernel to map this information
+into a usermode accessible page, so that usermode can efficiently
+calculate system time from the TSC without having to make a syscall.
+
+Relationship with master copy (MSR_KVM_SYSTEM_TIME_NEW):
+
+- This MSR may be enabled only while the master copy is enabled.
+- Disabling updates to the master copy automatically disables
+  updates to this copy.
+
+Availability of this MSR must be checked via bit 7 in 0x40000001 cpuid
+leaf prior to usage.
 
 MSR_KVM_WALL_CLOCK:  0x11
 
Index: vsyscall/arch/x86/include/asm/kvm_host.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_host.h
+++ vsyscall/arch/x86/include/asm/kvm_host.h
@@ -415,10 +415,13 @@ struct kvm_vcpu_arch {
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
+	gpa_t uspace_time;
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
+	unsigned int uspace_time_offset;
 	struct page *time_page;
+	struct page *uspace_time_page;
 	/* set guest stopped flag in pvclock flags field */
 	bool pvclock_set_guest_stopped_request;
 
Index: vsyscall/arch/x86/kvm/x86.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/x86.c
+++ vsyscall/arch/x86/kvm/x86.c
@@ -809,13 +809,13 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	10
+#define KVM_SAVE_MSRS_BEGIN	11
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-	MSR_KVM_PV_EOI_EN,
+	MSR_KVM_PV_EOI_EN, MSR_KVM_USERSPACE_TIME,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1135,16 +1135,43 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+static void kvm_write_pvtime(struct kvm_vcpu *v, struct page *page,
+			     unsigned int offset_in_page, gpa_t gpa)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+	void *shared_kaddr;
+	struct pvclock_vcpu_time_info *guest_hv_clock;
+	u8 pvclock_flags;
+
+	shared_kaddr = kmap_atomic(page);
+
+	guest_hv_clock = shared_kaddr + offset_in_page;
+
+	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	vcpu->hv_clock.flags = pvclock_flags;
+
+	memcpy(shared_kaddr + offset_in_page, &vcpu->hv_clock,
+	       sizeof(vcpu->hv_clock));
+
+	kunmap_atomic(shared_kaddr);
+
+	mark_page_dirty(v->kvm, gpa >> PAGE_SHIFT);
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
 	unsigned long flags;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
-	void *shared_kaddr;
 	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
 	u64 tsc_timestamp;
-	struct pvclock_vcpu_time_info *guest_hv_clock;
-	u8 pvclock_flags;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -1235,26 +1262,11 @@ static int kvm_guest_time_update(struct 
 	 */
 	vcpu->hv_clock.version += 2;
 
-	shared_kaddr = kmap_atomic(vcpu->time_page);
-
-	guest_hv_clock = shared_kaddr + vcpu->time_offset;
-
-	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
-	pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ 	kvm_write_pvtime(v, vcpu->time_page, vcpu->time_offset, vcpu->time);
+ 	if (vcpu->uspace_time_page)
+ 		kvm_write_pvtime(v, vcpu->uspace_time_page,
+ 				 vcpu->uspace_time_offset, vcpu->uspace_time);
 
-	if (vcpu->pvclock_set_guest_stopped_request) {
-		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-		vcpu->pvclock_set_guest_stopped_request = false;
-	}
-
-	vcpu->hv_clock.flags = pvclock_flags;
-
-	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-	       sizeof(vcpu->hv_clock));
-
-	kunmap_atomic(shared_kaddr);
-
-	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 	return 0;
 }
 
@@ -1549,6 +1561,15 @@ static void kvmclock_reset(struct kvm_vc
 	}
 }
 
+static void kvmclock_uspace_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.uspace_time = 0;
+	if (vcpu->arch.uspace_time_page) {
+		kvm_release_page_dirty(vcpu->arch.uspace_time_page);
+		vcpu->arch.uspace_time_page = NULL;
+	}
+}
+
 static void accumulate_steal_time(struct kvm_vcpu *vcpu)
 {
 	u64 delta;
@@ -1639,6 +1660,31 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		vcpu->kvm->arch.wall_clock = data;
 		kvm_write_wall_clock(vcpu->kvm, data);
 		break;
+	case MSR_KVM_USERSPACE_TIME: {
+		kvmclock_uspace_reset(vcpu);
+
+		if (!vcpu->arch.time_page && (data & 1))
+			return 1;
+
+		vcpu->arch.uspace_time = data;
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+		/* we verify if the enable bit is set... */
+		if (!(data & 1))
+			break;
+
+		/* ...but clean it before doing the actual write */
+		vcpu->arch.uspace_time_offset = data & ~(PAGE_MASK | 1);
+
+		vcpu->arch.uspace_time_page = gfn_to_page(vcpu->kvm,
+							  data >> PAGE_SHIFT);
+
+		if (is_error_page(vcpu->arch.uspace_time_page)) {
+			kvm_release_page_clean(vcpu->arch.uspace_time_page);
+			vcpu->arch.uspace_time_page = NULL;
+		}
+		break;
+	}
 	case MSR_KVM_SYSTEM_TIME_NEW:
 	case MSR_KVM_SYSTEM_TIME: {
 		kvmclock_reset(vcpu);
@@ -1647,8 +1693,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
 		/* we verify if the enable bit is set... */
-		if (!(data & 1))
+		if (!(data & 1)) {
+			kvmclock_uspace_reset(vcpu);
 			break;
+		}
 
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
@@ -1656,8 +1704,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-		if (is_error_page(vcpu->arch.time_page))
+		if (is_error_page(vcpu->arch.time_page)) {
 			vcpu->arch.time_page = NULL;
+			kvmclock_uspace_reset(vcpu);
+		}
 
 		break;
 	}
@@ -2010,6 +2060,9 @@ int kvm_get_msr_common(struct kvm_vcpu *
 	case MSR_KVM_SYSTEM_TIME_NEW:
 		data = vcpu->arch.time;
 		break;
+	case MSR_KVM_USERSPACE_TIME:
+		data = vcpu->arch.uspace_time;
+		break;
 	case MSR_KVM_ASYNC_PF_EN:
 		data = vcpu->arch.apf.msr_val;
 		break;
@@ -2195,6 +2248,7 @@ int kvm_dev_ioctl_check_extension(long e
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_IRQFD_RESAMPLE:
+	case KVM_CAP_USERSPACE_CLOCKSOURCE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -6017,6 +6071,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	kvmclock_uspace_reset(vcpu);
 	kvmclock_reset(vcpu);
 
 	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
Index: vsyscall/arch/x86/kvm/cpuid.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/cpuid.c
+++ vsyscall/arch/x86/kvm/cpuid.c
@@ -411,7 +411,9 @@ static int do_cpuid_ent(struct kvm_cpuid
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 			     (1 << KVM_FEATURE_ASYNC_PF) |
 			     (1 << KVM_FEATURE_PV_EOI) |
-			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+			     (1 << KVM_FEATURE_USERSPACE_CLOCKSOURCE);
+
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
Index: vsyscall/include/uapi/linux/kvm.h
===================================================================
--- vsyscall.orig/include/uapi/linux/kvm.h
+++ vsyscall/include/uapi/linux/kvm.h
@@ -626,6 +626,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_READONLY_MEM 81
 #endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_USERSPACE_CLOCKSOURCE 83
 
 #ifdef KVM_CAP_IRQ_ROUTING
 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux