On Mon, 2022-11-14 at 11:27 +0000, Durrant, Paul wrote:
> > On Sun, 2022-11-13 at 13:32 +0000, David Woodhouse wrote:
> > > For the runstate area, I think we can live with using a gfn_to_hva
> > > cache instead, and writing via the userspace address (with
> > > appropriate atomicity for the RUNSTATE_runnable case as we have at
> > > the moment gating the refresh).
> > 
> > Which mostly involves just reverting commit a795cd43c5b5 I think?
> > 
> > IIRC the reason for that commit was mostly consistency with other
> > things that really *did* want to be switched to gpc.
> 
> A straight reversion would re-introduce the page-spanning check in
> kvm_xen_vcpu_set_attr() but I think we can just leave that out.

That check matches the behaviour of kvm_gfn_to_hva_cache_init(), which
deliberately clears ghc->memslot if the region crosses a page.
(Although I don't see why; it only really needs to do that if it
crosses over onto a second *memslot*, surely?)

So we'd then hit this in kvm_xen_update_runstate_guest():

	/* We made sure it fits in a single page */
	BUG_ON(!ghc->memslot);

We'd need a fallback code path for the page-crossing (or memslot-
crossing) case, and the natural way to do that is to just use
kvm_write_guest().

In the RUNSTATE_runnable case where we can't sleep, can we get away
with using pagefault_disable() around a call to kvm_write_guest()?

The worst case here is kind of awful: a compat guest with the first
half of its ->state_entry_time on the first page, and the second half
of it on the second page.

I'm playing with using a second GPC for the overrun onto the second
page. Debating if it's already too ugly to live before I even fix up
the actual copying part...
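To be concrete about the pagefault_disable() idea, I mean something
like the sketch below. It's entirely untested, and the 'gpa', 'rs' and
'ret' locals are invented for illustration; the premise is that with
page faults disabled, kvm_write_guest() fails with -EFAULT instead of
sleeping when the target page isn't resident:

	/*
	 * Hypothetical fallback for the page/memslot-crossing case.
	 * When invoked from kvm_sched_out() (RUNSTATE_runnable) we
	 * cannot sleep, so disable page faults and bail out if the
	 * write would have needed to fault a page in.
	 */
	if (state == RUNSTATE_runnable) {
		pagefault_disable();
		ret = kvm_write_guest(v->kvm, gpa, &rs, user_len);
		pagefault_enable();
		if (ret)
			return;
	} else {
		kvm_write_guest(v->kvm, gpa, &rs, user_len);
	}

Anyway, here's where the two-GPC experiment stands so far: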
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 81114a376c4e..3fc08f416aa3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -647,6 +647,7 @@ struct kvm_vcpu_xen {
 	struct gfn_to_pfn_cache vcpu_info_cache;
 	struct gfn_to_pfn_cache vcpu_time_info_cache;
 	struct gfn_to_pfn_cache runstate_cache;
+	struct gfn_to_pfn_cache runstate2_cache;
 	u64 last_steal;
 	u64 runstate_entry_time;
 	u64 runstate_times[4];
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4b8e9628fbf5..e297569bbc8d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -201,35 +201,59 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
-	struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
+	struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
+	struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
 	uint64_t *user_times;
 	unsigned long flags;
-	size_t user_len;
+	size_t user_len1, user_len2 = 0;
 	int *user_state;
 
 	kvm_xen_update_runstate(v, state);
 
-	if (!vx->runstate_cache.active)
+	if (!gpc1->active)
 		return;
 
 	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
-		user_len = sizeof(struct vcpu_runstate_info);
+		user_len1 = sizeof(struct vcpu_runstate_info);
 	else
-		user_len = sizeof(struct compat_vcpu_runstate_info);
+		user_len1 = sizeof(struct compat_vcpu_runstate_info);
 
-	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   user_len)) {
-		read_unlock_irqrestore(&gpc->lock, flags);
+	if ((gpc1->gpa & ~PAGE_MASK) + user_len1 >= PAGE_SIZE) {
+		user_len2 = user_len1;
+		user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
+		user_len2 -= user_len1;
+	}
+
+ retry:
+	read_lock_irqsave(&gpc1->lock, flags);
+	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa,
+					   user_len1)) {
+		read_unlock_irqrestore(&gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
 		if (state == RUNSTATE_runnable)
 			return;
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
+		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
 			return;
 
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(&gpc1->lock, flags);
+	}
+
+	if (user_len2) {
+		read_lock(&gpc2->lock);
+		if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+			read_unlock(&gpc2->lock);
+			read_unlock_irqrestore(&gpc1->lock, flags);
+
+			if (state == RUNSTATE_runnable)
+				return;
+
+			if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc2,
+							 gpc2->gpa, user_len2))
+				return;
+
+			goto retry;
+		}
 	}
 
 	/*
@@ -584,23 +608,52 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 		break;
 
-	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
+		size_t sz;
+
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
 			break;
 		}
 		if (data->u.gpa == GPA_INVALID) {
+			r = 0;
+	deactivate_out:
 			kvm_gpc_deactivate(vcpu->kvm,
 					   &vcpu->arch.xen.runstate_cache);
-			r = 0;
+	deactivate2_out:
+			kvm_gpc_deactivate(vcpu->kvm,
+					   &vcpu->arch.xen.runstate2_cache);
 			break;
 		}
 
-		r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
-				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
-				     sizeof(struct vcpu_runstate_info));
+		if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
+			sz = sizeof(struct vcpu_runstate_info);
+		else
+			sz = sizeof(struct compat_vcpu_runstate_info);
+
+		/* Handle structures which cross a page boundary by using two GPCs */
+		if ((data->u.gpa & ~PAGE_MASK) + sz <= PAGE_SIZE) {
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+					     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+					     sizeof(struct vcpu_runstate_info));
+			goto deactivate2_out;
+		} else {
+			/* Map the end of the first page... */
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+					     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+					     PAGE_SIZE - (data->u.gpa & ~PAGE_MASK));
+			if (r)
+				goto deactivate2_out;
+			/* ... and the start of the second. */
+			sz -= PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
+					     NULL, KVM_HOST_USES_PFN,
+					     (data->u.gpa + PAGE_SIZE) & PAGE_MASK, sz);
+			if (r)
+				goto deactivate_out;
+		}
 		break;
-
+	}
 	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
@@ -1834,6 +1887,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 
 	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
 }
@@ -1844,6 +1898,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 		kvm_xen_stop_timer(vcpu);
 
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
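The actual copying isn't shown above, of course. I imagine it ends up
looking something like the sketch below: build the structure in a
local buffer, then split the copy across the two mappings. The 'rs'
buffer is invented for illustration, and this ignores the question of
making the ->state_entry_time update look atomic to the guest:

	/*
	 * Sketch of the split copy, with both gpc locks held and
	 * both caches known to be valid: assemble the runstate
	 * info locally, write the first user_len1 bytes through
	 * the first mapping and any overrun through the second.
	 */
	u8 rs[sizeof(struct vcpu_runstate_info)];

	/* ... fill rs in from vx->current_runstate et al. ... */

	memcpy(gpc1->khva, rs, user_len1);
	if (user_len2)
		memcpy(gpc2->khva, rs + user_len1, user_len2);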