[PATCH RFC 15/39] KVM: x86/xen: handle PV spinlocks slowpath

Joao Martins <joao.m.martins@xxxxxxxxxx> · Wed, 20 Feb 2019 20:15:45 +0000

From: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>

Add support for SCHEDOP_poll hypercall.

This implementation is optimized for polling for a single channel, which
is what Linux does. Polling for multiple channels is not especially
efficient (and has not been tested).

PV spinlocks slow path uses this hypercall, and explicitly crash if it's
not supported.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |   3 ++
 arch/x86/kvm/xen.c              | 108 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7fcc81dbb688..c629fedb2e21 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -554,6 +554,8 @@ struct kvm_vcpu_xen {
 	unsigned int virq_to_port[KVM_XEN_NR_VIRQS];
 	struct hrtimer timer;
 	atomic_t timer_pending;
+	wait_queue_head_t sched_waitq;
+	int poll_evtchn;
 };
 
 struct kvm_vcpu_arch {
@@ -865,6 +867,7 @@ struct kvm_xen {
 	struct shared_info *shinfo;
 
 	struct idr port_to_evt;
+	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 	struct mutex xen_lock;
 };
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 753a6d2c11cd..07066402737d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -563,6 +563,16 @@ static int kvm_xen_evtchn_set_pending(struct kvm_vcpu *svcpu,
 					     evfd->port);
 }
 
+static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+{
+	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+
+	if ((vcpu_xen->poll_evtchn == port ||
+	     vcpu_xen->poll_evtchn == -1) &&
+	    test_and_clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.xen.poll_mask))
+		wake_up(&vcpu_xen->sched_waitq);
+}
+
 static int kvm_xen_evtchn_send(struct kvm_vcpu *vcpu, int port)
 {
 	struct eventfd_ctx *eventfd;
@@ -581,6 +591,8 @@ static int kvm_xen_evtchn_send(struct kvm_vcpu *vcpu, int port)
 			eventfd_signal(eventfd, 1);
 	}
 
+	kvm_xen_check_poller(kvm_get_vcpu(vcpu->kvm, evtchnfd->vcpu), port);
+
 	return 0;
 }
 
@@ -669,6 +681,94 @@ static int kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout)
 	return 0;
 }
 
+static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
+			       evtchn_port_t *ports)
+{
+	int i;
+	struct shared_info *shared_info =
+		(struct shared_info *)vcpu->kvm->arch.xen.shinfo;
+
+	for (i = 0; i < nr_ports; i++)
+		if (test_bit(ports[i],
+			     (unsigned long *)shared_info->evtchn_pending))
+			return true;
+
+	return false;
+}
+
+static int kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	struct kvm_vcpu_xen *vcpu_xen = vcpu_to_xen_vcpu(vcpu);
+	int idx, i;
+	struct sched_poll sched_poll;
+	evtchn_port_t port, *ports;
+	struct shared_info *shared_info;
+	struct evtchnfd *evtchnfd;
+	int ret = 0;
+
+	if (kvm_vcpu_read_guest(vcpu, gpa,
+				&sched_poll, sizeof(sched_poll)))
+		return -EFAULT;
+
+	shared_info = (struct shared_info *)vcpu->kvm->arch.xen.shinfo;
+
+	if (unlikely(sched_poll.nr_ports > 1)) {
+		/* Xen (unofficially) limits number of pollers to 128 */
+		if (sched_poll.nr_ports > 128)
+			return -EINVAL;
+
+		ports = kmalloc_array(sched_poll.nr_ports,
+				      sizeof(*ports), GFP_KERNEL);
+		if (!ports)
+			return -ENOMEM;
+	} else
+		ports = &port;
+
+	set_bit(vcpu->vcpu_id, vcpu->kvm->arch.xen.poll_mask);
+
+	for (i = 0; i < sched_poll.nr_ports; i++) {
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		gpa = kvm_mmu_gva_to_gpa_system(vcpu,
+						(gva_t)(sched_poll.ports + i),
+						NULL);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+		if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
+						&ports[i], sizeof(port))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		evtchnfd = idr_find(&vcpu->kvm->arch.xen.port_to_evt,
+				    ports[i]);
+		if (!evtchnfd) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (sched_poll.nr_ports == 1)
+		vcpu_xen->poll_evtchn = port;
+	else
+		vcpu_xen->poll_evtchn = -1;
+
+	if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports))
+		wait_event_interruptible_timeout(
+			 vcpu_xen->sched_waitq,
+			 wait_pending_event(vcpu, sched_poll.nr_ports, ports),
+			 sched_poll.timeout ?: KTIME_MAX);
+
+	vcpu_xen->poll_evtchn = 0;
+
+out:
+	/* Really, this is only needed in case of timeout */
+	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.xen.poll_mask);
+
+	if (unlikely(sched_poll.nr_ports > 1))
+		kfree(ports);
+	return ret;
+}
+
 static int kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param)
 {
 	int ret = -ENOSYS;
@@ -687,6 +787,9 @@ static int kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param)
 		kvm_vcpu_on_spin(vcpu, true);
 		ret = 0;
 		break;
+	case SCHEDOP_poll:
+		ret = kvm_xen_schedop_poll(vcpu, gpa);
+		break;
 	default:
 		break;
 	}
@@ -744,6 +847,9 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 		r = kvm_xen_hcall_sched_op(vcpu, params[0], params[1]);
 		if (!r)
 			goto hcall_success;
+		else if (params[0] == SCHEDOP_poll)
+			/* SCHEDOP_poll should be handled in kernel */
+			return r;
 		break;
 	/* fallthrough */
 	default:
@@ -770,6 +876,8 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 
 void kvm_xen_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	init_waitqueue_head(&vcpu->arch.xen.sched_waitq);
+	vcpu->arch.xen.poll_evtchn = 0;
 }
 
 void kvm_xen_vcpu_uninit(struct kvm_vcpu *vcpu)
-- 
2.11.0