[RFC v2 3/4] KVM: add support for ioregionfd cmds/replies serialization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add an ioregionfd context and the kvm_io_device_ops->prepare()/finish()
callbacks in order to serialize all bytes requested by the guest.

Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx>
---
 arch/x86/kvm/x86.c       |  19 ++++++++
 include/kvm/iodev.h      |  14 ++++++
 include/linux/kvm_host.h |   4 ++
 virt/kvm/ioregion.c      | 102 +++++++++++++++++++++++++++++++++------
 virt/kvm/kvm_main.c      |  32 ++++++++++++
 5 files changed, 157 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a04516b531da..393fb0f4bf46 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5802,6 +5802,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 	int ret = 0;
 	bool is_apic;
 
+	kvm_io_bus_prepare(vcpu, KVM_MMIO_BUS, addr, len);
+
 	do {
 		n = min(len, 8);
 		is_apic = lapic_in_kernel(vcpu) &&
@@ -5823,8 +5825,10 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 	if (ret == -EINTR) {
 		vcpu->run->exit_reason = KVM_EXIT_INTR;
 		++vcpu->stat.signal_exits;
+		return handled;
 	}
 #endif
+	kvm_io_bus_finish(vcpu, KVM_MMIO_BUS, addr, len);
 
 	return handled;
 }
@@ -5836,6 +5840,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 	int ret = 0;
 	bool is_apic;
 
+	kvm_io_bus_prepare(vcpu, KVM_MMIO_BUS, addr, len);
+
 	do {
 		n = min(len, 8);
 		is_apic = lapic_in_kernel(vcpu) &&
@@ -5858,8 +5864,10 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 	if (ret == -EINTR) {
 		vcpu->run->exit_reason = KVM_EXIT_INTR;
 		++vcpu->stat.signal_exits;
+		return handled;
 	}
 #endif
+	kvm_io_bus_finish(vcpu, KVM_MMIO_BUS, addr, len);
 
 	return handled;
 }
@@ -6442,6 +6450,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
 	int r = 0, i;
 
+	kvm_io_bus_prepare(vcpu, KVM_PIO_BUS,
+			   vcpu->arch.pio.port,
+			   vcpu->arch.pio.size);
+
 	for (i = 0; i < vcpu->arch.pio.count; i++) {
 		if (vcpu->arch.pio.in)
 			r = kvm_io_bus_read(vcpu, KVM_PIO_BUS,
@@ -6458,8 +6470,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 #ifdef CONFIG_KVM_IOREGION
 	if (vcpu->ioregion_interrupted && r == -EINTR) {
 		vcpu->ioregion_ctx.pio = i;
+		return r;
 	}
 #endif
+	kvm_io_bus_finish(vcpu, KVM_PIO_BUS,
+			  vcpu->arch.pio.port,
+			  vcpu->arch.pio.size);
 
 	return r;
 }
@@ -9309,6 +9325,7 @@ static int complete_ioregion_mmio(struct kvm_vcpu *vcpu)
 		vcpu->mmio_cur_fragment++;
 	}
 
+	vcpu->ioregion_ctx.dev->ops->finish(vcpu->ioregion_ctx.dev);
 	vcpu->mmio_needed = 0;
 	if (!vcpu->ioregion_ctx.in) {
 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -9333,6 +9350,7 @@ static int complete_ioregion_pio(struct kvm_vcpu *vcpu)
 		vcpu->ioregion_ctx.val += vcpu->ioregion_ctx.len;
 	}
 
+	vcpu->ioregion_ctx.dev->ops->finish(vcpu->ioregion_ctx.dev);
 	if (vcpu->ioregion_ctx.in)
 		r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -9352,6 +9370,7 @@ static int complete_ioregion_fast_pio(struct kvm_vcpu *vcpu)
 	complete_ioregion_access(vcpu, vcpu->ioregion_ctx.addr,
 				 vcpu->ioregion_ctx.len,
 				 vcpu->ioregion_ctx.val);
+	vcpu->ioregion_ctx.dev->ops->finish(vcpu->ioregion_ctx.dev);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 	if (vcpu->ioregion_ctx.in) {
diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h
index d75fc4365746..db8a3c69b7bb 100644
--- a/include/kvm/iodev.h
+++ b/include/kvm/iodev.h
@@ -25,6 +25,8 @@ struct kvm_io_device_ops {
 		     gpa_t addr,
 		     int len,
 		     const void *val);
+	void (*prepare)(struct kvm_io_device *this);
+	void (*finish)(struct kvm_io_device *this);
 	void (*destructor)(struct kvm_io_device *this);
 };
 
@@ -55,6 +57,18 @@ static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu,
 				 : -EOPNOTSUPP;
 }
 
+static inline void kvm_iodevice_prepare(struct kvm_io_device *dev)
+{	/* Invoke the device's prepare() hook, if it provides one. */
+	if (dev->ops->prepare)	/* hook is optional; devices without it are skipped */
+		dev->ops->prepare(dev);
+}
+
+static inline void kvm_iodevice_finish(struct kvm_io_device *dev)
+{	/* Invoke the device's finish() hook, if it provides one. */
+	if (dev->ops->finish)	/* hook is optional; devices without it are skipped */
+		dev->ops->finish(dev);
+}
+
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
 {
 	if (dev->ops->destructor)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5cfdecfca6db..f6b9ff4c468d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -194,6 +194,10 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			       struct kvm_io_device *dev);
 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 					 gpa_t addr);
+void kvm_io_bus_prepare(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+			int len);
+void kvm_io_bus_finish(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+		       int len);
 
 #ifdef CONFIG_KVM_ASYNC_PF
 struct kvm_async_pf {
diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
index da38124e1418..3474090ccc8c 100644
--- a/virt/kvm/ioregion.c
+++ b/virt/kvm/ioregion.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/kvm_host.h>
-#include <linux/fs.h>
+#include <linux/wait.h>
 #include <kvm/iodev.h>
 #include "eventfd.h"
 #include <uapi/linux/ioregion.h>
@@ -12,15 +12,23 @@ kvm_ioregionfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioregions_pio);
 }
 
+/* Serializes ioregionfd cmds/replies; one ctx is shared by regions whose read file matches */
+struct ioregionfd {
+	wait_queue_head_t	  wq;	/* waiters in ioregion_prepare(); wq.lock guards busy */
+	struct file		 *rf;	/* file the replies are read from */
+	struct kref		  kref;	/* one reference dropped per region in ioregion_release() */
+	bool			  busy;	/* set by ioregion_prepare(), cleared by ioregion_finish() */
+};
+
 struct ioregion {
-	struct list_head     list;
-	u64                  paddr;  /* guest physical address */
-	u64                  size;   /* size in bytes */
-	struct file         *rf;
-	struct file         *wf;
-	u64                  user_data; /* opaque token used by userspace */
-	struct kvm_io_device dev;
-	bool                 posted_writes;
+	struct list_head	  list;
+	u64			  paddr;   /* guest physical address */
+	u64			  size;    /* size in bytes */
+	struct file		 *wf;
+	u64			  user_data; /* opaque token used by userspace */
+	struct kvm_io_device	  dev;
+	bool			  posted_writes;
+	struct ioregionfd	 *ctx;     /* holds the read file and the busy/wait state */
 };
 
 static inline struct ioregion *
@@ -29,13 +37,22 @@ to_ioregion(struct kvm_io_device *dev)
 	return container_of(dev, struct ioregion, dev);
 }
 
+/* kref release callback: frees the ioregionfd ctx; assumes kvm->slots_lock held */
+static void ctx_free(struct kref *kref)
+{
+	struct ioregionfd *ctx = container_of(kref, struct ioregionfd, kref);
+
+	kfree(ctx);	/* ctx->rf is not put here; ioregion_release() calls fput() on it */
+}
+
 /* assumes kvm->slots_lock held */
 static void
 ioregion_release(struct ioregion *p)
 {
-	fput(p->rf);
+	fput(p->ctx->rf);	/* NOTE(review): puts the ctx's file for every region; confirm each region holds a ref on that same file */
 	fput(p->wf);
 	list_del(&p->list);
+	kref_put(&p->ctx->kref, ctx_free);	/* ctx_free() runs once the last reference is dropped */
 	kfree(p);
 }
 
@@ -94,6 +111,28 @@ ioregion_save_ctx(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	vcpu->ioregion_ctx.in = in;
 }
 
+static void
+ioregion_prepare(struct kvm_io_device *this)
+{	/* Wait until the region's ctx is not busy, then mark it busy. */
+	struct ioregion *p = to_ioregion(this);
+
+	spin_lock(&p->ctx->wq.lock);	/* the *_locked wait below is called with wq.lock held */
+	wait_event_interruptible_exclusive_locked(p->ctx->wq, !p->ctx->busy);	/* NOTE(review): result ignored; if a signal ends the wait early, busy is still claimed below - confirm */
+	p->ctx->busy = true;	/* stays set until ioregion_finish() clears it */
+	spin_unlock(&p->ctx->wq.lock);
+}
+
+static void
+ioregion_finish(struct kvm_io_device *this)
+{	/* Clear the ctx's busy flag and wake a waiter blocked in ioregion_prepare(). */
+	struct ioregion *p = to_ioregion(this);
+
+	spin_lock(&p->ctx->wq.lock);
+	p->ctx->busy = false;	/* cleared unconditionally; no check that the caller set it */
+	wake_up_locked(&p->ctx->wq);	/* wq.lock is already held, hence the _locked variant */
+	spin_unlock(&p->ctx->wq.lock);
+}
+
 static int
 ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 	      int len, void *val)
@@ -142,7 +181,7 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 
 get_repl:
 	memset(&buf, 0, sizeof(buf));
-	ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
+	ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0);
 	state += (ret == sizeof(buf.resp));
 	if (signal_pending(current)) {
 		ioregion_save_ctx(vcpu, this, 1, addr, len, buf.resp.data, state, val);
@@ -209,7 +248,7 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
 get_repl:
 	if (!p->posted_writes) {
 		memset(&buf, 0, sizeof(buf));
-		ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
+		ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0);
 		state += (ret == sizeof(buf.resp));
 		if (signal_pending(current)) {
 			ioregion_save_ctx(vcpu, this, 0, addr, len,
@@ -240,6 +279,8 @@ ioregion_destructor(struct kvm_io_device *this)
 static const struct kvm_io_device_ops ioregion_ops = {
 	.read       = ioregion_read,
 	.write      = ioregion_write,
+	.prepare    = ioregion_prepare,	/* waits for and claims the region's ctx */
+	.finish     = ioregion_finish,	/* releases the ctx and wakes a waiter */
 	.destructor = ioregion_destructor,
 };
 
@@ -295,6 +336,34 @@ get_bus_from_flags(__u32 flags)
 	return KVM_MMIO_BUS;
 }
 
+/* Attach to p the ctx shared by regions reading the same inode, or a new one; slots_lock held */
+static bool
+ioregion_get_ctx(struct kvm *kvm, struct ioregion *p, struct file *rf, int bus_idx)
+{
+	struct ioregion *_p;
+	struct list_head *ioregions;
+
+	ioregions = get_ioregion_list(kvm, bus_idx);
+	list_for_each_entry(_p, ioregions, list)
+		if (file_inode(_p->ctx->rf) == file_inode(rf)) {	/* compare inodes: i_ino alone can collide across superblocks */
+			p->ctx = _p->ctx;
+			kref_get(&p->ctx->kref);	/* one reference per region sharing the ctx */
+			return true;
+		}
+
+	p->ctx = kzalloc(sizeof(*p->ctx), GFP_KERNEL_ACCOUNT);
+	if (!p->ctx) {
+		kfree(p);	/* NOTE(review): p is freed here; confirm the caller's fail path does not free it again */
+		return false;
+	}
+	p->ctx->rf = rf;
+	p->ctx->busy = false;
+	init_waitqueue_head(&p->ctx->wq);
+	kref_init(&p->ctx->kref);	/* first reference: kref_get() must not be used on a zeroed refcount */
+
+	return true;
+}
+
 int
 kvm_set_ioregion(struct kvm *kvm, struct kvm_ioregion *args)
 {
@@ -327,11 +396,10 @@ kvm_set_ioregion(struct kvm *kvm, struct kvm_ioregion *args)
 	}
 
 	INIT_LIST_HEAD(&p->list);
+	p->wf = wfile;
 	p->paddr = args->guest_paddr;
 	p->size = args->memory_size;
 	p->user_data = args->user_data;
-	p->rf = rfile;
-	p->wf = wfile;
 	p->posted_writes = args->flags & KVM_IOREGION_POSTED_WRITES;
 	bus_idx = get_bus_from_flags(args->flags);
 
@@ -341,6 +409,12 @@ kvm_set_ioregion(struct kvm *kvm, struct kvm_ioregion *args)
 		ret = -EEXIST;
 		goto unlock_fail;
 	}
+
+	if (!ioregion_get_ctx(kvm, p, rfile, bus_idx)) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
 	kvm_iodevice_init(&p->dev, &ioregion_ops);
 	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->paddr, p->size,
 				      &p->dev);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index df387857f51f..096504a6cc62 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4308,6 +4308,38 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 	return r < 0 ? r : 0;
 }
 
+void kvm_io_bus_prepare(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len)
+{	/* Call prepare() on the first device the bus lookup returns for addr/len. */
+	struct kvm_io_bus *bus;
+	int idx;
+
+	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	if (!bus)
+		return;
+
+	idx = kvm_io_bus_get_first_dev(bus, addr, len);
+	if (idx < 0)	/* negative index: lookup failed, nothing to prepare */
+		return;
+
+	kvm_iodevice_prepare(bus->range[idx].dev);	/* only the first matching device is prepared */
+}
+
+void kvm_io_bus_finish(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len)
+{	/* Call finish() on the first device the bus lookup returns for addr/len. */
+	struct kvm_io_bus *bus;
+	int idx;
+
+	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	if (!bus)
+		return;
+
+	idx = kvm_io_bus_get_first_dev(bus, addr, len);	/* device is looked up again here, not remembered from prepare */
+	if (idx < 0)	/* negative index: lookup failed, nothing to finish */
+		return;
+
+	kvm_iodevice_finish(bus->range[idx].dev);
+}
+
 /* Caller must hold slots_lock. */
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			    int len, struct kvm_io_device *dev)
-- 
2.25.1




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux