Add ioregionfd context and kvm_io_device_ops->prepare/finish() in order to serialize all bytes requested by guest. Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx> --- arch/x86/kvm/x86.c | 19 ++++++++ include/kvm/iodev.h | 14 ++++++ include/linux/kvm_host.h | 4 ++ virt/kvm/ioregion.c | 102 +++++++++++++++++++++++++++++++++------ virt/kvm/kvm_main.c | 32 ++++++++++++ 5 files changed, 157 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a04516b531da..393fb0f4bf46 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5802,6 +5802,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, int ret = 0; bool is_apic; + kvm_io_bus_prepare(vcpu, KVM_MMIO_BUS, addr, len); + do { n = min(len, 8); is_apic = lapic_in_kernel(vcpu) && @@ -5823,8 +5825,10 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, if (ret == -EINTR) { vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.signal_exits; + return handled; } #endif + kvm_io_bus_finish(vcpu, KVM_MMIO_BUS, addr, len); return handled; } @@ -5836,6 +5840,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) int ret = 0; bool is_apic; + kvm_io_bus_prepare(vcpu, KVM_MMIO_BUS, addr, len); + do { n = min(len, 8); is_apic = lapic_in_kernel(vcpu) && @@ -5858,8 +5864,10 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) if (ret == -EINTR) { vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.signal_exits; + return handled; } #endif + kvm_io_bus_finish(vcpu, KVM_MMIO_BUS, addr, len); return handled; } @@ -6442,6 +6450,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) { int r = 0, i; + kvm_io_bus_prepare(vcpu, KVM_PIO_BUS, + vcpu->arch.pio.port, + vcpu->arch.pio.size); + for (i = 0; i < vcpu->arch.pio.count; i++) { if (vcpu->arch.pio.in) r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, @@ -6458,8 +6470,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) #ifdef 
CONFIG_KVM_IOREGION if (vcpu->ioregion_interrupted && r == -EINTR) { vcpu->ioregion_ctx.pio = i; + return r; } #endif + kvm_io_bus_finish(vcpu, KVM_PIO_BUS, + vcpu->arch.pio.port, + vcpu->arch.pio.size); return r; } @@ -9309,6 +9325,7 @@ static int complete_ioregion_mmio(struct kvm_vcpu *vcpu) vcpu->mmio_cur_fragment++; } + vcpu->ioregion_ctx.dev->ops->finish(vcpu->ioregion_ctx.dev); vcpu->mmio_needed = 0; if (!vcpu->ioregion_ctx.in) { srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -9333,6 +9350,7 @@ static int complete_ioregion_pio(struct kvm_vcpu *vcpu) vcpu->ioregion_ctx.val += vcpu->ioregion_ctx.len; } + vcpu->ioregion_ctx.dev->ops->finish(vcpu->ioregion_ctx.dev); if (vcpu->ioregion_ctx.in) r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -9352,6 +9370,7 @@ static int complete_ioregion_fast_pio(struct kvm_vcpu *vcpu) complete_ioregion_access(vcpu, vcpu->ioregion_ctx.addr, vcpu->ioregion_ctx.len, vcpu->ioregion_ctx.val); + vcpu->ioregion_ctx.dev->ops->finish(vcpu->ioregion_ctx.dev); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (vcpu->ioregion_ctx.in) { diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h index d75fc4365746..db8a3c69b7bb 100644 --- a/include/kvm/iodev.h +++ b/include/kvm/iodev.h @@ -25,6 +25,8 @@ struct kvm_io_device_ops { gpa_t addr, int len, const void *val); + void (*prepare)(struct kvm_io_device *this); + void (*finish)(struct kvm_io_device *this); void (*destructor)(struct kvm_io_device *this); }; @@ -55,6 +57,18 @@ static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, : -EOPNOTSUPP; } +static inline void kvm_iodevice_prepare(struct kvm_io_device *dev) +{ + if (dev->ops->prepare) + dev->ops->prepare(dev); +} + +static inline void kvm_iodevice_finish(struct kvm_io_device *dev) +{ + if (dev->ops->finish) + dev->ops->finish(dev); +} + static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) { if (dev->ops->destructor) diff --git a/include/linux/kvm_host.h 
b/include/linux/kvm_host.h index 5cfdecfca6db..f6b9ff4c468d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -194,6 +194,10 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); +void kvm_io_bus_prepare(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len); +void kvm_io_bus_finish(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c index da38124e1418..3474090ccc8c 100644 --- a/virt/kvm/ioregion.c +++ b/virt/kvm/ioregion.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/kvm_host.h> -#include <linux/fs.h> +#include <linux/wait.h> #include <kvm/iodev.h> #include "eventfd.h" #include <uapi/linux/ioregion.h> @@ -12,15 +12,23 @@ kvm_ioregionfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioregions_pio); } +/* Serializes ioregionfd cmds/replies */ +struct ioregionfd { + wait_queue_head_t wq; + struct file *rf; + struct kref kref; + bool busy; +}; + struct ioregion { - struct list_head list; - u64 paddr; /* guest physical address */ - u64 size; /* size in bytes */ - struct file *rf; - struct file *wf; - u64 user_data; /* opaque token used by userspace */ - struct kvm_io_device dev; - bool posted_writes; + struct list_head list; + u64 paddr; /* guest physical address */ + u64 size; /* size in bytes */ + struct file *wf; + u64 user_data; /* opaque token used by userspace */ + struct kvm_io_device dev; + bool posted_writes; + struct ioregionfd *ctx; }; static inline struct ioregion * @@ -29,13 +37,22 @@ to_ioregion(struct kvm_io_device *dev) return container_of(dev, struct ioregion, dev); } +/* assumes kvm->slots_lock held */ +static void ctx_free(struct kref *kref) +{ + struct ioregionfd *ctx = container_of(kref, struct ioregionfd, kref); + + kfree(ctx); +} + 
/* assumes kvm->slots_lock held */ static void ioregion_release(struct ioregion *p) { - fput(p->rf); + fput(p->ctx->rf); fput(p->wf); list_del(&p->list); + kref_put(&p->ctx->kref, ctx_free); kfree(p); } @@ -94,6 +111,28 @@ ioregion_save_ctx(struct kvm_vcpu *vcpu, struct kvm_io_device *this, vcpu->ioregion_ctx.in = in; } +static void +ioregion_prepare(struct kvm_io_device *this) +{ + struct ioregion *p = to_ioregion(this); + + spin_lock(&p->ctx->wq.lock); + wait_event_interruptible_exclusive_locked(p->ctx->wq, !p->ctx->busy); + p->ctx->busy = true; + spin_unlock(&p->ctx->wq.lock); +} + +static void +ioregion_finish(struct kvm_io_device *this) +{ + struct ioregion *p = to_ioregion(this); + + spin_lock(&p->ctx->wq.lock); + p->ctx->busy = false; + wake_up_locked(&p->ctx->wq); + spin_unlock(&p->ctx->wq.lock); +} + static int ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, int len, void *val) @@ -142,7 +181,7 @@ ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, get_repl: memset(&buf, 0, sizeof(buf)); - ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0); + ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0); state += (ret == sizeof(buf.resp)); if (signal_pending(current)) { ioregion_save_ctx(vcpu, this, 1, addr, len, buf.resp.data, state, val); @@ -209,7 +248,7 @@ ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, get_repl: if (!p->posted_writes) { memset(&buf, 0, sizeof(buf)); - ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0); + ret = kernel_read(p->ctx->rf, &buf.resp, sizeof(buf.resp), 0); state += (ret == sizeof(buf.resp)); if (signal_pending(current)) { ioregion_save_ctx(vcpu, this, 0, addr, len, @@ -240,6 +279,8 @@ ioregion_destructor(struct kvm_io_device *this) static const struct kvm_io_device_ops ioregion_ops = { .read = ioregion_read, .write = ioregion_write, + .prepare = ioregion_prepare, + .finish = ioregion_finish, .destructor = ioregion_destructor, }; 
@@ -295,6 +336,34 @@ get_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } +/* assumes kvm->slots_lock held */ +static bool +ioregion_get_ctx(struct kvm *kvm, struct ioregion *p, struct file *rf, int bus_idx) +{ + struct ioregion *_p; + struct list_head *ioregions; + + ioregions = get_ioregion_list(kvm, bus_idx); + list_for_each_entry(_p, ioregions, list) + if (_p->ctx->rf == rf) { + p->ctx = _p->ctx; + kref_get(&p->ctx->kref); + return true; + } + + p->ctx = kzalloc(sizeof(*p->ctx), GFP_KERNEL_ACCOUNT); + if (!p->ctx) { + kfree(p); + return false; + } + p->ctx->rf = rf; + p->ctx->busy = false; + init_waitqueue_head(&p->ctx->wq); + kref_init(&p->ctx->kref); + + return true; +} + int kvm_set_ioregion(struct kvm *kvm, struct kvm_ioregion *args) { @@ -327,11 +396,10 @@ kvm_set_ioregion(struct kvm *kvm, struct kvm_ioregion *args) } INIT_LIST_HEAD(&p->list); + p->wf = wfile; p->paddr = args->guest_paddr; p->size = args->memory_size; p->user_data = args->user_data; - p->rf = rfile; - p->wf = wfile; p->posted_writes = args->flags & KVM_IOREGION_POSTED_WRITES; bus_idx = get_bus_from_flags(args->flags); @@ -341,6 +409,12 @@ kvm_set_ioregion(struct kvm *kvm, struct kvm_ioregion *args) ret = -EEXIST; goto unlock_fail; } + + if (!ioregion_get_ctx(kvm, p, rfile, bus_idx)) { + ret = -ENOMEM; + goto fail; + } + kvm_iodevice_init(&p->dev, &ioregion_ops); ret = kvm_io_bus_register_dev(kvm, bus_idx, p->paddr, p->size, &p->dev); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index df387857f51f..096504a6cc62 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4308,6 +4308,38 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, return r < 0 ? 
r : 0; } +void kvm_io_bus_prepare(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len) +{ + struct kvm_io_bus *bus; + int idx; + + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return; + + idx = kvm_io_bus_get_first_dev(bus, addr, len); + if (idx < 0) + return; + + kvm_iodevice_prepare(bus->range[idx].dev); +} + +void kvm_io_bus_finish(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len) +{ + struct kvm_io_bus *bus; + int idx; + + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return; + + idx = kvm_io_bus_get_first_dev(bus, addr, len); + if (idx < 0) + return; + + kvm_iodevice_finish(bus->range[idx].dev); +} + /* Caller must hold slots_lock. */ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev) -- 2.25.1