This patch is a slightly different take on the ioregionfd mechanism previously described here: https://lore.kernel.org/all/88ca79d2e378dcbfb3988b562ad2c16c4f929ac7.camel@xxxxxxxxx/ The goal of this new mechanism, which we tentatively call shadow ioeventfd for lack of a better name, is to speed up doorbell writes on NVMe controllers emulated outside of the VMM. Currently, a doorbell write to an NVMe SQ tail doorbell requires returning from ioctl(KVM_RUN) and the VMM communicating the event, along with the doorbell value, to the NVMe controller emulation task. With the shadow ioeventfd, the NVMe emulation task is directly notified of the doorbell write and can find the doorbell value in a known location, without involving the VMM. To demonstrate the performance benefit of the shadow ioeventfd mechanism, I've implemented a test using the vfio-user protocol for enabling out-of-process device emulation, which can be found here: https://github.com/tmakatos/muser/commit/7adfe45 I've patched QEMU to enable shadow ioeventfd here: https://github.com/tmakatos/qemu-oracle/commit/55f2781 This is based on John Johnson's not-yet-merged vfio-user server patches. In this test, the guest repeatedly writes to two pieces of memory: one accelerated by a shadow ioeventfd and the other not. Writing to the piece of memory accelerated by the shadow ioeventfd is 4 times faster. 
Signed-off-by: Thanos Makatos <thanos.makatos@xxxxxxxxxxx> --- include/uapi/linux/kvm.h | 5 ++++- tools/include/uapi/linux/kvm.h | 2 ++ virt/kvm/eventfd.c | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..0a884ac1cc76 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -804,6 +804,7 @@ enum { kvm_ioeventfd_flag_nr_deassign, kvm_ioeventfd_flag_nr_virtio_ccw_notify, kvm_ioeventfd_flag_nr_fast_mmio, + kvm_ioevetnfd_flag_nr_commit_write, kvm_ioeventfd_flag_nr_max, }; @@ -812,16 +813,18 @@ enum { #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) +#define KVM_IOEVENTFD_FLAG_COMMIT_WRITE (1 << kvm_ioevetnfd_flag_nr_commit_write) #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) struct kvm_ioeventfd { __u64 datamatch; __u64 addr; /* legal pio/mmio address */ + __u64 vaddr; /* user address to write to if COMMIT_WRITE is set */ __u32 len; /* 1, 2, 4, or 8 bytes; or 0 to ignore length */ __s32 fd; __u32 flags; - __u8 pad[36]; + __u8 pad[28]; }; #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index eed0315a77a6..ee64ff1abccc 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -804,6 +804,7 @@ enum { kvm_ioeventfd_flag_nr_deassign, kvm_ioeventfd_flag_nr_virtio_ccw_notify, kvm_ioeventfd_flag_nr_fast_mmio, + kvm_ioevetnfd_flag_nr_commit_write, kvm_ioeventfd_flag_nr_max, }; @@ -812,6 +813,7 @@ enum { #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) +#define KVM_IOEVENTFD_FLAG_COMMIT_WRITE (1 << kvm_ioevetnfd_flag_nr_commit_write) #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 
1) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2a3ed401ce46..c98e7b54fafa 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -682,6 +682,8 @@ struct _ioeventfd { struct kvm_io_device dev; u8 bus_idx; bool wildcard; + bool commit_write; + void *vaddr; }; static inline struct _ioeventfd * @@ -753,6 +755,10 @@ ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, if (!ioeventfd_in_range(p, addr, len, val)) return -EOPNOTSUPP; + if (p->commit_write) { + if (unlikely(copy_to_user(p->vaddr, val, len))) + return -EFAULT; + } eventfd_signal(p->eventfd, 1); return 0; } @@ -832,6 +838,9 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, else p->wildcard = true; + p->commit_write = args->flags & KVM_IOEVENTFD_FLAG_COMMIT_WRITE; + p->vaddr = (void *)args->vaddr; + mutex_lock(&kvm->slots_lock); /* Verify that there isn't a match already */ -- 2.22.3