ioeventfd is a mechanism to register PIO/MMIO regions to trigger an eventfd signal when written to by a guest. Host userspace can register any arbitrary IO address with a corresponding eventfd and then pass the eventfd to a specific end-point of interest for handling. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d34e6b175e61821026893ec5298cc8e7558df43a Basic framework code is taken from kvm implementation. Credit goes to kvm irqfd/ioeventfd developers. Signed-off-by: Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx> --- drivers/hv/hv_eventfd.c | 248 +++++++++++++++++++++++++++++++++-- drivers/hv/mshv_main.c | 20 ++- include/linux/mshv.h | 4 + include/linux/mshv_eventfd.h | 21 ++- include/uapi/linux/mshv.h | 26 +++- 5 files changed, 300 insertions(+), 19 deletions(-) diff --git a/drivers/hv/hv_eventfd.c b/drivers/hv/hv_eventfd.c index 11fcafd1df08..5ed77901fb0b 100644 --- a/drivers/hv/hv_eventfd.c +++ b/drivers/hv/hv_eventfd.c @@ -2,8 +2,8 @@ /* * eventfd support for mshv * - * Heavily inspired from KVM implementation of irqfd. The basic framework - * code is taken from the kvm implementation. + * Heavily inspired from KVM implementation of irqfd/ioeventfd. The basic + * framework code is taken from the kvm implementation. * * All credits to kvm developers. */ @@ -210,13 +210,6 @@ mshv_irqfd_assign(struct mshv_partition *partition, return ret; } -void -mshv_irqfd_init(struct mshv_partition *partition) -{ - spin_lock_init(&partition->irqfds.lock); - INIT_LIST_HEAD(&partition->irqfds.items); -} - /* * shutdown any irqfd's that match fd+gsi */ @@ -261,10 +254,10 @@ mshv_irqfd(struct mshv_partition *partition, struct mshv_irqfd *args) } /* - * This function is called as the mshv VM fd is being released. Shutdown all - * irqfds that still remain open + * This function is called as the mshv VM fd is being released. 
+ * Shutdown all irqfds that still remain open */ -void +static void mshv_irqfd_release(struct mshv_partition *partition) { struct mshv_kernel_irqfd *irqfd, *tmp; @@ -297,3 +290,234 @@ void mshv_irqfd_wq_cleanup(void) { destroy_workqueue(irqfd_cleanup_wq); } + +/* + * -------------------------------------------------------------------- + * ioeventfd: translate a MMIO memory write to an eventfd signal. + * + * userspace can register a MMIO address with an eventfd for receiving + * notification when the memory has been touched. + * + * TODO: Implement eventfd for PIO as well. + * -------------------------------------------------------------------- + */ + +static void +ioeventfd_release(struct kernel_mshv_ioeventfd *p, u64 partition_id) +{ + if (p->doorbell_id > 0) + hv_unregister_doorbell(partition_id, p->doorbell_id); + eventfd_ctx_put(p->eventfd); + list_del(&p->list); + kfree(p); +} + +/* MMIO writes trigger an event if the addr/val match */ +static void +ioeventfd_mmio_write(int doorbell_id, void *data) +{ + struct mshv_partition *partition = (struct mshv_partition *)data; + struct kernel_mshv_ioeventfd *p; + unsigned long flags; + + spin_lock_irqsave(&partition->ioeventfds.lock, flags); + list_for_each_entry(p, &partition->ioeventfds.items, list) { + if (p->doorbell_id == doorbell_id) { + eventfd_signal(p->eventfd, 1); + break; + } + } + spin_unlock_irqrestore(&partition->ioeventfds.lock, flags); +} + +static bool +ioeventfd_check_collision(struct mshv_partition *partition, + struct kernel_mshv_ioeventfd *p) +{ + struct kernel_mshv_ioeventfd *_p; + + list_for_each_entry(_p, &partition->ioeventfds.items, list) + if (_p->addr == p->addr && _p->length == p->length && + (_p->wildcard || p->wildcard || + _p->datamatch == p->datamatch)) + return true; + + return false; +} + +static int +mshv_assign_ioeventfd(struct mshv_partition *partition, + struct mshv_ioeventfd *args) +{ + struct kernel_mshv_ioeventfd *p; + struct eventfd_ctx *eventfd; + u64 doorbell_flags = 0; + 
unsigned long irqflags; + int ret; + + if (args->flags & MSHV_IOEVENTFD_FLAG_PIO) + return -EOPNOTSUPP; + + /* must be natural-word sized */ + switch (args->len) { + case 0: + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY; + break; + case 1: + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE; + break; + case 2: + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD; + break; + case 4: + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD; + break; + case 8: + doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD; + break; + default: + pr_warn("ioeventfd: invalid length specified\n"); + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~MSHV_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + eventfd = eventfd_ctx_fdget(args->fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + ret = -ENOMEM; + goto fail; + } + + INIT_LIST_HEAD(&p->list); + p->addr = args->addr; + p->length = args->len; + p->eventfd = eventfd; + + /* The datamatch feature is optional, otherwise this is a wildcard */ + if (args->flags & MSHV_IOEVENTFD_FLAG_DATAMATCH) + p->datamatch = args->datamatch; + else { + p->wildcard = true; + doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE; + } + + spin_lock_irqsave(&partition->ioeventfds.lock, irqflags); + + if (ioeventfd_check_collision(partition, p)) { + ret = -EEXIST; + goto unlock_fail; + } + + ret = hv_register_doorbell(partition->id, ioeventfd_mmio_write, + (void *)partition, p->addr, + p->datamatch, doorbell_flags); + if (ret < 0) { + pr_err("Failed to register ioeventfd doorbell!\n"); + goto unlock_fail; + } + + p->doorbell_id = ret; + list_add_tail(&p->list, &partition->ioeventfds.items); + + spin_unlock_irqrestore(&partition->ioeventfds.lock, irqflags); + + return 0; + +unlock_fail: + spin_unlock_irqrestore(&partition->ioeventfds.lock, 
irqflags); + + kfree(p); + +fail: + eventfd_ctx_put(eventfd); + + return ret; +} + +static int +mshv_deassign_ioeventfd(struct mshv_partition *partition, + struct mshv_ioeventfd *args) +{ + struct kernel_mshv_ioeventfd *p, *tmp; + struct eventfd_ctx *eventfd; + unsigned long flags; + int ret = -ENOENT; + + eventfd = eventfd_ctx_fdget(args->fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + spin_lock_irqsave(&partition->ioeventfds.lock, flags); + + list_for_each_entry_safe(p, tmp, &partition->ioeventfds.items, list) { + bool wildcard = !(args->flags & MSHV_IOEVENTFD_FLAG_DATAMATCH); + + if (p->eventfd != eventfd || + p->addr != args->addr || + p->length != args->len || + p->wildcard != wildcard) + continue; + + if (!p->wildcard && p->datamatch != args->datamatch) + continue; + + ioeventfd_release(p, partition->id); + ret = 0; + break; + } + + spin_unlock_irqrestore(&partition->ioeventfds.lock, flags); + + eventfd_ctx_put(eventfd); + + return ret; +} + +int +mshv_ioeventfd(struct mshv_partition *partition, + struct mshv_ioeventfd *args) +{ + /* PIO not yet implemented */ + if (args->flags & MSHV_IOEVENTFD_FLAG_PIO) + return -EOPNOTSUPP; + + if (args->flags & MSHV_IOEVENTFD_FLAG_DEASSIGN) + return mshv_deassign_ioeventfd(partition, args); + + return mshv_assign_ioeventfd(partition, args); +} + +void +mshv_eventfd_init(struct mshv_partition *partition) +{ + spin_lock_init(&partition->irqfds.lock); + INIT_LIST_HEAD(&partition->irqfds.items); + + spin_lock_init(&partition->ioeventfds.lock); + INIT_LIST_HEAD(&partition->ioeventfds.items); +} + +void +mshv_eventfd_release(struct mshv_partition *partition) +{ + struct kernel_mshv_ioeventfd *p, *tmp; + unsigned long flags; + + spin_lock_irqsave(&partition->ioeventfds.lock, flags); + list_for_each_entry_safe(p, tmp, &partition->ioeventfds.items, list) { + ioeventfd_release(p, partition->id); + } + spin_unlock_irqrestore(&partition->ioeventfds.lock, flags); + + mshv_irqfd_release(partition); +} diff --git 
a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c index e124119e65eb..e1caecd27f09 100644 --- a/drivers/hv/mshv_main.c +++ b/drivers/hv/mshv_main.c @@ -828,6 +828,18 @@ mshv_partition_ioctl_assert_interrupt(struct mshv_partition *partition, args.control); } +static long +mshv_partition_ioctl_ioeventfd(struct mshv_partition *partition, + void __user *user_args) +{ + struct mshv_ioeventfd args; + + if (copy_from_user(&args, user_args, sizeof(args))) + return -EFAULT; + + return mshv_ioeventfd(partition, &args); +} + static long mshv_partition_ioctl_irqfd(struct mshv_partition *partition, void __user *user_args) @@ -882,6 +894,10 @@ mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) ret = mshv_partition_ioctl_irqfd(partition, (void __user *)arg); break; + case MSHV_IOEVENTFD: + ret = mshv_partition_ioctl_ioeventfd(partition, + (void __user *)arg); + break; default: ret = -ENOTTY; } @@ -972,7 +988,7 @@ mshv_partition_release(struct inode *inode, struct file *filp) { struct mshv_partition *partition = filp->private_data; - mshv_irqfd_release(partition); + mshv_eventfd_release(partition); mshv_partition_put(partition); @@ -1068,7 +1084,7 @@ mshv_ioctl_create_partition(void __user *user_arg) fd_install(fd, file); - mshv_irqfd_init(partition); + mshv_eventfd_init(partition); return fd; diff --git a/include/linux/mshv.h b/include/linux/mshv.h index 5707c457b72d..217c91725828 100644 --- a/include/linux/mshv.h +++ b/include/linux/mshv.h @@ -52,6 +52,10 @@ struct mshv_partition { spinlock_t lock; struct list_head items; } irqfds; + struct { + spinlock_t lock; + struct list_head items; + } ioeventfds; }; struct mshv_lapic_irq { diff --git a/include/linux/mshv_eventfd.h b/include/linux/mshv_eventfd.h index 3e7b16d0717f..fd0012f72616 100644 --- a/include/linux/mshv_eventfd.h +++ b/include/linux/mshv_eventfd.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * - * irqfd: Allows an fd to be used to inject an interrupt to the guest + * 
irqfd: Allows an fd to be used to inject an interrupt to the guest. + * ioeventfd: Allows an fd to be used to receive a signal from the guest. * All credit goes to kvm developers. */ @@ -11,6 +12,9 @@ #include <linux/mshv.h> #include <linux/poll.h> +void mshv_eventfd_init(struct mshv_partition *partition); +void mshv_eventfd_release(struct mshv_partition *partition); + struct mshv_kernel_irqfd { struct mshv_partition *partition; struct eventfd_ctx *eventfd; @@ -26,10 +30,19 @@ struct mshv_kernel_irqfd { int mshv_irqfd(struct mshv_partition *partition, struct mshv_irqfd *args); -void mshv_irqfd_init(struct mshv_partition *partition); -void mshv_irqfd_release(struct mshv_partition *partition); - int mshv_irqfd_wq_init(void); void mshv_irqfd_wq_cleanup(void); +struct kernel_mshv_ioeventfd { + struct list_head list; + u64 addr; + int length; + struct eventfd_ctx *eventfd; + u64 datamatch; + int doorbell_id; + bool wildcard; +}; + +int mshv_ioeventfd(struct mshv_partition *kvm, struct mshv_ioeventfd *args); + #endif /* __LINUX_MSHV_EVENTFD_H */ diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 792844276134..e32dee679360 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -93,6 +93,29 @@ struct mshv_irqfd { __u8 pad[2]; }; +enum { + mshv_ioeventfd_flag_nr_datamatch, + mshv_ioeventfd_flag_nr_pio, + mshv_ioeventfd_flag_nr_deassign, + mshv_ioeventfd_flag_nr_max, +}; + +#define MSHV_IOEVENTFD_FLAG_DATAMATCH (1 << mshv_ioeventfd_flag_nr_datamatch) +#define MSHV_IOEVENTFD_FLAG_PIO (1 << mshv_ioeventfd_flag_nr_pio) +#define MSHV_IOEVENTFD_FLAG_DEASSIGN (1 << mshv_ioeventfd_flag_nr_deassign) + +#define MSHV_IOEVENTFD_VALID_FLAG_MASK ((1 << mshv_ioeventfd_flag_nr_max) - 1) + +struct mshv_ioeventfd { + __u64 datamatch; + __u64 addr; /* legal pio/mmio address */ + __u32 len; /* 0 (any size), 1, 2, 4, or 8 bytes */ + __s32 fd; + __u32 flags; + __u8 pad[4]; +}; + + #define MSHV_IOCTL 0xB8 /* mshv device */ @@ -109,7 +132,8 @@ struct mshv_irqfd { 
_IOW(MSHV_IOCTL, 0xC, struct mshv_partition_property) #define MSHV_GET_PARTITION_PROPERTY \ _IOWR(MSHV_IOCTL, 0xD, struct mshv_partition_property) -#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0xE, struct mshv_irqfd) +#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0xE, struct mshv_irqfd) +#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0xF, struct mshv_ioeventfd) /* vp device */ #define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers) -- 2.25.1