On Thu, 2015-03-19 at 15:55 +0100, Eric Auger wrote: > This patch introduces a new KVM_DEV_VFIO_DEVICE group. > > This is a new control channel which enables KVM to cooperate with > viable VFIO devices. > > The patch introduces 2 attributes for this group: > KVM_DEV_VFIO_DEVICE_FORWARD_IRQ, KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. > Their purpose is to turn a VFIO device IRQ into a forwarded IRQ and > respectively unset the feature. > > The generic part introduced here interact with VFIO, genirq, KVM while > the architecture specific part mostly takes care of the virtual interrupt > controller programming. > > Architecture specific implementation is enabled when > __KVM_HAVE_ARCH_KVM_VFIO_FORWARD is set. When not set those > functions are void. > > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> > > --- > > v3 -> v4: > - use new kvm_vfio_dev_irq struct > - improve error handling according to Alex comments > - full rework or generic/arch specific split to accomodate for > unforward that never fails > - kvm_vfio_get_vfio_device and kvm_vfio_put_vfio_device removed from > that patch file and introduced before (since also used by Feng) > - guard kvm_vfio_control_irq_forward call with > __KVM_HAVE_ARCH_KVM_VFIO_FORWARD > > v2 -> v3: > - add API comments in kvm_host.h > - improve the commit message > - create a private kvm_vfio_fwd_irq struct > - fwd_irq_action replaced by a bool and removal of VFIO_IRQ_CLEANUP. This > latter action will be handled in vgic. > - add a vfio_device handle argument to kvm_arch_set_fwd_state. The goal is > to move platform specific stuff in architecture specific code. > - kvm_arch_set_fwd_state renamed into kvm_arch_vfio_set_forward > - increment the ref counter each time we do an IRQ forwarding and decrement > this latter each time one IRQ forward is unset. Simplifies the whole > ref counting. > - simplification of list handling: create, search, removal > > v1 -> v2: > - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD > - original patch file separated into 2 parts: generic part moved in vfio.c > and ARM specific part(kvm_arch_set_fwd_state) > --- > include/linux/kvm_host.h | 47 +++++++ > virt/kvm/vfio.c | 311 ++++++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 355 insertions(+), 3 deletions(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index f4e1829..8f17f87 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1042,6 +1042,15 @@ struct kvm_device_ops { > unsigned long arg); > }; > > +/* internal self-contained structure describing a forwarded IRQ */ > +struct kvm_fwd_irq { > + struct kvm *kvm; /* VM to inject the GSI into */ > + struct vfio_device *vdev; /* vfio device the IRQ belongs to */ > + __u32 index; /* VFIO device IRQ index */ > + __u32 subindex; /* VFIO device IRQ subindex */ > + __u32 gsi; /* gsi, ie. virtual IRQ number */ > +}; > + > void kvm_device_get(struct kvm_device *dev); > void kvm_device_put(struct kvm_device *dev); > struct kvm_device *kvm_device_from_filp(struct file *filp); > @@ -1065,6 +1074,44 @@ inline void kvm_arch_resume_guest(struct kvm *kvm) {} > > #endif > > +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD > +/** > + * kvm_arch_set_forward - Sets forwarding for a given IRQ > + * > + * @kvm: handle to the VM > + * @host_irq: physical IRQ number > + * @guest_irq: virtual IRQ number > + * returns 0 on success, < 0 on failure > + */ > +int kvm_arch_set_forward(struct kvm *kvm, > + unsigned int host_irq, unsigned int guest_irq); > + > +/** > + * kvm_arch_unset_forward - Unsets forwarding for a given IRQ > + * > + * @kvm: handle to the VM > + * @host_irq: physical IRQ number > + * @guest_irq: virtual IRQ number > + * @active: returns whether the IRQ is active > + */ > +void kvm_arch_unset_forward(struct kvm *kvm, > + unsigned int host_irq, > + unsigned int guest_irq, > + bool *active); > + > +#else > +static inline int kvm_arch_set_forward(struct kvm *kvm, > + unsigned int host_irq, > + unsigned int guest_irq) > +{ > + return -ENOENT; > +} > +static inline void kvm_arch_unset_forward(struct kvm *kvm, > + unsigned int host_irq, > + unsigned int guest_irq, > + bool *active) {} > +#endif > + > #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT > > static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) > diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c > index c995e51..4847597 100644 > --- a/virt/kvm/vfio.c > +++ b/virt/kvm/vfio.c > @@ -19,14 +19,30 @@ > #include <linux/uaccess.h> > #include <linux/vfio.h> > #include "vfio.h" > +#include <linux/platform_device.h> > +#include <linux/irq.h> > +#include <linux/spinlock.h> > +#include <linux/interrupt.h> > +#include <linux/delay.h> > + > +#define DEBUG_FORWARD > +#define DEBUG_UNFORWARD These shouldn't be here > > struct kvm_vfio_group { > struct list_head node; > struct vfio_group *vfio_group; > }; > > +/* private linkable kvm_fwd_irq struct */ > +struct kvm_vfio_fwd_irq_node { > + struct list_head link; > + struct kvm_fwd_irq fwd_irq; > +}; > + > struct kvm_vfio { > struct list_head group_list; > + /* list of registered VFIO forwarded IRQs */ > + struct list_head fwd_node_list; > struct mutex lock; > bool noncoherent; > }; > @@ -320,12 +336,293 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) > return -ENXIO; > } > > +/** > + * kvm_vfio_find_fwd_irq - checks whether a forwarded IRQ already is > + * registered in the list of forwarded IRQs > + * > + * @kv: handle to the kvm-vfio device > + * @fwd: handle to the forwarded irq struct > + * In the positive returns the handle to its node in the kvm-vfio > + * forwarded IRQ list, returns NULL otherwise. > + * Must be called with kv->lock hold. > + */ > +static struct kvm_vfio_fwd_irq_node *kvm_vfio_find_fwd_irq( > + struct kvm_vfio *kv, > + struct kvm_fwd_irq *fwd) > +{ > + struct kvm_vfio_fwd_irq_node *node; > + > + list_for_each_entry(node, &kv->fwd_node_list, link) { > + if ((node->fwd_irq.index == fwd->index) && > + (node->fwd_irq.subindex == fwd->subindex) && > + (node->fwd_irq.vdev == fwd->vdev)) > + return node; > + } > + return NULL; > +} > +/** > + * kvm_vfio_register_fwd_irq - Allocates, populates and registers a > + * forwarded IRQ > + * > + * @kv: handle to the kvm-vfio device > + * @fwd: handle to the forwarded irq struct > + * In case of success returns a handle to the new list node, > + * NULL otherwise. > + * Must be called with kv->lock hold. > + */ > +static struct kvm_vfio_fwd_irq_node *kvm_vfio_register_fwd_irq( > + struct kvm_vfio *kv, > + struct kvm_fwd_irq *fwd) > +{ > + struct kvm_vfio_fwd_irq_node *node; > + > + node = kmalloc(sizeof(*node), GFP_KERNEL); > + if (!node) > + return ERR_PTR(-ENOMEM); > + > + node->fwd_irq = *fwd; > + > + list_add(&node->link, &kv->fwd_node_list); > + > + return node; > +} > + > +/** > + * kvm_vfio_unregister_fwd_irq - unregisters and frees a forwarded IRQ > + * > + * @node: handle to the node struct > + * Must be called with kv->lock hold. > + */ > +static void kvm_vfio_unregister_fwd_irq(struct kvm_vfio_fwd_irq_node *node) > +{ > + list_del(&node->link); > + kvm_vfio_put_vfio_device(node->fwd_irq.vdev); > + kfree(node); > +} > + > +static int kvm_vfio_platform_get_irq(struct vfio_device *vdev, int index) > +{ > + int hwirq; > + struct platform_device *platdev; > + struct device *dev = kvm_vfio_external_base_device(vdev); > + > + if (dev->bus == &platform_bus_type) { > + platdev = to_platform_device(dev); > + hwirq = platform_get_irq(platdev, index); > + return hwirq; > + } else > + return -EINVAL; > +} This seems like it was intended to be bus_type agnostic, but it's got platform in the name. > + > +/** > + * kvm_vfio_set_forward - turns a VFIO device IRQ into a forwarded IRQ > + * @kv: handle to the kvm-vfio device > + * @fd: file descriptor of the vfio device the IRQ belongs to > + * @fwd: handle to the forwarded irq struct > + * > + * Registers and turns an IRQ as forwarded. The operation only is allowed > + * if the IRQ is not in progress (active at interrupt controller level or > + * auto-masked by the handler). In case the user-space masked the IRQ, > + * the operation will fail too. > + * returns: > + * -EAGAIN if the IRQ is in progress or VFIO masked > + * -EEXIST if the IRQ is already registered as forwarded > + * -ENOMEM on memory shortage > + */ > +static int kvm_vfio_set_forward(struct kvm_vfio *kv, int fd, > + struct kvm_fwd_irq *fwd) > +{ > + int ret = -EAGAIN; > + struct kvm_vfio_fwd_irq_node *node; > + struct vfio_platform_device *vpdev = vfio_device_data(fwd->vdev); > + int index = fwd->index; > + int host_irq = kvm_vfio_platform_get_irq(fwd->vdev, fwd->index); > + bool active; > + > + kvm_arch_halt_guest(fwd->kvm); > + > + disable_irq(host_irq); > + > + active = kvm_vfio_external_is_active(vpdev, index); > + > + if (active) > + goto out; > + > + node = kvm_vfio_register_fwd_irq(kv, fwd); > + if (IS_ERR(node)) { > + ret = PTR_ERR(node); > + goto out; > + } > + > + kvm_vfio_external_set_automasked(vpdev, index, false); > + > + ret = kvm_arch_set_forward(fwd->kvm, host_irq, fwd->gsi); > + > +out: > + enable_irq(host_irq); > + > + kvm_arch_resume_guest(fwd->kvm); > + > + return ret; If only we were passing around a vfio_device instead of a vfio_platform_device and we had abstraction in place for the kvm_vfio_external_foo callbacks... > +} > + > +/** > + * kvm_vfio_unset_forward - Sets a VFIO device IRQ as non forwarded > + * @kv: handle to the kvm-vfio device > + * @node: handle to the node to unset > + * > + * Calls the architecture specific implementation of set_forward and > + * unregisters the IRQ from the forwarded IRQ list. > + */ > +static void kvm_vfio_unset_forward(struct kvm_vfio *kv, > + struct kvm_vfio_fwd_irq_node *node) > +{ > + struct kvm_fwd_irq fwd = node->fwd_irq; > + int host_irq = kvm_vfio_platform_get_irq(fwd.vdev, fwd.index); > + int index = fwd.index; > + struct vfio_platform_device *vpdev = vfio_device_data(fwd.vdev); > + bool active = false; > + > + kvm_arch_halt_guest(fwd.kvm); > + > + disable_irq(host_irq); > + > + kvm_arch_unset_forward(fwd.kvm, host_irq, fwd.gsi, &active); > + > + if (active) > + kvm_vfio_external_mask(vpdev, index); > + > + kvm_vfio_external_set_automasked(vpdev, index, true); > + enable_irq(host_irq); > + > + kvm_arch_resume_guest(fwd.kvm); > + > + kvm_vfio_unregister_fwd_irq(node); Same > +} > + > +static int kvm_vfio_control_irq_forward(struct kvm_device *kdev, long attr, > + int32_t __user *argp) > +{ > + struct kvm_vfio_dev_irq user_fwd_irq; > + struct kvm_fwd_irq fwd; > + struct vfio_device *vdev; > + struct kvm_vfio *kv = kdev->private; > + struct kvm_vfio_fwd_irq_node *node; > + unsigned long minsz; > + int ret = 0; > + u32 *gsi = NULL; > + size_t size; > + > + minsz = offsetofend(struct kvm_vfio_dev_irq, count); > + > + if (copy_from_user(&user_fwd_irq, argp, minsz)) > + return -EFAULT; > + > + if (user_fwd_irq.argsz < minsz) > + return -EINVAL; > + > + vdev = kvm_vfio_get_vfio_device(user_fwd_irq.fd); > + if (IS_ERR(vdev)) > + return PTR_ERR(vdev); > + > + ret = kvm_vfio_platform_get_irq(vdev, user_fwd_irq.index); > + if (ret < 0) > + goto error; > + > + size = sizeof(__u32); > + if (user_fwd_irq.argsz - minsz < size) { > + ret = -EINVAL; > + goto error; > + } > + > + gsi = memdup_user((void __user *)((unsigned long)argp + minsz), size); > + if (IS_ERR(gsi)) { > + ret = PTR_ERR(gsi); > + goto error; > + } > + > + fwd.vdev = vdev; > + fwd.kvm = kdev->kvm; > + fwd.index = user_fwd_irq.index; > + fwd.subindex = 0; > + fwd.gsi = *gsi; > + > + node = kvm_vfio_find_fwd_irq(kv, &fwd); > + > + switch (attr) { > + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: > + if (node) { > + ret = -EEXIST; > + goto error; > + } > + mutex_lock(&kv->lock); > + ret = kvm_vfio_set_forward(kv, user_fwd_irq.fd, &fwd); > + mutex_unlock(&kv->lock); > + break; > + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: > + if (!node) { > + ret = -ENOENT; > + goto error; > + } > + mutex_lock(&kv->lock); > + kvm_vfio_unset_forward(kv, node); > + mutex_unlock(&kv->lock); > + kvm_vfio_put_vfio_device(vdev); > + ret = 0; > + break; > + } > + > + kfree(gsi); > +error: > + if (ret < 0) > + kvm_vfio_put_vfio_device(vdev); > + return ret; > +} > + > +static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg) > +{ > + int32_t __user *argp = (int32_t __user *)(unsigned long)arg; > + int ret; > + > + switch (attr) { > +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD > + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: > + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: > + ret = kvm_vfio_control_irq_forward(kdev, attr, argp); > + break; > +#endif > + default: > + ret = -ENXIO; > + } > + return ret; > +} > + > +/** > + * kvm_vfio_clean_fwd_irq - Unset forwarding state of all > + * registered forwarded IRQs and free their list nodes. > + * @kv: kvm-vfio device > + * > + * Loop on all registered device/IRQ combos, reset the non forwarded state, > + * void the lists and release the reference > + */ > +static int kvm_vfio_clean_fwd_irq(struct kvm_vfio *kv) > +{ > + struct kvm_vfio_fwd_irq_node *node, *tmp; > + > + list_for_each_entry_safe(node, tmp, &kv->fwd_node_list, link) { > + kvm_vfio_unset_forward(kv, node); > + } > + return 0; > +} > + > static int kvm_vfio_set_attr(struct kvm_device *dev, > struct kvm_device_attr *attr) > { > switch (attr->group) { > case KVM_DEV_VFIO_GROUP: > return kvm_vfio_set_group(dev, attr->attr, attr->addr); > + case KVM_DEV_VFIO_DEVICE: > + return kvm_vfio_set_device(dev, attr->attr, attr->addr); > } > > return -ENXIO; > @@ -341,10 +638,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, > case KVM_DEV_VFIO_GROUP_DEL: > return 0; > } > - > break; > +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD > + case KVM_DEV_VFIO_DEVICE: > + switch (attr->attr) { > + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: > + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: > + return 0; > + } > + break; > +#endif > } > - > return -ENXIO; > } > > @@ -358,7 +662,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) > list_del(&kvg->node); > kfree(kvg); > } > - > + kvm_vfio_clean_fwd_irq(kv); > kvm_vfio_update_coherency(dev); > > kfree(kv); > @@ -390,6 +694,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) > return -ENOMEM; > > INIT_LIST_HEAD(&kv->group_list); > + INIT_LIST_HEAD(&kv->fwd_node_list); > mutex_init(&kv->lock); > > dev->private = kv; It's so close to not tainting the kvm-vfio device with anything platform specific, can't we provide that last bit of abstraction somehow? Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html