This patch introduces a new KVM_DEV_VFIO_DEVICE attribute. This is a new control channel which enables KVM to cooperate with viable VFIO devices. The kvm-vfio device now holds a list of devices (kvm_vfio_device) in addition to a list of groups (kvm_vfio_group). The new infrastructure makes it possible to check the validity of a VFIO device file descriptor and to get and hold a reference to the corresponding device. The first concrete command implemented is IRQ forward control: KVM_DEV_VFIO_DEVICE_FORWARD_IRQ and KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ. It consists in programming the VFIO driver and KVM in a consistent manner so that an optimized IRQ injection/completion path is set up. Each kvm_vfio_device holds a list of forwarded IRQs. When putting a kvm_vfio_device, the implementation makes sure the forwarded IRQs are put back in the normal (non-forwarded) handling state. The forwarding programming is architecture specific and is embodied by the kvm_arch_set_fwd_state function, whose implementation is given in a separate patch file. The forwarding control modality is enabled by the __KVM_HAVE_ARCH_KVM_VFIO_FORWARD define.
Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> --- v1 -> v2: - __KVM_HAVE_ARCH_KVM_VFIO renamed into __KVM_HAVE_ARCH_KVM_VFIO_FORWARD - original patch file separated into 2 parts: generic part moved in vfio.c and ARM specific part(kvm_arch_set_fwd_state) --- include/linux/kvm_host.h | 27 +++ virt/kvm/vfio.c | 452 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 477 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b3..24350dc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1065,6 +1065,21 @@ struct kvm_device_ops { unsigned long arg); }; +enum kvm_fwd_irq_action { + KVM_VFIO_IRQ_SET_FORWARD, + KVM_VFIO_IRQ_SET_NORMAL, + KVM_VFIO_IRQ_CLEANUP, +}; + +/* internal structure describing a forwarded IRQ */ +struct kvm_fwd_irq { + struct list_head link; + __u32 index; /* platform device irq index */ + __u32 hwirq; /*physical IRQ */ + __u32 gsi; /* virtual IRQ */ + struct kvm_vcpu *vcpu; /* vcpu to inject into*/ +}; + void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); @@ -1075,6 +1090,18 @@ extern struct kvm_device_ops kvm_vfio_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_flic_ops; +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD +int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd, + enum kvm_fwd_irq_action action); + +#else +static inline int kvm_arch_set_fwd_state(struct kvm_fwd_irq *pfwd, + enum kvm_fwd_irq_action action) +{ + return 0; +} +#endif + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 76dc7a1..e4a81c4 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -18,14 +18,24 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <linux/platform_device.h> struct kvm_vfio_group { 
struct list_head node; struct vfio_group *vfio_group; }; +struct kvm_vfio_device { + struct list_head node; + struct vfio_device *vfio_device; + /* list of forwarded IRQs for that VFIO device */ + struct list_head fwd_irq_list; + int fd; +}; + struct kvm_vfio { struct list_head group_list; + struct list_head device_list; struct mutex lock; bool noncoherent; }; @@ -246,12 +256,441 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) return -ENXIO; } +/** + * get_vfio_device - returns the vfio-device corresponding to this fd + * @fd:fd of the vfio platform device + * + * checks it is a vfio device + * increment its ref counter + */ +static struct vfio_device *kvm_vfio_get_vfio_device(int fd) +{ + struct fd f; + struct vfio_device *vdev; + + f = fdget(fd); + if (!f.file) + return NULL; + vdev = kvm_vfio_device_get_external_user(f.file); + fdput(f); + return vdev; +} + +/** + * put_vfio_device: put the vfio platform device + * @vdev: vfio_device to put + * + * decrement the ref counter + */ +static void kvm_vfio_put_vfio_device(struct vfio_device *vdev) +{ + kvm_vfio_device_put_external_user(vdev); +} + +/** + * kvm_vfio_find_device - look for the device in the assigned + * device list + * @kv: the kvm-vfio device + * @vdev: the vfio_device to look for + * + * returns the associated kvm_vfio_device if the device is known, + * meaning at least 1 IRQ is forwarded for this device. + * in the device is not registered, returns NULL. 
+ */
+struct kvm_vfio_device *kvm_vfio_find_device(struct kvm_vfio *kv,
+					     struct vfio_device *vdev)
+{
+	struct kvm_vfio_device *kvm_vdev_iter;
+
+	/* registered devices are matched on their vfio_device pointer */
+	list_for_each_entry(kvm_vdev_iter, &kv->device_list, node) {
+		if (kvm_vdev_iter->vfio_device == vdev)
+			return kvm_vdev_iter;
+	}
+	return NULL;
+}
+
+/**
+ * kvm_vfio_find_irq - look for an irq in the device IRQ list
+ * @kvm_vdev: the kvm_vfio_device
+ * @irq_index: irq index
+ *
+ * returns the forwarded irq struct if it exists, NULL in the negative
+ */
+struct kvm_fwd_irq *kvm_vfio_find_irq(struct kvm_vfio_device *kvm_vdev,
+				      int irq_index)
+{
+	struct kvm_fwd_irq *fwd_irq_iter;
+
+	list_for_each_entry(fwd_irq_iter, &kvm_vdev->fwd_irq_list, link) {
+		if (fwd_irq_iter->index == irq_index)
+			return fwd_irq_iter;
+	}
+	return NULL;
+}
+
+/**
+ * kvm_vfio_validate_forward - checks whether forwarding a given IRQ is
+ * meaningful
+ * @kv: the kvm-vfio device
+ * @vdev: vfio_device the IRQ belongs to
+ * @fwd_irq: user struct containing the irq_index to forward
+ * @kvm_vdev: output, set to the kvm_vfio_device that already holds
+ * forwarded IRQs for that VFIO device, NULL if there is none
+ * @hwirq: output, irq number the irq index corresponds to (-1 if it
+ * could not be resolved)
+ *
+ * checks the vfio-device is a platform vfio device
+ * checks the irq_index corresponds to an actual hwirq and
+ * checks this hwirq is not already forwarded
+ * returns 0 if the IRQ can be forwarded, -EINVAL on following errors:
+ * not a platform device, bad irq index, already forwarded
+ */
+static int kvm_vfio_validate_forward(struct kvm_vfio *kv,
+				     struct vfio_device *vdev,
+				     struct kvm_arch_forwarded_irq *fwd_irq,
+				     struct kvm_vfio_device **kvm_vdev,
+				     int *hwirq)
+{
+	struct device *dev = kvm_vfio_external_base_device(vdev);
+	struct platform_device *platdev;
+
+	*hwirq = -1;
+	*kvm_vdev = NULL;
+
+	/* do not dereference a missing device or a device without a bus */
+	if (!dev || !dev->bus || strcmp(dev->bus->name, "platform") != 0) {
+		kvm_err("%s not a platform device\n", __func__);
+		return -EINVAL;
+	}
+
+	platdev = to_platform_device(dev);
+	*hwirq = platform_get_irq(platdev, fwd_irq->index);
+	if (*hwirq < 0) {
+		kvm_err("%s incorrect index\n", __func__);
+		return -EINVAL;
+	}
+
+	/* is a ref to this device already owned by the KVM-VFIO device? */
+	*kvm_vdev = kvm_vfio_find_device(kv, vdev);
+	if (*kvm_vdev && kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index)) {
+		kvm_err("%s irq %d already forwarded\n",
+			__func__, *hwirq);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * kvm_vfio_validate_unforward - check a deassignment is meaningful
+ * @kv: the kvm_vfio device
+ * @vdev: the vfio_device whose irq to deassign belongs to
+ * @fwd_irq: the user struct that contains the fd and irq_index of the irq
+ * @kvm_vdev: output, the kvm_vfio_device the forwarded irq belongs to,
+ * NULL if the device is not registered
+ *
+ * returns 0 if the provided irq effectively is forwarded
+ * (a ref to this vfio_device is held and this irq belongs to
+ * the forwarded irqs of this device)
+ * returns -EINVAL in the negative
+ */
+static int kvm_vfio_validate_unforward(struct kvm_vfio *kv,
+				       struct vfio_device *vdev,
+				       struct kvm_arch_forwarded_irq *fwd_irq,
+				       struct kvm_vfio_device **kvm_vdev)
+{
+	struct kvm_fwd_irq *pfwd;
+
+	*kvm_vdev = kvm_vfio_find_device(kv, vdev);
+	/*
+	 * test the looked-up device, not the output pointer itself:
+	 * the latter is never NULL and an unknown device would be
+	 * dereferenced by kvm_vfio_find_irq()
+	 */
+	if (!*kvm_vdev) {
+		kvm_err("%s no forwarded irq for this device\n", __func__);
+		return -EINVAL;
+	}
+	pfwd = kvm_vfio_find_irq(*kvm_vdev, fwd_irq->index);
+	if (!pfwd) {
+		kvm_err("%s irq %d is not forwarded\n", __func__,
+			fwd_irq->index);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * kvm_vfio_forward - set a forwarded IRQ
+ * @kdev: the kvm device
+ * @vdev: the vfio device the IRQ belongs to
+ * @fwd_irq: the user struct containing the irq_index and guest irq
+ * @must_put: tells the caller whether the vfio_device must be put after
+ * the call (ref must be released in case a ref onto this device was
+ * already held or in case of new device and failure)
+ *
+ * validate the injection, activate forward and store the information
+ * about which irq and which device is concerned so that on deassign or
+ * kvm-vfio destruction everything can be cleaned up. 
+ *
+ * returns 0 on success, -EINVAL if the request is not valid, -ENOMEM
+ * on allocation failure or the error reported by
+ * kvm_arch_set_fwd_state
+ */
+static int kvm_vfio_forward(struct kvm_device *kdev,
+			    struct vfio_device *vdev,
+			    struct kvm_arch_forwarded_irq *fwd_irq,
+			    bool *must_put)
+{
+	struct kvm_vfio *kv = kdev->private;
+	struct kvm_vfio_device *kvm_vdev = NULL;
+	struct kvm_vfio_device *new_vdev = NULL;
+	struct kvm_fwd_irq *pfwd;
+	int hwirq;
+	int ret;
+
+	*must_put = true;
+	ret = kvm_vfio_validate_forward(kv, vdev, fwd_irq,
+					&kvm_vdev, &hwirq);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * allocate everything before programming the forwarded state so
+	 * that an allocation failure never requires undoing it
+	 */
+	pfwd = kzalloc(sizeof(*pfwd), GFP_KERNEL);
+	if (!pfwd)
+		return -ENOMEM;
+
+	if (!kvm_vdev) {
+		new_vdev = kzalloc(sizeof(*new_vdev), GFP_KERNEL);
+		if (!new_vdev) {
+			ret = -ENOMEM;
+			goto out_free;
+		}
+	}
+
+	pfwd->index = fwd_irq->index;
+	pfwd->gsi = fwd_irq->gsi;
+	pfwd->hwirq = hwirq;
+	pfwd->vcpu = kvm_get_vcpu(kdev->kvm, 0);
+	ret = kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_SET_FORWARD);
+	if (ret < 0) {
+		kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_CLEANUP);
+		goto out_free;
+	}
+
+	if (new_vdev) {
+		/* insert the new device and keep the ref */
+		new_vdev->vfio_device = vdev;
+		new_vdev->fd = fwd_irq->fd;
+		INIT_LIST_HEAD(&new_vdev->fwd_irq_list);
+		list_add(&new_vdev->node, &kv->device_list);
+		kvm_vdev = new_vdev;
+		/*
+		 * the only case where we keep the ref:
+		 * new device and forward setting successful
+		 */
+		*must_put = false;
+	}
+
+	list_add(&pfwd->link, &kvm_vdev->fwd_irq_list);
+
+	kvm_debug("forwarding set for fd=%d, hwirq=%d, gsi=%d\n",
+		  fwd_irq->fd, hwirq, fwd_irq->gsi);
+
+	return 0;
+
+out_free:
+	kfree(new_vdev);
+	kfree(pfwd);
+	return ret;
+}
+
+/**
+ * remove_assigned_device - put a given device from the list
+ * @kv: the kvm-vfio device
+ * @vdev: the vfio-device to remove
+ *
+ * change the state of all forwarded IRQs, free the forwarded IRQ list,
+ * remove the corresponding kvm_vfio_device from the assigned device
+ * list. 
+ * returns true if the device could be removed, false in the negative
+ * (the device is not registered, or one of its IRQs could not be set
+ * back to the normal state; in the latter case the device stays
+ * registered with its remaining IRQs)
+ */
+bool remove_assigned_device(struct kvm_vfio *kv,
+			    struct vfio_device *vdev)
+{
+	struct kvm_vfio_device *kvm_vdev = kvm_vfio_find_device(kv, vdev);
+	struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq;
+
+	if (!kvm_vdev)
+		return false;
+
+	/* loop on all its forwarded IRQ */
+	list_for_each_entry_safe(fwd_irq_iter, tmp_irq,
+				 &kvm_vdev->fwd_irq_list, link) {
+		/*
+		 * report the failure as false: returning the negative
+		 * error code from a bool function would read as true
+		 */
+		if (kvm_arch_set_fwd_state(fwd_irq_iter,
+					   KVM_VFIO_IRQ_SET_NORMAL) < 0)
+			return false;
+		list_del(&fwd_irq_iter->link);
+		kfree(fwd_irq_iter);
+	}
+
+	/* all IRQs could be deassigned */
+	list_del(&kvm_vdev->node);
+	kvm_vfio_device_put_external_user(kvm_vdev->vfio_device);
+	kfree(kvm_vdev);
+	return true;
+}
+
+/**
+ * remove_fwd_irq - remove a forwarded irq
+ *
+ * @kv: kvm-vfio device
+ * @kvm_vdev: the kvm_vfio_device the IRQ belongs to
+ * @irq_index: the index of the IRQ
+ *
+ * change the forwarded state of the IRQ, remove the IRQ from
+ * the device forwarded IRQ list. 
In case it is the last one,
+ * put the device
+ *
+ * returns 0 on success, -EINVAL if the IRQ is not in the list or the
+ * error reported by kvm_arch_set_fwd_state
+ */
+int remove_fwd_irq(struct kvm_vfio *kv,
+		   struct kvm_vfio_device *kvm_vdev,
+		   int irq_index)
+{
+	struct kvm_fwd_irq *pfwd = kvm_vfio_find_irq(kvm_vdev, irq_index);
+	int ret = -EINVAL;
+
+	if (pfwd) {
+		ret = kvm_arch_set_fwd_state(pfwd, KVM_VFIO_IRQ_SET_NORMAL);
+		if (ret >= 0) {
+			list_del(&pfwd->link);
+			kfree(pfwd);
+			ret = 0;
+		}
+	}
+
+	/* no forwarded IRQ left: release the device */
+	if (list_empty(&kvm_vdev->fwd_irq_list))
+		remove_assigned_device(kv, kvm_vdev->vfio_device);
+
+	return ret;
+}
+
+/**
+ * kvm_vfio_unforward - remove a forwarded IRQ
+ * @kdev: the kvm device
+ * @vdev: the vfio_device
+ * @fwd_irq: user struct
+ * after checking this IRQ effectively is forwarded, change its state,
+ * remove it from the corresponding kvm_vfio_device list
+ *
+ * returns 0 on success, a negative error code otherwise
+ */
+static int kvm_vfio_unforward(struct kvm_device *kdev,
+			      struct vfio_device *vdev,
+			      struct kvm_arch_forwarded_irq *fwd_irq)
+{
+	struct kvm_vfio *kv = kdev->private;
+	struct kvm_vfio_device *kvm_vdev;
+	int ret;
+
+	ret = kvm_vfio_validate_unforward(kv, vdev, fwd_irq, &kvm_vdev);
+	if (ret < 0)
+		return ret;
+
+	ret = remove_fwd_irq(kv, kvm_vdev, fwd_irq->index);
+	if (ret < 0)
+		kvm_err("%s fail unforwarding (fd=%d, index=%d)\n",
+			__func__, fwd_irq->fd, fwd_irq->index);
+	else
+		kvm_debug("%s unforwarding IRQ (fd=%d, index=%d)\n",
+			  __func__, fwd_irq->fd, fwd_irq->index);
+	return ret;
+}
+
+/**
+ * kvm_vfio_set_device - the top function for interacting with a vfio
+ * device
+ * @kdev: the kvm device
+ * @attr: KVM_DEV_VFIO_DEVICE_FORWARD_IRQ or
+ * KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ
+ * @arg: user address of a struct kvm_arch_forwarded_irq
+ *
+ * returns 0 on success, -ENXIO if the attribute is not supported,
+ * -EFAULT if the user struct cannot be read, -EBADF if the fd does not
+ * refer to an open file, or the error reported while getting the
+ * device or (un)forwarding the IRQ
+ */
+static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
+{
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD
+	struct kvm_vfio *kv = kdev->private;
+	struct kvm_arch_forwarded_irq fwd_irq; /* user struct */
+	void __user *argp = (void __user *)(unsigned long)arg;
+	struct vfio_device *vdev;
+	bool must_put = true;
+	int ret;
+
+	if (attr != KVM_DEV_VFIO_DEVICE_FORWARD_IRQ &&
+	    attr != KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ)
+		return -ENXIO;
+
+	if (copy_from_user(&fwd_irq, argp, sizeof(fwd_irq)))
+		return -EFAULT;
+
+	/*
+	 * kvm_vfio_get_vfio_device returns NULL when the fd is not an
+	 * open file, so IS_ERR alone would let a NULL device through
+	 */
+	vdev = kvm_vfio_get_vfio_device(fwd_irq.fd);
+	if (IS_ERR_OR_NULL(vdev))
+		return vdev ? PTR_ERR(vdev) : -EBADF;
+
+	mutex_lock(&kv->lock);
+	if (attr == KVM_DEV_VFIO_DEVICE_FORWARD_IRQ)
+		ret = kvm_vfio_forward(kdev, vdev, &fwd_irq, &must_put);
+	else
+		ret = kvm_vfio_unforward(kdev, vdev, &fwd_irq);
+	mutex_unlock(&kv->lock);
+
+	/*
+	 * the ref taken above is only kept when a new device was
+	 * registered by a successful forward; on unforward it is held
+	 * until the lookup is over, then always dropped
+	 */
+	if (must_put)
+		kvm_vfio_put_vfio_device(vdev);
+	return ret;
+#else
+	return -ENXIO;
+#endif
+}
+
+/**
+ * kvm_vfio_put_all_devices - cancel forwarded IRQs and put all devices
+ * @kv: kvm-vfio device
+ *
+ * loop on all got devices and their associated forwarded IRQs
+ * restore the non forwarded state, remove IRQs and their devices from
+ * the respective list, put the vfio platform devices
+ *
+ * When this function is called, the vcpu already are destroyed. 
No + * vgic manipulation can happen hence the KVM_VFIO_IRQ_CLEANUP + * kvm_arch_set_fwd_state action + */ +int kvm_vfio_put_all_devices(struct kvm_vfio *kv) +{ + struct kvm_fwd_irq *fwd_irq_iter, *tmp_irq; + struct kvm_vfio_device *kvm_vdev_iter, *tmp_vdev; + + /* loop on all the assigned devices */ + list_for_each_entry_safe(kvm_vdev_iter, tmp_vdev, + &kv->device_list, node) { + + /* loop on all its forwarded IRQ */ + list_for_each_entry_safe(fwd_irq_iter, tmp_irq, + &kvm_vdev_iter->fwd_irq_list, link) { + kvm_arch_set_fwd_state(fwd_irq_iter, + KVM_VFIO_IRQ_CLEANUP); + list_del(&fwd_irq_iter->link); + kfree(fwd_irq_iter); + } + list_del(&kvm_vdev_iter->node); + kvm_vfio_device_put_external_user(kvm_vdev_iter->vfio_device); + kfree(kvm_vdev_iter); + } + return 0; +} + + static int kvm_vfio_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { switch (attr->group) { case KVM_DEV_VFIO_GROUP: return kvm_vfio_set_group(dev, attr->attr, attr->addr); + case KVM_DEV_VFIO_DEVICE: + return kvm_vfio_set_device(dev, attr->attr, attr->addr); } return -ENXIO; @@ -267,10 +706,17 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, case KVM_DEV_VFIO_GROUP_DEL: return 0; } - break; +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_FORWARD + case KVM_DEV_VFIO_DEVICE: + switch (attr->attr) { + case KVM_DEV_VFIO_DEVICE_FORWARD_IRQ: + case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ: + return 0; + } + break; +#endif } - return -ENXIO; } @@ -284,6 +730,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) list_del(&kvg->node); kfree(kvg); } + kvm_vfio_put_all_devices(kv); kvm_vfio_update_coherency(dev); @@ -306,6 +753,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) return -ENOMEM; INIT_LIST_HEAD(&kv->group_list); + INIT_LIST_HEAD(&kv->device_list); mutex_init(&kv->lock); dev->private = kv; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at 
http://vger.kernel.org/majordomo-info.html