Hi Eric, On Wed, Dec 2, 2020 at 8:14 PM Auger Eric <eric.auger@xxxxxxxxxx> wrote: > > Hi Vikas, > > On 11/24/20 5:16 PM, Vikas Gupta wrote: > > MSI support for platform devices. > > > > Signed-off-by: Vikas Gupta <vikas.gupta@xxxxxxxxxxxx> > > --- > > drivers/vfio/platform/vfio_platform_common.c | 99 ++++++- > > drivers/vfio/platform/vfio_platform_irq.c | 260 +++++++++++++++++- > > drivers/vfio/platform/vfio_platform_private.h | 16 ++ > > include/uapi/linux/vfio.h | 43 +++ > > 4 files changed, 401 insertions(+), 17 deletions(-) > > > > diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c > > index c0771a9567fb..b0bfc0f4ee1f 100644 > > --- a/drivers/vfio/platform/vfio_platform_common.c > > +++ b/drivers/vfio/platform/vfio_platform_common.c > > @@ -16,6 +16,7 @@ > > #include <linux/types.h> > > #include <linux/uaccess.h> > > #include <linux/vfio.h> > > +#include <linux/nospec.h> > > > > #include "vfio_platform_private.h" > > > > @@ -344,9 +345,16 @@ static long vfio_platform_ioctl(void *device_data, > > > > } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { > > struct vfio_irq_info info; > > + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; > > + struct vfio_irq_info_cap_msi *msi_info = NULL; > > + unsigned long capsz; > > + int ext_irq_index = vdev->num_irqs - vdev->num_ext_irqs; > > > > minsz = offsetofend(struct vfio_irq_info, count); > > > > + /* For backward compatibility, cannot require this */ > > + capsz = offsetofend(struct vfio_irq_info, cap_offset); > > + > > if (copy_from_user(&info, (void __user *)arg, minsz)) > > return -EFAULT; > > > > @@ -356,9 +364,89 @@ static long vfio_platform_ioctl(void *device_data, > > if (info.index >= vdev->num_irqs) > > return -EINVAL; > > > > + if (info.argsz >= capsz) > > + minsz = capsz; > > + > > + if (info.index == ext_irq_index) { > nit: n case we add new ext indices afterwards, I would check info.index > - ext_irq_index against an VFIO_EXT_IRQ_MSI define. > > + struct vfio_irq_info_cap_type cap_type = { > > + .header.id = VFIO_IRQ_INFO_CAP_TYPE, > > + .header.version = 1 }; > > + int i; > > + int ret; > > + int num_msgs; > > + size_t msi_info_size; > > + struct vfio_platform_irq *irq; > nit: I think generally the opposite order (length) is chosen. This also > would better match the existing style in this file Ok will fix it > > + > > + info.index = array_index_nospec(info.index, > > + vdev->num_irqs); > > + > > + irq = &vdev->irqs[info.index]; > > + > > + info.flags = irq->flags; > I think this can be removed given [*] Sure. > > + cap_type.type = irq->type; > > + cap_type.subtype = irq->subtype; > > + > > + ret = vfio_info_add_capability(&caps, > > + &cap_type.header, > > + sizeof(cap_type)); > > + if (ret) > > + return ret; > > + > > + num_msgs = irq->num_ctx; > do would want to return the cap even if !num_ctx? If num_ctx = 0 then VFIO_IRQ_INFO_CAP_MSI_DESCS should not be written. I`ll take care of same. > > + > > + msi_info_size = struct_size(msi_info, msgs, num_msgs); > > + > > + msi_info = kzalloc(msi_info_size, GFP_KERNEL); > > + if (!msi_info) { > > + kfree(caps.buf); > > + return -ENOMEM; > > + } > > + > > + msi_info->header.id = VFIO_IRQ_INFO_CAP_MSI_DESCS; > > + msi_info->header.version = 1; > > + msi_info->nr_msgs = num_msgs; > > + > > + for (i = 0; i < num_msgs; i++) { > > + struct vfio_irq_ctx *ctx = &irq->ctx[i]; > > + > > + msi_info->msgs[i].addr_lo = ctx->msg.address_lo; > > + msi_info->msgs[i].addr_hi = ctx->msg.address_hi; > > + msi_info->msgs[i].data = ctx->msg.data; > > + } > > + > > + ret = vfio_info_add_capability(&caps, &msi_info->header, > > + msi_info_size); > > + if (ret) { > > + kfree(msi_info); > > + kfree(caps.buf); > > + return ret; > > + } > > + } > > + > > info.flags = vdev->irqs[info.index].flags; > [*] Will fix it. > > info.count = vdev->irqs[info.index].count; > > > > + if (caps.size) { > > + info.flags |= VFIO_IRQ_INFO_FLAG_CAPS; > > + if (info.argsz < sizeof(info) + caps.size) { > > + info.argsz = sizeof(info) + caps.size; > > + info.cap_offset = 0; > > + } else { > > + vfio_info_cap_shift(&caps, sizeof(info)); > > + if (copy_to_user((void __user *)arg + > > + sizeof(info), caps.buf, > > + caps.size)) { > > + kfree(msi_info); > > + kfree(caps.buf); > > + return -EFAULT; > > + } > > + info.cap_offset = sizeof(info); > > + } > > + > > + kfree(msi_info); > > + kfree(caps.buf); > > + } > > + > > return copy_to_user((void __user *)arg, &info, minsz) ? > > -EFAULT : 0; > > > > @@ -366,6 +454,7 @@ static long vfio_platform_ioctl(void *device_data, > > struct vfio_irq_set hdr; > > u8 *data = NULL; > > int ret = 0; > > + int max; > > size_t data_size = 0; > > > > minsz = offsetofend(struct vfio_irq_set, count); > > @@ -373,8 +462,14 @@ static long vfio_platform_ioctl(void *device_data, > > if (copy_from_user(&hdr, (void __user *)arg, minsz)) > > return -EFAULT; > > > > - ret = vfio_set_irqs_validate_and_prepare(&hdr, vdev->num_irqs, > > - vdev->num_irqs, &data_size); > > + if (hdr.index >= vdev->num_irqs) > > + return -EINVAL; > > + > > + max = vdev->irqs[hdr.index].count; > > + > > + ret = vfio_set_irqs_validate_and_prepare(&hdr, max, > > + vdev->num_irqs, > > + &data_size); > > if (ret) > > return ret; > > > > diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c > > index c5b09ec0a3c9..4066223e5b2e 100644 > > --- a/drivers/vfio/platform/vfio_platform_irq.c > > +++ b/drivers/vfio/platform/vfio_platform_irq.c > > @@ -8,10 +8,12 @@ > > > > #include <linux/eventfd.h> > > #include <linux/interrupt.h> > > +#include <linux/eventfd.h> > > #include <linux/slab.h> > > #include <linux/types.h> > > #include <linux/vfio.h> > > #include <linux/irq.h> > > +#include <linux/msi.h> > > > > #include "vfio_platform_private.h" > > > > @@ -253,6 +255,195 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, > > return 0; > > } > > > > +/* MSI/MSIX */ > > +static irqreturn_t vfio_msihandler(int irq, void *arg) > > +{ > > + struct eventfd_ctx *trigger = arg; > > + > > + eventfd_signal(trigger, 1); > > + return IRQ_HANDLED; > > +} > > + > > +static void msi_write(struct msi_desc *desc, struct msi_msg *msg) > > +{ > > + int i; > > + struct vfio_platform_irq *irq; > > + u16 index = desc->platform.msi_index; > > + struct device *dev = msi_desc_to_dev(desc); > > + struct vfio_device *device = dev_get_drvdata(dev); > > + struct vfio_platform_device *vdev = (struct vfio_platform_device *) > > + vfio_device_data(device); > > + > > + for (i = 0; i < vdev->num_irqs; i++) > > + if (vdev->irqs[i].type == VFIO_IRQ_TYPE_MSI) > > + irq = &vdev->irqs[i]; > > + > > + irq->ctx[index].msg.address_lo = msg->address_lo; > > + irq->ctx[index].msg.address_hi = msg->address_hi; > > + irq->ctx[index].msg.data = msg->data; > > +} > > + > > +static int vfio_msi_enable(struct vfio_platform_device *vdev, > > + struct vfio_platform_irq *irq, int nvec) > > +{ > > + int ret; > > + int msi_idx = 0; > > + struct msi_desc *desc; > > + struct device *dev = vdev->device; > > + > > + irq->ctx = kcalloc(nvec, sizeof(struct vfio_irq_ctx), GFP_KERNEL); > > + if (!irq->ctx) > > + return -ENOMEM; > > + > > + /* Allocate platform MSIs */ > > + ret = platform_msi_domain_alloc_irqs(dev, nvec, msi_write); > > + if (ret < 0) { > > + kfree(irq->ctx); > > + return ret; > > + } > > + > > + for_each_msi_entry(desc, dev) { > > + irq->ctx[msi_idx].hwirq = desc->irq; > > + msi_idx++; > > + } > > + > > + irq->num_ctx = nvec; > > + irq->config_msi = 1; > > + > > + return 0; > > +} > > + > > +static int vfio_msi_set_vector_signal(struct vfio_platform_irq *irq, > > + int vector, int fd) > > +{ > > + struct eventfd_ctx *trigger; > > + int irq_num, ret; > > + > > + if (vector < 0 || vector >= irq->num_ctx) > > + return -EINVAL; > > + > > + irq_num = irq->ctx[vector].hwirq; > > + > > + if (irq->ctx[vector].trigger) { > > + free_irq(irq_num, irq->ctx[vector].trigger); > > + kfree(irq->ctx[vector].name); > > + eventfd_ctx_put(irq->ctx[vector].trigger); > > + irq->ctx[vector].trigger = NULL; > > + } > > + > > + if (fd < 0) > > + return 0; > > + > > + irq->ctx[vector].name = kasprintf(GFP_KERNEL, > > + "vfio-msi[%d]", vector); > > + if (!irq->ctx[vector].name) > > + return -ENOMEM; > > + > > + trigger = eventfd_ctx_fdget(fd); > > + if (IS_ERR(trigger)) { > > + kfree(irq->ctx[vector].name); > > + return PTR_ERR(trigger); > > + } > > + > > + ret = request_irq(irq_num, vfio_msihandler, 0, > > + irq->ctx[vector].name, trigger); > > + if (ret) { > > + kfree(irq->ctx[vector].name); > > + eventfd_ctx_put(trigger); > > + return ret; > > + } > > + > > + irq->ctx[vector].trigger = trigger; > > + > > + return 0; > > +} > > + > > +static int vfio_msi_set_block(struct vfio_platform_irq *irq, unsigned int start, > > + unsigned int count, int32_t *fds) > > +{ > > + int i, j, ret = 0; > > + > > + if (start >= irq->num_ctx || start + count > irq->num_ctx) > > + return -EINVAL; > > + > > + for (i = 0, j = start; i < count && !ret; i++, j++) { > > + int fd = fds ? fds[i] : -1; > > + > > + ret = vfio_msi_set_vector_signal(irq, j, fd); > > + } > > + > > + if (ret) { > > + for (--j; j >= (int)start; j--) > > + vfio_msi_set_vector_signal(irq, j, -1); > > + } > > + > > + return ret; > > +} > > + > > +static void vfio_msi_disable(struct vfio_platform_device *vdev, > > + struct vfio_platform_irq *irq) > > +{ > > + struct device *dev = vdev->device; > > + > > + vfio_msi_set_block(irq, 0, irq->num_ctx, NULL); > > + > > + platform_msi_domain_free_irqs(dev); > > + > > + irq->config_msi = 0; > > + irq->num_ctx = 0; > > + > > + kfree(irq->ctx); > > +} > > + > > +static int vfio_set_msi_trigger(struct vfio_platform_device *vdev, > > + unsigned int index, unsigned int start, > > + unsigned int count, uint32_t flags, void *data) > > +{ > > + int i; > > + struct vfio_platform_irq *irq = &vdev->irqs[index]; > > + > > + if (start + count > irq->count) > > + return -EINVAL; > > + > > + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) { > > + vfio_msi_disable(vdev, irq); > > + return 0; > > + } > > + > > + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { > > + s32 *fds = data; > > + int ret; > > + > > + if (irq->config_msi) > > + return vfio_msi_set_block(irq, start, count, > > + fds); > > + ret = vfio_msi_enable(vdev, irq, start + count); > > + if (ret) > > + return ret; > > + > > + ret = vfio_msi_set_block(irq, start, count, fds); > > + if (ret) > > + vfio_msi_disable(vdev, irq); > > + > > + return ret; > > + } > > + > > + for (i = start; i < start + count; i++) { > > + if (!irq->ctx[i].trigger) > > + continue; > > + if (flags & VFIO_IRQ_SET_DATA_NONE) { > > + eventfd_signal(irq->ctx[i].trigger, 1); > > + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { > > + u8 *bools = data; > > + > > + if (bools[i - start]) > > + eventfd_signal(irq->ctx[i].trigger, 1); > > + } > > + } > > + > > + return 0; > > +} > > + > > int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, > > uint32_t flags, unsigned index, unsigned start, > > unsigned count, void *data) > > @@ -261,16 +452,29 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, > > unsigned start, unsigned count, uint32_t flags, > > void *data) = NULL; > > > > - switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { > > - case VFIO_IRQ_SET_ACTION_MASK: > > - func = vfio_platform_set_irq_mask; > > - break; > > - case VFIO_IRQ_SET_ACTION_UNMASK: > > - func = vfio_platform_set_irq_unmask; > > - break; > > - case VFIO_IRQ_SET_ACTION_TRIGGER: > > - func = vfio_platform_set_irq_trigger; > > - break; > > + struct vfio_platform_irq *irq = &vdev->irqs[index]; > > + > > + if (irq->type == VFIO_IRQ_TYPE_MSI) { > > + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { > > + case VFIO_IRQ_SET_ACTION_MASK: > > + case VFIO_IRQ_SET_ACTION_UNMASK: > > + break; > > + case VFIO_IRQ_SET_ACTION_TRIGGER: > > + func = vfio_set_msi_trigger; > > + break; > > + } > > + } else { > > + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { > > + case VFIO_IRQ_SET_ACTION_MASK: > > + func = vfio_platform_set_irq_mask; > > + break; > > + case VFIO_IRQ_SET_ACTION_UNMASK: > > + func = vfio_platform_set_irq_unmask; > > + break; > > + case VFIO_IRQ_SET_ACTION_TRIGGER: > > + func = vfio_platform_set_irq_trigger; > > + break; > > + } > > } > > > > if (!func) > > @@ -281,12 +485,21 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, > > > > int vfio_platform_irq_init(struct vfio_platform_device *vdev) > > { > > - int cnt = 0, i; > > + int i; > > + int cnt = 0; > > + int num_irqs; > > + struct device *dev = vdev->device; > > > > while (vdev->get_irq(vdev, cnt) >= 0) > > cnt++; > > > > - vdev->irqs = kcalloc(cnt, sizeof(struct vfio_platform_irq), GFP_KERNEL); > > + num_irqs = cnt; > > + > > + if (dev->msi_domain) > > + num_irqs++; > > + > > + vdev->irqs = kcalloc(num_irqs, sizeof(struct vfio_platform_irq), > > + GFP_KERNEL); > > if (!vdev->irqs) > > return -ENOMEM; > > > > @@ -309,7 +522,19 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev) > > vdev->irqs[i].masked = false; > > } > > > > - vdev->num_irqs = cnt; > > + /* > > + * MSI block is added at last index and its an ext irq > it is > > + */ > > + if (dev->msi_domain) { > > + vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; > > + vdev->irqs[i].count = NR_IRQS; > why NR_IRQS? Since different devices can have different numbers of MSI(s) so we need to initialize with max possible values. Can you please suggest if this value does not seem appropriate? > > + vdev->irqs[i].hwirq = 0; > > + vdev->irqs[i].masked = false; > > + vdev->irqs[i].type = VFIO_IRQ_TYPE_MSI; > > + vdev->num_ext_irqs = 1; > > + } > > + > > + vdev->num_irqs = num_irqs; > > > > return 0; > > err: > > @@ -321,8 +546,13 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) > > { > > int i; > > > > - for (i = 0; i < vdev->num_irqs; i++) > > - vfio_set_trigger(vdev, i, -1, NULL); > > + for (i = 0; i < vdev->num_irqs; i++) { > > + if (vdev->irqs[i].type == VFIO_IRQ_TYPE_MSI) > > + vfio_set_msi_trigger(vdev, i, 0, 0, > > + VFIO_IRQ_SET_DATA_NONE, NULL); > > + else > > + vfio_set_trigger(vdev, i, -1, NULL); > > + } > > > > vdev->num_irqs = 0; > > kfree(vdev->irqs); > > diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h > > index 289089910643..7bbb05988705 100644 > > --- a/drivers/vfio/platform/vfio_platform_private.h > > +++ b/drivers/vfio/platform/vfio_platform_private.h > > @@ -9,6 +9,7 @@ > > > > #include <linux/types.h> > > #include <linux/interrupt.h> > > +#include <linux/msi.h> > > > > #define VFIO_PLATFORM_OFFSET_SHIFT 40 > > #define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1) > > @@ -19,9 +20,18 @@ > > #define VFIO_PLATFORM_INDEX_TO_OFFSET(index) \ > > ((u64)(index) << VFIO_PLATFORM_OFFSET_SHIFT) > > > > +struct vfio_irq_ctx { > > + int hwirq; > > + char *name; > > + struct msi_msg msg; > > + struct eventfd_ctx *trigger; > > +}; > > + > > struct vfio_platform_irq { > > u32 flags; > > u32 count; > > + int num_ctx; > > + struct vfio_irq_ctx *ctx; > > int hwirq; > > char *name; > > struct eventfd_ctx *trigger; > > @@ -29,6 +39,11 @@ struct vfio_platform_irq { > > spinlock_t lock; > > struct virqfd *unmask; > > struct virqfd *mask; > > + > > + /* for extended irqs */ > > + u32 type; > > + u32 subtype; > > + int config_msi; > > }; > > > > struct vfio_platform_region { > > @@ -46,6 +61,7 @@ struct vfio_platform_device { > > u32 num_regions; > > struct vfio_platform_irq *irqs; > > u32 num_irqs; > > + int num_ext_irqs; > > int refcnt; > > struct mutex igate; > > struct module *parent_module; > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > > index 2f313a238a8f..598d1c944283 100644 > > --- a/include/uapi/linux/vfio.h > > +++ b/include/uapi/linux/vfio.h > > @@ -697,11 +697,54 @@ struct vfio_irq_info { > > #define VFIO_IRQ_INFO_MASKABLE (1 << 1) > > #define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) > > #define VFIO_IRQ_INFO_NORESIZE (1 << 3) > > +#define VFIO_IRQ_INFO_FLAG_CAPS (1 << 4) /* Info supports caps */ > > __u32 index; /* IRQ index */ > > __u32 count; /* Number of IRQs within this index */ > > + __u32 cap_offset; /* Offset within info struct of first cap */ > > }; > > #define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) > > > > +/* > > + * The irq type capability allows IRQs unique to a specific device or > > + * class of devices to be exposed. > > + * > > + * The structures below define version 1 of this capability. > > + */ > > +#define VFIO_IRQ_INFO_CAP_TYPE 3 > > + > > +struct vfio_irq_info_cap_type { > > + struct vfio_info_cap_header header; > > + __u32 type; /* global per bus driver */ > > + __u32 subtype; /* type specific */ > > +}; > > + > > +/* > > + * List of IRQ types, global per bus driver. > > + * If you introduce a new type, please add it here. > > + */ > > + > > +/* Non PCI devices having MSI(s) support */ > > +#define VFIO_IRQ_TYPE_MSI (1) > > + > > +/* > > + * The msi capability allows the user to use the msi msg to > > + * configure the iova for the msi configuration. > > + * The structures below define version 1 of this capability. > > + */ > > +#define VFIO_IRQ_INFO_CAP_MSI_DESCS 4 > > + > > +struct vfio_irq_msi_msg { > > + __u32 addr_lo; > > + __u32 addr_hi; > > + __u32 data; > > +}; > > + > > +struct vfio_irq_info_cap_msi { > > + struct vfio_info_cap_header header; > > + __u32 nr_msgs; > I think you should align a __u32 reserved field to have a 64b alignment Sure. > > + struct vfio_irq_msi_msg msgs[]; > Please can you clarify why this cap is needed versus your prior approach. In the previous patch, the reset module was configuring the device with MSI msg/data now since the module is not available, user space needs to have this data. With this approach userspace just needs the pairs <msg and ctx > associated with the MSI(s) and it can choose to configure the MSI(s) sources accordingly. Let me know if this approach does not look appropriate. Thanks, Vikas > > +}; > > + > > /** > > * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set) > > * > > > Thanks > > Eric >
Attachment:
smime.p7s
Description: S/MIME Cryptographic Signature