On Fri, Feb 05, 2021 at 01:53:24PM -0700, Dave Jiang wrote:

> +static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma)
>  {
> -	/* FIXME: Fill in later */
> +	if (vma->vm_end < vma->vm_start)
> +		return -EINVAL;

These checks are redundant

> -static int idxd_mdev_host_release(struct idxd_device *idxd)
> +static int idxd_vdcm_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
> +{
> +	unsigned int wq_idx, rc;
> +	unsigned long req_size, pgoff = 0, offset;
> +	pgprot_t pg_prot;
> +	struct vdcm_idxd *vidxd = mdev_get_drvdata(mdev);
> +	struct idxd_wq *wq = vidxd->wq;
> +	struct idxd_device *idxd = vidxd->idxd;
> +	enum idxd_portal_prot virt_portal, phys_portal;
> +	phys_addr_t base = pci_resource_start(idxd->pdev, IDXD_WQ_BAR);
> +	struct device *dev = mdev_dev(mdev);
> +
> +	rc = check_vma(wq, vma);
> +	if (rc)
> +		return rc;
> +
> +	pg_prot = vma->vm_page_prot;
> +	req_size = vma->vm_end - vma->vm_start;
> +	vma->vm_flags |= VM_DONTCOPY;
> +
> +	offset = (vma->vm_pgoff << PAGE_SHIFT) &
> +		 ((1ULL << VFIO_PCI_OFFSET_SHIFT) - 1);
> +
> +	wq_idx = offset >> (PAGE_SHIFT + 2);
> +	if (wq_idx >= 1) {
> +		dev_err(dev, "mapping invalid wq %d off %lx\n",
> +			wq_idx, offset);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * Check and see if the guest wants to map to the limited or unlimited portal.
> +	 * The driver will allow mapping to the unlimited portal only if the wq is a
> +	 * dedicated wq. Otherwise, it goes to limited.
> +	 */
> +	virt_portal = ((offset >> PAGE_SHIFT) & 0x3) == 1;
> +	phys_portal = IDXD_PORTAL_LIMITED;
> +	if (virt_portal == IDXD_PORTAL_UNLIMITED && wq_dedicated(wq))
> +		phys_portal = IDXD_PORTAL_UNLIMITED;
> +
> +	/* We always map IMS portals to the guest */
> +	pgoff = (base + idxd_get_wq_portal_full_offset(wq->id, phys_portal,
> +						       IDXD_IRQ_IMS)) >> PAGE_SHIFT;
> +	dev_dbg(dev, "mmap %lx %lx %lx %lx\n", vma->vm_start, pgoff, req_size,
> +		pgprot_val(pg_prot));
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +	vma->vm_private_data = mdev;

What ensures the mdev pointer is valid strictly longer than the VMA?
This needs refcounting.

> +	vma->vm_pgoff = pgoff;
> +
> +	return remap_pfn_range(vma, vma->vm_start, pgoff, req_size, pg_prot);

Nothing validated req_size - did you copy this from the Intel RDMA
driver? It had a huge security bug just like this.
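Roughly the sort of thing I mean - an untested sketch, assuming each
portal mapping is exactly one page; idxd_vdcm_vm_ops and the open/close
pairing are names I've made up here:

	/*
	 * Bound the request by what the device actually exposes: a
	 * single portal page. Otherwise userspace controls how much
	 * physical address space gets mapped.
	 */
	if (req_size != PAGE_SIZE)
		return -EINVAL;

And for the lifetime problem, have the VMA hold a reference on the mdev:

static void idxd_vdcm_vm_open(struct vm_area_struct *vma)
{
	struct mdev_device *mdev = vma->vm_private_data;

	/* Each VMA (including ones created by splits) holds a reference */
	get_device(mdev_dev(mdev));
}

static void idxd_vdcm_vm_close(struct vm_area_struct *vma)
{
	struct mdev_device *mdev = vma->vm_private_data;

	put_device(mdev_dev(mdev));
}

static const struct vm_operations_struct idxd_vdcm_vm_ops = {
	.open = idxd_vdcm_vm_open,
	.close = idxd_vdcm_vm_close,
};

with mmap taking the initial reference before remap_pfn_range():

	vma->vm_ops = &idxd_vdcm_vm_ops;
	get_device(mdev_dev(mdev));	/* dropped in ->close */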
> +
> +static int msix_trigger_unregister(struct vdcm_idxd *vidxd, int index)
> +{
> +	struct mdev_device *mdev = vidxd->vdev.mdev;
> +	struct device *dev = mdev_dev(mdev);
> +	struct ims_irq_entry *irq_entry;
> +	int rc;
> +
> +	if (!vidxd->vdev.msix_trigger[index])
> +		return 0;
> +
> +	dev_dbg(dev, "disable MSIX trigger %d\n", index);
> +	if (index) {
> +		u32 auxval;
> +
> +		irq_entry = &vidxd->irq_entries[index];
> +		if (irq_entry->irq_set) {
> +			free_irq(irq_entry->irq, irq_entry);
> +			irq_entry->irq_set = false;
> +		}
> +
> +		auxval = ims_ctrl_pasid_aux(0, false);
> +		rc = irq_set_auxdata(irq_entry->irq, IMS_AUXDATA_CONTROL_WORD, auxval);
> +		if (rc)
> +			return rc;
> +	}
> +	eventfd_ctx_put(vidxd->vdev.msix_trigger[index]);
> +	vidxd->vdev.msix_trigger[index] = NULL;
> +
> +	return 0;
> +}
> +
> +static int msix_trigger_register(struct vdcm_idxd *vidxd, u32 fd, int index)
> +{
> +	struct mdev_device *mdev = vidxd->vdev.mdev;
> +	struct device *dev = mdev_dev(mdev);
> +	struct ims_irq_entry *irq_entry;
> +	struct eventfd_ctx *trigger;
> +	int rc;
> +
> +	if (vidxd->vdev.msix_trigger[index])
> +		return 0;
> +
> +	dev_dbg(dev, "enable MSIX trigger %d\n", index);
> +	trigger = eventfd_ctx_fdget(fd);
> +	if (IS_ERR(trigger)) {
> +		dev_warn(dev, "eventfd_ctx_fdget failed %d\n", index);
> +		return PTR_ERR(trigger);
> +	}
> +
> +	if (index) {
> +		u32 pasid;
> +		u32 auxval;
> +
> +		irq_entry = &vidxd->irq_entries[index];
> +		rc = idxd_mdev_get_pasid(mdev, &pasid);
> +		if (rc < 0)
> +			return rc;
> +
> +		/*
> +		 * Program and enable the pasid field in the IMS entry. The programmed pasid and
> +		 * enabled field is checked against the pasid and enable field for the work queue
> +		 * configuration and the pasid for the descriptor. A mismatch will result in a
> +		 * blocked IMS interrupt.
> +		 */
> +		auxval = ims_ctrl_pasid_aux(pasid, true);
> +		rc = irq_set_auxdata(irq_entry->irq, IMS_AUXDATA_CONTROL_WORD, auxval);
> +		if (rc < 0)
> +			return rc;
> +
> +		rc = request_irq(irq_entry->irq, idxd_guest_wq_completion, 0, "idxd-ims",
> +				 irq_entry);
> +		if (rc) {
> +			dev_warn(dev, "failed to request ims irq\n");
> +			eventfd_ctx_put(trigger);
> +			auxval = ims_ctrl_pasid_aux(0, false);
> +			irq_set_auxdata(irq_entry->irq, IMS_AUXDATA_CONTROL_WORD, auxval);
> +			return rc;
> +		}
> +		irq_entry->irq_set = true;
> +	}
> +
> +	vidxd->vdev.msix_trigger[index] = trigger;
> +	return 0;
> +}
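As an aside, I'm assuming idxd_guest_wq_completion does little more than
relay the IMS interrupt to the guest by signaling the eventfd - something
like the sketch below. The vidxd/id fields on ims_irq_entry are my
guesses, and vidxd_send_interrupt is still a placeholder later in this
patch:

static irqreturn_t idxd_guest_wq_completion(int irq, void *data)
{
	struct ims_irq_entry *irq_entry = data;

	/* Forward the hardware IMS interrupt to the guest's vector */
	vidxd_send_interrupt(irq_entry);
	return IRQ_HANDLED;
}

int vidxd_send_interrupt(struct ims_irq_entry *iie)
{
	struct vdcm_idxd *vidxd = iie->vidxd;	/* guessed field */
	struct eventfd_ctx *trigger = vidxd->vdev.msix_trigger[iie->id];

	if (trigger)
		eventfd_signal(trigger, 1);	/* kick the guest vector */
	return 0;
}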
> +
> +static int vdcm_idxd_set_msix_trigger(struct vdcm_idxd *vidxd,
> +				      unsigned int index, unsigned int start,
> +				      unsigned int count, uint32_t flags,
> +				      void *data)
> +{
> +	int i, rc = 0;
> +
> +	if (count > VIDXD_MAX_MSIX_ENTRIES - 1)
> +		count = VIDXD_MAX_MSIX_ENTRIES - 1;
> +
> +	if (count == 0 && (flags & VFIO_IRQ_SET_DATA_NONE)) {
> +		/* Disable all MSIX entries */
> +		for (i = 0; i < VIDXD_MAX_MSIX_ENTRIES; i++) {
> +			rc = msix_trigger_unregister(vidxd, i);
> +			if (rc < 0)
> +				return rc;
> +		}
> +		return 0;
> +	}
> +
> +	for (i = 0; i < count; i++) {
> +		if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
> +			u32 fd = *(u32 *)(data + i * sizeof(u32));
> +
> +			rc = msix_trigger_register(vidxd, fd, i);
> +			if (rc < 0)
> +				return rc;
> +		} else if (flags & VFIO_IRQ_SET_DATA_NONE) {
> +			rc = msix_trigger_unregister(vidxd, i);
> +			if (rc < 0)
> +				return rc;
> +		}
> +	}
> +	return rc;
> +}
> +
> +static int idxd_vdcm_set_irqs(struct vdcm_idxd *vidxd, uint32_t flags,
> +			      unsigned int index, unsigned int start,
> +			      unsigned int count, void *data)
> +{
> +	int (*func)(struct vdcm_idxd *vidxd, unsigned int index,
> +		    unsigned int start, unsigned int count, uint32_t flags,
> +		    void *data) = NULL;
> +	struct mdev_device *mdev = vidxd->vdev.mdev;
> +	struct device *dev = mdev_dev(mdev);
> +
> +	switch (index) {
> +	case VFIO_PCI_INTX_IRQ_INDEX:
> +		dev_warn(dev, "intx interrupts not supported.\n");
> +		break;
> +	case VFIO_PCI_MSI_IRQ_INDEX:
> +		dev_dbg(dev, "msi interrupt.\n");
> +		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
> +		case VFIO_IRQ_SET_ACTION_MASK:
> +		case VFIO_IRQ_SET_ACTION_UNMASK:
> +			break;
> +		case VFIO_IRQ_SET_ACTION_TRIGGER:
> +			func = vdcm_idxd_set_msix_trigger;

This would be a good place to insert a common VFIO helper library to
take care of the MSI-X emulation for IMS.
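To sketch the shape of what I have in mind - hypothetical interface,
none of these names exist today - every IMS-backed mdev driver is
otherwise going to open-code the same trigger/eventfd bookkeeping, so
the VFIO core could own it:

/* Hypothetical per-vector state a vfio_ims helper library would manage */
struct vfio_ims_vector {
	int irq;			/* Linux IRQ from the IMS domain */
	struct eventfd_ctx *trigger;	/* signalled on interrupt */
	bool enabled;
};

/*
 * Would implement the VFIO_DEVICE_SET_IRQS trigger action generically:
 * eventfd_ctx_fdget(), request_irq(), PASID programming hooks, and the
 * matching teardown, so drivers only supply the IMS specifics.
 */
int vfio_ims_set_irqs(struct vfio_ims_vector *vectors, unsigned int nvec,
		      u32 flags, unsigned int start, unsigned int count,
		      void *data);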
> +int idxd_mdev_host_init(struct idxd_device *idxd)
> +{
> +	struct device *dev = &idxd->pdev->dev;
> +	int rc;
> +
> +	if (!test_bit(IDXD_FLAG_IMS_SUPPORTED, &idxd->flags))
> +		return -EOPNOTSUPP;
> +
> +	if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX)) {
> +		rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);

Huh. This is the first user of IOMMU_DEV_FEAT_AUX, why has so much
dead-code infrastructure been already merged around this?

> @@ -34,6 +1024,7 @@ static int idxd_mdev_aux_probe(struct auxiliary_device *auxdev,
>  		return rc;
>  	}
>  
> +	set_bit(IDXD_FLAG_MDEV_ENABLED, &idxd->flags);

Something is being done wrong if this flag is needed

> +int vidxd_send_interrupt(struct ims_irq_entry *iie)
> +{
> +	/* PLACE HOLDER */
> +	return 0;
> +}

Here too, don't structure the patches like this

> diff --git a/drivers/vfio/mdev/idxd/vdev.h b/drivers/vfio/mdev/idxd/vdev.h
> new file mode 100644
> index 000000000000..cc2ba6ccff7b
> --- /dev/null
> +++ b/drivers/vfio/mdev/idxd/vdev.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright(c) 2019,2020 Intel Corporation. All rights rsvd. */
> +
> +#ifndef _IDXD_VDEV_H_
> +#define _IDXD_VDEV_H_
> +
> +#include "mdev.h"
> +
> +int vidxd_mmio_read(struct vdcm_idxd *vidxd, u64 pos, void *buf, unsigned int size);
> +int vidxd_mmio_write(struct vdcm_idxd *vidxd, u64 pos, void *buf, unsigned int size);
> +int vidxd_cfg_read(struct vdcm_idxd *vidxd, unsigned int pos, void *buf, unsigned int count);
> +int vidxd_cfg_write(struct vdcm_idxd *vidxd, unsigned int pos, void *buf, unsigned int size);
> +void vidxd_mmio_init(struct vdcm_idxd *vidxd);
> +void vidxd_reset(struct vdcm_idxd *vidxd);
> +int vidxd_send_interrupt(struct ims_irq_entry *iie);
> +int vidxd_setup_ims_entries(struct vdcm_idxd *vidxd);
> +void vidxd_free_ims_entries(struct vdcm_idxd *vidxd);

Why are these functions special??

Jason