In order to avoid having userspace to use MI_MEM_FENCE, we are adding a mechanism for userspace to generate a PCI memory barrier with low overhead (avoiding IOCTL call). This is implemented by memory-mapping a page as uncached that is backed by MMIO on the dGPU and thus allowing userspace to do memory write to the page without invoking an IOCTL. We are selecting the MMIO so that it is not accessible from the PCI bus so that the MMIO writes themselves are ignored, but the PCI memory barrier will still take action as the MMIO filtering will happen after the memory barrier effect. When we detect special defined offset in mmap(), We are mapping 4K page which contains the last of page of doorbell MMIO range to userspace for same purpose. Note: Test coverage for this is added by IGT https://patchwork.freedesktop.org/patch/618931/ here. Signed-off-by: Tejas Upadhyay <tejas.upadhyay@xxxxxxxxx> --- drivers/gpu/drm/xe/xe_device.c | 68 +++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index cd241a8e1838..c97a4d1f0a98 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -239,12 +239,78 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo #define xe_drm_compat_ioctl NULL #endif +static void barrier_open(struct vm_area_struct *vma) +{ + drm_dev_get(vma->vm_private_data); +} + +static void barrier_close(struct vm_area_struct *vma) +{ + drm_dev_put(vma->vm_private_data); +} + +static const struct vm_operations_struct vm_ops_barrier = { + .open = barrier_open, + .close = barrier_close, +}; + +static int xe_pci_barrier_mmap(struct file *filp, + struct vm_area_struct *vma) +{ + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + unsigned long pfn; + pgprot_t prot; + + if (vma->vm_end - vma->vm_start > PAGE_SIZE) + return -EINVAL; + + if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + + if (vma->vm_flags & (VM_READ | VM_EXEC)) + return -EINVAL; + + vm_flags_clear(vma, VM_MAYREAD | VM_MAYEXEC); + vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + + prot = vm_get_page_prot(vma->vm_flags); +#define LAST_DB_PAGE_OFFSET 0x7ff001 + pfn = PHYS_PFN(pci_resource_start(to_pci_dev(dev->dev), 0) + + LAST_DB_PAGE_OFFSET); + if (vmf_insert_pfn_prot(vma, vma->vm_start, pfn, + pgprot_noncached(prot)) != VM_FAULT_NOPAGE) + return -EFAULT; + + vma->vm_ops = &vm_ops_barrier; + vma->vm_private_data = dev; + drm_dev_get(vma->vm_private_data); + return 0; +} + +static int xe_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + +#define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << PAGE_SHIFT) + switch (vma->vm_pgoff) { + case XE_PCI_BARRIER_MMAP_OFFSET >> PAGE_SHIFT: + return xe_pci_barrier_mmap(filp, vma); + } + + return drm_gem_mmap(filp, vma); +} + static const struct file_operations xe_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release_noglobal, .unlocked_ioctl = xe_drm_ioctl, - .mmap = drm_gem_mmap, + .mmap = xe_mmap, .poll = drm_poll, .read = drm_read, .compat_ioctl = xe_drm_compat_ioctl, -- 2.34.1