On Tue, 9 Mar 2021 19:41:27 -0400 Jason Gunthorpe <jgg@xxxxxxxxxx> wrote: > On Tue, Mar 09, 2021 at 12:26:07PM -0700, Alex Williamson wrote: > > > In the new series, I think the fault handler becomes (untested): > > > > static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) > > { > > struct vm_area_struct *vma = vmf->vma; > > struct vfio_pci_device *vdev = vma->vm_private_data; > > unsigned long base_pfn, pgoff; > > vm_fault_t ret = VM_FAULT_SIGBUS; > > > > if (vfio_pci_bar_vma_to_pfn(vma, &base_pfn)) > > return ret; > > > > pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; > > I don't think this math is completely safe, it needs to parse the > vm_pgoff.. > > I'm worried userspace could split/punch/mangle a VMA using > munmap/mremap/etc/etc in a way that does update the pg_off but is > incompatible with the above. parsing vm_pgoff is done in: static int vfio_pci_bar_vma_to_pfn(struct vm_area_struct *vma, unsigned long *pfn) { struct vfio_pci_device *vdev = vma->vm_private_data; struct pci_dev *pdev = vdev->pdev; int index; u64 pgoff; index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); if (index >= VFIO_PCI_ROM_REGION_INDEX || !vdev->bar_mmap_supported[index] || !vdev->barmap[index]) return -EINVAL; pgoff = vma->vm_pgoff & ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); *pfn = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; return 0; } But given Peter's concern about faulting individual pages, I think the fault handler becomes: static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_device *vdev = vma->vm_private_data; unsigned long vaddr, pfn; vm_fault_t ret = VM_FAULT_SIGBUS; if (vfio_pci_bar_vma_to_pfn(vma, &pfn)) return ret; down_read(&vdev->memory_lock); if (__vfio_pci_memory_enabled(vdev)) { for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE, pfn++) { ret = vmf_insert_pfn_prot(vma, vaddr, pfn, pgprot_decrypted(vma->vm_page_prot)); if (ret != VM_FAULT_NOPAGE) { zap_vma_ptes(vma, vma->vm_start, vaddr - vma->vm_start); break; } } } up_read(&vdev->memory_lock); return ret; } Thanks, Alex