On Tue, Mar 09, 2021 at 12:56:39PM -0700, Alex Williamson wrote: > And I think this is what we end up with for the current code base: Yeah, that looks Ok > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > index 65e7e6b44578..2f247ab18c66 100644 > +++ b/drivers/vfio/pci/vfio_pci.c > @@ -1568,19 +1568,24 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd) > } > > /* Caller holds vma_lock */ > -static int __vfio_pci_add_vma(struct vfio_pci_device *vdev, > - struct vm_area_struct *vma) > +struct vfio_pci_mmap_vma *__vfio_pci_add_vma(struct vfio_pci_device *vdev, > + struct vm_area_struct *vma) > { > struct vfio_pci_mmap_vma *mmap_vma; > > + list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { > + if (mmap_vma->vma == vma) > + return ERR_PTR(-EEXIST); > + } > + > mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL); > if (!mmap_vma) > - return -ENOMEM; > + return ERR_PTR(-ENOMEM); > > mmap_vma->vma = vma; > list_add(&mmap_vma->vma_next, &vdev->vma_list); > > - return 0; > + return mmap_vma; > } > > /* > @@ -1612,30 +1617,39 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) > { > struct vm_area_struct *vma = vmf->vma; > struct vfio_pci_device *vdev = vma->vm_private_data; > - vm_fault_t ret = VM_FAULT_NOPAGE; > + struct vfio_pci_mmap_vma *mmap_vma; > + unsigned long vaddr, pfn; > + vm_fault_t ret; > > mutex_lock(&vdev->vma_lock); > down_read(&vdev->memory_lock); > > if (!__vfio_pci_memory_enabled(vdev)) { > ret = VM_FAULT_SIGBUS; > - mutex_unlock(&vdev->vma_lock); > goto up_out; > } > > - if (__vfio_pci_add_vma(vdev, vma)) { > - ret = VM_FAULT_OOM; > - mutex_unlock(&vdev->vma_lock); > + mmap_vma = __vfio_pci_add_vma(vdev, vma); > + if (IS_ERR(mmap_vma)) { > + /* A concurrent fault might have already inserted the page */ > + ret = (PTR_ERR(mmap_vma) == -EEXIST) ? 
VM_FAULT_NOPAGE : > + VM_FAULT_OOM; I think -EEXIST should not be an error; let's just go down to the vmf_insert_pfn() and let the MM resolve the race naturally. I suspect returning VM_FAULT_NOPAGE will be adverse to userspace if it hits this race? Also, the _prot does look needed, at least due to the SME, but possibly also to ensure NC gets set. Jason