Hi Eric, > > -----Original Message----- > > From: Eric Auger [mailto:eric.auger@xxxxxxxxxx] > > Sent: 16 November 2020 11:00 > > To: eric.auger.pro@xxxxxxxxx; eric.auger@xxxxxxxxxx; > > iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; > > kvm@xxxxxxxxxxxxxxx; kvmarm@xxxxxxxxxxxxxxxxxxxxx; will@xxxxxxxxxx; > > joro@xxxxxxxxxx; maz@xxxxxxxxxx; robin.murphy@xxxxxxx; > > alex.williamson@xxxxxxxxxx > > Cc: jean-philippe@xxxxxxxxxx; zhangfei.gao@xxxxxxxxxx; > > zhangfei.gao@xxxxxxxxx; vivek.gautam@xxxxxxx; Shameerali Kolothum > > Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>; > > jacob.jun.pan@xxxxxxxxxxxxxxx; yi.l.liu@xxxxxxxxx; tn@xxxxxxxxxxxx; > > nicoleotsuka@xxxxxxxxx; yuzenghui <yuzenghui@xxxxxxxxxx> > > Subject: [PATCH v11 12/13] vfio/pci: Register a DMA fault response > > region > > > > In preparation for vSVA, let's register a DMA fault response region, > > where the userspace will push the page responses and increment the > > head of the buffer. The kernel will pop those responses and inject > > them on iommu side. 
> > > > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> > > --- > > drivers/vfio/pci/vfio_pci.c | 114 +++++++++++++++++++++++++--- > > drivers/vfio/pci/vfio_pci_private.h | 5 ++ > > drivers/vfio/pci/vfio_pci_rdwr.c | 39 ++++++++++ > > include/uapi/linux/vfio.h | 32 ++++++++ > > 4 files changed, 181 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > > index 65a83fd0e8c0..e9a904ce3f0d 100644 > > --- a/drivers/vfio/pci/vfio_pci.c > > +++ b/drivers/vfio/pci/vfio_pci.c > > @@ -318,9 +318,20 @@ static void vfio_pci_dma_fault_release(struct > > vfio_pci_device *vdev, > > kfree(vdev->fault_pages); > > } > > > > -static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, > > - struct vfio_pci_region *region, > > - struct vm_area_struct *vma) > > +static void > > +vfio_pci_dma_fault_response_release(struct vfio_pci_device *vdev, > > + struct vfio_pci_region *region) { > > + if (vdev->dma_fault_response_wq) > > + destroy_workqueue(vdev->dma_fault_response_wq); > > + kfree(vdev->fault_response_pages); > > + vdev->fault_response_pages = NULL; > > +} > > + > > +static int __vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, > > + struct vfio_pci_region *region, > > + struct vm_area_struct *vma, > > + u8 *pages) > > { > > u64 phys_len, req_len, pgoff, req_start; > > unsigned long long addr; > > @@ -333,14 +344,14 @@ static int vfio_pci_dma_fault_mmap(struct > > vfio_pci_device *vdev, > > ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); > > req_start = pgoff << PAGE_SHIFT; > > > > - /* only the second page of the producer fault region is mmappable */ > > + /* only the second page of the fault region is mmappable */ > > if (req_start < PAGE_SIZE) > > return -EINVAL; > > > > if (req_start + req_len > phys_len) > > return -EINVAL; > > > > - addr = virt_to_phys(vdev->fault_pages); > > + addr = virt_to_phys(pages); > > vma->vm_private_data = vdev; > > vma->vm_pgoff = (addr >> PAGE_SHIFT) + pgoff; > > > > @@ 
-349,13 +360,29 @@ static int vfio_pci_dma_fault_mmap(struct > > vfio_pci_device *vdev, > > return ret; > > } > > > > -static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev, > > - struct vfio_pci_region *region, > > - struct vfio_info_cap *caps) > > +static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, > > + struct vfio_pci_region *region, > > + struct vm_area_struct *vma) > > +{ > > + return __vfio_pci_dma_fault_mmap(vdev, region, vma, > > vdev->fault_pages); > > +} > > + > > +static int > > +vfio_pci_dma_fault_response_mmap(struct vfio_pci_device *vdev, > > + struct vfio_pci_region *region, > > + struct vm_area_struct *vma) > > +{ > > + return __vfio_pci_dma_fault_mmap(vdev, region, vma, > > vdev->fault_response_pages); > > +} > > + > > +static int __vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev, > > + struct vfio_pci_region *region, > > + struct vfio_info_cap *caps, > > + u32 cap_id) > > { > > struct vfio_region_info_cap_sparse_mmap *sparse = NULL; > > struct vfio_region_info_cap_fault cap = { > > - .header.id = VFIO_REGION_INFO_CAP_DMA_FAULT, > > + .header.id = cap_id, > > .header.version = 1, > > .version = 1, > > }; > > @@ -383,6 +410,14 @@ static int > > vfio_pci_dma_fault_add_capability(struct > > vfio_pci_device *vdev, > > return ret; > > } > > > > +static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev, > > + struct vfio_pci_region *region, > > + struct vfio_info_cap *caps) { > > + return __vfio_pci_dma_fault_add_capability(vdev, region, caps, > > + VFIO_REGION_INFO_CAP_DMA_FAULT); } > > + > > static const struct vfio_pci_regops vfio_pci_dma_fault_regops = { > > .rw = vfio_pci_dma_fault_rw, > > .release = vfio_pci_dma_fault_release, > > @@ -390,6 +425,13 @@ static const struct vfio_pci_regops > > vfio_pci_dma_fault_regops = { > > .add_capability = vfio_pci_dma_fault_add_capability, > > }; > > > > +static const struct vfio_pci_regops vfio_pci_dma_fault_response_regops = { > > + .rw = 
vfio_pci_dma_fault_response_rw, > > + .release = vfio_pci_dma_fault_response_release, > > + .mmap = vfio_pci_dma_fault_response_mmap, > > + .add_capability = vfio_pci_dma_fault_add_capability, As I mentioned in the QEMU patch ([RFC v7 26/26] vfio/pci: Implement return_page_response page response callback), it looks like the dma_fault_response region here also advertises the VFIO_REGION_INFO_CAP_DMA_FAULT cap id, since .add_capability still points at vfio_pci_dma_fault_add_capability rather than a response-specific variant. Is that intentional? (I was wondering how it worked in the first place, which is how I noticed this.) Please check. Thanks, Shameer