On Tue, 5 Jan 2021 07:36:53 -0800
Steve Sistare <steven.sistare@xxxxxxxxxx> wrote:

> Block translation of host virtual address while an iova range is suspended.
> 
> Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 48 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2c164a6..8035b9a 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -31,6 +31,8 @@
>  #include <linux/rbtree.h>
>  #include <linux/sched/signal.h>
>  #include <linux/sched/mm.h>
> +#include <linux/delay.h>
> +#include <linux/kthread.h>
>  #include <linux/slab.h>
>  #include <linux/uaccess.h>
>  #include <linux/vfio.h>
> @@ -484,6 +486,34 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
>  	return ret;
>  }
>  
> +static bool vfio_iommu_contained(struct vfio_iommu *iommu)
> +{
> +	struct vfio_domain *domain = iommu->external_domain;
> +	struct vfio_group *group;
> +
> +	if (!domain)
> +		domain = list_first_entry(&iommu->domain_list,
> +					  struct vfio_domain, next);
> +
> +	group = list_first_entry(&domain->group_list, struct vfio_group, next);
> +	return vfio_iommu_group_contained(group->iommu_group);
> +}

It seems really backwards for a vfio iommu backend to ask vfio-core
whether its internal object is active.

> +
> +
> +bool vfio_vaddr_valid(struct vfio_iommu *iommu, struct vfio_dma *dma)
> +{
> +	while (dma->suspended) {
> +		mutex_unlock(&iommu->lock);
> +		msleep_interruptible(10);
> +		mutex_lock(&iommu->lock);
> +		if (kthread_should_stop() || !vfio_iommu_contained(iommu) ||
> +		    fatal_signal_pending(current)) {
> +			return false;
> +		}
> +	}
> +	return true;
> +}
> +
>  /*
>   * Attempt to pin pages.  We really don't want to track all the pfns and
>   * the iommu can only map chunks of consecutive pfns anyway, so get the
> @@ -690,6 +720,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>  			continue;
>  		}
>  
> +		if (!vfio_vaddr_valid(iommu, dma)) {
> +			ret = -EFAULT;
> +			goto pin_unwind;
> +		}

This could have dropped iommu->lock, so we have no basis to believe our
vfio_dma object is still valid.  Also, this is called with an elevated
reference to the container, so we theoretically should not need to test
whether the iommu is still contained.

> +
>  		remote_vaddr = dma->vaddr + (iova - dma->iova);
>  		ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
>  					     do_accounting);
> @@ -1514,12 +1549,16 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>  				i += PAGE_SIZE;
>  			}
>  		} else {
> -			unsigned long pfn;
> -			unsigned long vaddr = dma->vaddr +
> -					      (iova - dma->iova);
> +			unsigned long pfn, vaddr;
>  			size_t n = dma->iova + dma->size - iova;
>  			long npage;
>  
> +			if (!vfio_vaddr_valid(iommu, dma)) {
> +				ret = -EFAULT;
> +				goto unwind;
> +			}

This one is even scarier: do we have any basis to assume either object
is still valid after iommu->lock is released?  We can only get here if
we're attaching a group to the iommu with suspended vfio_dmas.  Do we
need to allow that?  It seems we could return -EAGAIN and let the user
figure out that they can't attach a group while mappings have invalid
vaddrs.
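Something along these lines in the replay loop, as an untested sketch
that reuses this patch's "suspended" flag rather than a definitive fix:

			/*
			 * Hypothetical alternative: don't wait for the vaddr
			 * to become valid again, refuse the group attach and
			 * let userspace retry once it has restored the vaddrs.
			 */
			if (dma->suspended) {
				ret = -EAGAIN;
				goto unwind;
			}

That keeps iommu->lock held across the whole replay, so the stale
vfio_dma/iommu pointer question never comes up on this path.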
> +			vaddr = dma->vaddr + (iova - dma->iova);
> +
>  			npage = vfio_pin_pages_remote(dma, vaddr,
>  						      n >> PAGE_SHIFT,
>  						      &pfn, limit);
> @@ -2965,6 +3004,9 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
>  	if (count > dma->size - offset)
>  		count = dma->size - offset;
>  
> +	if (!vfio_vaddr_valid(iommu, dma))

Same as the pinning path, this should hold a container user reference,
but we cannot assume our vfio_dma object is valid if we've released the
lock (see the sketch after the quoted context below).  Thanks,

Alex

> +		goto out;
> +
>  	vaddr = dma->vaddr + offset;
>  
>  	if (write) {
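As a rough illustration of the revalidation that would be needed if a
path really must drop iommu->lock to wait: this is a hypothetical
sketch open-coded in a caller that knows the iova, not part of this
patch; vfio_find_dma() is the existing range lookup in this file.

	while (dma->suspended) {
		mutex_unlock(&iommu->lock);
		msleep_interruptible(10);
		mutex_lock(&iommu->lock);
		/*
		 * The vfio_dma may have been unmapped and freed while the
		 * lock was dropped; look it up again before dereferencing.
		 */
		dma = vfio_find_dma(iommu, iova, 1);
		if (!dma || fatal_signal_pending(current))
			return -EFAULT;
	}

Even then the caller has to cope with the mapping changing underneath
it, which is part of why failing the operation and letting userspace
retry looks simpler than waiting with the lock dropped.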