On 10/12/2016 4:01 PM, Tian, Kevin wrote: >> From: Kirti Wankhede [mailto:kwankhede@xxxxxxxxxx] >> Sent: Tuesday, October 11, 2016 4:29 AM >> > [...] >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 2ba19424e4a1..ce6d6dcbd9a8 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -55,18 +55,26 @@ MODULE_PARM_DESC(disable_hugepages, >> >> struct vfio_iommu { >> struct list_head domain_list; >> + struct vfio_domain *local_domain; > > Hi, Kirti, can you help explain the meaning of 'local" here? I have a hard time > to understand its intention... In your later change of vaddr_get_pfn, it's > even more confusing where get_user_pages_remote is used on a 'local_mm': > 'local' in local_domain is to describe that the domain for local page tracking. 'local_mm' in vaddr_get_pfn() is local variable in vaddr_get_pfn() function. struct mm_struct *local_mm = (mm ? mm : current->mm); > + if (mm) { > + down_read(&local_mm->mmap_sem); > + ret = get_user_pages_remote(NULL, local_mm, vaddr, 1, > + !!(prot & IOMMU_WRITE), 0, page, NULL); > + up_read(&local_mm->mmap_sem); > + } else > + ret = get_user_pages_fast(vaddr, 1, > + !!(prot & IOMMU_WRITE), page); > > > [...] >> -static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn) >> +static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, >> + int prot, unsigned long *pfn) >> { >> struct page *page[1]; >> struct vm_area_struct *vma; >> + struct mm_struct *local_mm = (mm ? mm : current->mm); > > it'd be clearer if you call this variable as 'mm' while the earlier input parameter > as 'local_mm'. > Like I mentioned above, 'local' here is for local variable in this function. >> int ret = -EFAULT; >> >> - if (get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), page) == 1) { >> + if (mm) { >> + down_read(&local_mm->mmap_sem); >> + ret = get_user_pages_remote(NULL, local_mm, vaddr, 1, >> + !!(prot & IOMMU_WRITE), 0, page, NULL); >> + up_read(&local_mm->mmap_sem); >> + } else >> + ret = get_user_pages_fast(vaddr, 1, >> + !!(prot & IOMMU_WRITE), page); >> + >> + if (ret == 1) { >> *pfn = page_to_pfn(page[0]); >> return 0; >> } >> >> - down_read(¤t->mm->mmap_sem); >> + down_read(&local_mm->mmap_sem); >> >> - vma = find_vma_intersection(current->mm, vaddr, vaddr + 1); >> + vma = find_vma_intersection(local_mm, vaddr, vaddr + 1); >> >> if (vma && vma->vm_flags & VM_PFNMAP) { >> *pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; > > [...] >> +static long __vfio_pin_pages_local(struct vfio_domain *domain, >> + unsigned long vaddr, int prot, >> + unsigned long *pfn_base, >> + bool do_accounting) > > 'pages' -> 'page' since only one page is handled here. > > [...] >> + >> +static void __vfio_unpin_pages_local(struct vfio_domain *domain, >> + unsigned long pfn, int prot, >> + bool do_accounting) > > ditto > Ok. >> +{ >> + put_pfn(pfn, prot); >> + >> + if (do_accounting) >> + vfio_lock_acct(domain->local_addr_space->task, -1); >> +} >> + >> +static int vfio_unpin_pfn(struct vfio_domain *domain, >> + struct vfio_pfn *vpfn, bool do_accounting) >> +{ >> + __vfio_unpin_pages_local(domain, vpfn->pfn, vpfn->prot, >> + do_accounting); >> + >> + if (atomic_dec_and_test(&vpfn->ref_count)) >> + vfio_remove_from_pfn_list(domain, vpfn); >> + >> + return 1; >> +} >> + >> +static long vfio_iommu_type1_pin_pages(void *iommu_data, >> + unsigned long *user_pfn, >> + long npage, int prot, >> + unsigned long *phys_pfn) >> +{ >> + struct vfio_iommu *iommu = iommu_data; >> + struct vfio_domain *domain; >> + int i, j, ret; >> + long retpage; >> + unsigned long remote_vaddr; >> + unsigned long *pfn = phys_pfn; >> + struct vfio_dma *dma; >> + bool do_accounting = false; >> + >> + if (!iommu || !user_pfn || !phys_pfn) >> + return -EINVAL; >> + >> + mutex_lock(&iommu->lock); >> + >> + if (!iommu->local_domain) { >> + ret = -EINVAL; >> + goto pin_done; >> + } >> + >> + domain = iommu->local_domain; >> + >> + /* >> + * If iommu capable domain exist in the container then all pages are >> + * already pinned and accounted. Accouting should be done if there is no >> + * iommu capable domain in the container. >> + */ >> + do_accounting = !IS_IOMMU_CAPABLE_DOMAIN_IN_CONTAINER(iommu); >> + >> + for (i = 0; i < npage; i++) { >> + struct vfio_pfn *p; >> + dma_addr_t iova; >> + >> + iova = user_pfn[i] << PAGE_SHIFT; >> + >> + dma = vfio_find_dma(iommu, iova, 0); >> + if (!dma) { >> + ret = -EINVAL; >> + goto pin_unwind; >> + } >> + >> + remote_vaddr = dma->vaddr + iova - dma->iova; > > again, why "remote"_vaddr on a 'local' function? > Not 'local' function, its local_domain. __vfio_pin_pages_local() pins pages for local_domain. When this function is called from other process, other than QEMU process, vaddr from QEMU process is remote_vaddr for caller. >> + >> + retpage = __vfio_pin_pages_local(domain, remote_vaddr, prot, >> + &pfn[i], do_accounting); >> + if (retpage <= 0) { >> + WARN_ON(!retpage); >> + ret = (int)retpage; >> + goto pin_unwind; >> + } >> + >> + mutex_lock(&domain->local_addr_space->pfn_list_lock); >> + >> + /* search if pfn exist */ >> + p = vfio_find_pfn(domain, pfn[i]); >> + if (p) { >> + atomic_inc(&p->ref_count); >> + mutex_unlock(&domain->local_addr_space->pfn_list_lock); >> + continue; >> + } >> + >> + ret = vfio_add_to_pfn_list(domain, remote_vaddr, iova, >> + pfn[i], prot); >> + mutex_unlock(&domain->local_addr_space->pfn_list_lock); >> + >> + if (ret) { >> + __vfio_unpin_pages_local(domain, pfn[i], prot, >> + do_accounting); >> + goto pin_unwind; >> + } >> + } >> + >> + ret = i; >> + goto pin_done; >> + >> +pin_unwind: >> + pfn[i] = 0; >> + mutex_lock(&domain->local_addr_space->pfn_list_lock); >> + for (j = 0; j < i; j++) { >> + struct vfio_pfn *p; >> + >> + p = vfio_find_pfn(domain, pfn[j]); >> + if (p) >> + vfio_unpin_pfn(domain, p, do_accounting); >> + >> + pfn[j] = 0; >> + } >> + mutex_unlock(&domain->local_addr_space->pfn_list_lock); >> + >> +pin_done: >> + mutex_unlock(&iommu->lock); >> + return ret; >> +} >> + >> +static long vfio_iommu_type1_unpin_pages(void *iommu_data, unsigned long *pfn, >> + long npage) >> +{ >> + struct vfio_iommu *iommu = iommu_data; >> + struct vfio_domain *domain = NULL; >> + long unlocked = 0; >> + int i; >> + >> + if (!iommu || !pfn) >> + return -EINVAL; >> + > > acquire iommu lock... > Yes, Alex has pointed this out and I'm going to fix it in v9. >> + domain = iommu->local_domain; >> + >> + for (i = 0; i < npage; i++) { >> + struct vfio_pfn *p; >> + >> + mutex_lock(&domain->local_addr_space->pfn_list_lock); >> + >> + /* verify if pfn exist in pfn_list */ >> + p = vfio_find_pfn(domain, pfn[i]); >> + if (p) >> + unlocked += vfio_unpin_pfn(domain, p, true); > > Should we force update accounting here even when there is iommu capable > domain? It's not consistent to earlier pin_pages. > Yes, fixing this. >> + >> + mutex_unlock(&domain->local_addr_space->pfn_list_lock); >> + } >> >> return unlocked; >> } >> @@ -341,6 +636,12 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct >> vfio_dma *dma) >> >> if (!dma->size) >> return; >> + >> + if (!IS_IOMMU_CAPABLE_DOMAIN_IN_CONTAINER(iommu)) >> + return; > > Is above check redundant to following dma->iommu_mapped? > I'm going to remove dma->iommu_mapped and changing accounting code as per Alex's comment and problem that Alex pointed out. Thanks, Kirti >> + >> + if (!dma->iommu_mapped) >> + return; >> /* >> * We use the IOMMU to track the physical addresses, otherwise we'd >> * need a much more complicated tracking system. Unfortunately that > > Thanks > Kevin > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html