On 2021-08-19 10:56 a.m., Philip Yang wrote:
> Restore retry fault or prefetch range, or restore svm range after
> eviction, to map the range to GPU with the correct read or write
> access permission.
>
> A range may include multiple VMAs. Update the GPU page table with the
> offset into prange and the number of pages for each VMA, according to
> the VMA access permission.
>
> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>

Minor nitpicks, and one question. See inline. It looks good otherwise.

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 131 +++++++++++++++++----------
>  1 file changed, 84 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index cf1009bb532a..94612581963f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
>
>  static int
>  svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
> +                      unsigned long offset, unsigned long npages,
>                        unsigned long *hmm_pfns, uint32_t gpuidx)
>  {
>         enum dma_data_direction dir = DMA_BIDIRECTIONAL;
> @@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>                 prange->dma_addr[gpuidx] = addr;
>         }
>
> -       for (i = 0; i < prange->npages; i++) {
> +       addr += offset;
> +       for (i = 0; i < npages; i++) {
>                 if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
>                               "leaking dma mapping\n"))
>                         dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
> @@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>
>  static int
>  svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
> +                  unsigned long offset, unsigned long npages,
>                    unsigned long *hmm_pfns)
>  {
>         struct kfd_process *p;
> @@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
>                 }
>                 adev = (struct amdgpu_device *)pdd->dev->kgd;
>
> -               r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
> +               r = svm_range_dma_map_dev(adev, prange, offset, npages,
> +                                         hmm_pfns, gpuidx);
>                 if (r)
>                         break;
>         }
> @@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
>         pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
>
>         pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
> -
> -       pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
> -                prange->svms, prange->start, prange->last,
> -                (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
> -
>         return pte_flags;
>  }
>
> @@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
>
>  static int
>  svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -                    struct svm_range *prange, dma_addr_t *dma_addr,
> +                    struct svm_range *prange, unsigned long offset,
> +                    unsigned long npages, bool readonly, dma_addr_t *dma_addr,
>                      struct amdgpu_device *bo_adev, struct dma_fence **fence)
>  {
>         struct amdgpu_bo_va bo_va;
> @@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>         int r = 0;
>         int64_t i;
>
> -       pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
> -                prange->last);
> +       last_start = prange->start + offset;
> +
> +       pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
> +                last_start, last_start + npages - 1, readonly);
>
>         if (prange->svm_bo && prange->ttm_res)
>                 bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
>
> -       last_start = prange->start;
> -       for (i = 0; i < prange->npages; i++) {
> +       for (i = offset; i < offset + npages; i++) {
>                 last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
>                 dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
>                 if ((prange->start + i) < prange->last &&
> @@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
>                 pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
>                          last_start, prange->start + i, last_domain ? "GPU" : "CPU");
> +
>                 pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
> -               r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
> -                                               last_start,
> +               if (readonly)
> +                       pte_flags &= ~AMDGPU_PTE_WRITEABLE;
> +
> +               pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
> +                        prange->svms, last_start, prange->start + i,
> +                        (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
> +                        pte_flags);
> +
> +               r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
> +                                               NULL, last_start,
>                                                 prange->start + i, pte_flags,
>                                                 last_start - prange->start,
> -                                               NULL,
> -                                               dma_addr,
> +                                               NULL, dma_addr,
>                                                 &vm->last_update,
>                                                 &table_freed);
>                 if (r) {
> @@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>         return r;
>  }
>
> -static int svm_range_map_to_gpus(struct svm_range *prange,
> -                                unsigned long *bitmap, bool wait)
> +static int
> +svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
> +                     unsigned long npages, bool readonly,
> +                     unsigned long *bitmap, bool wait)
>  {
>         struct kfd_process_device *pdd;
>         struct amdgpu_device *bo_adev;
> @@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
>                 }
>
>                 r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
> -                                        prange, prange->dma_addr[gpuidx],
> +                                        prange, offset, npages, readonly,
> +                                        prange->dma_addr[gpuidx],
>                                          bo_adev, wait ? &fence : NULL);
>                 if (r)
>                         break;
> @@ -1390,6 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>                                       int32_t gpuidx, bool intr, bool wait)
>  {
>         struct svm_validate_context ctx;
> +       unsigned long start, end, addr;
>         struct hmm_range *hmm_range;
>         struct kfd_process *p;
>         void *owner;
> @@ -1448,40 +1461,64 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>                         break;
>                 }
>         }
> -       r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> -                                      prange->start << PAGE_SHIFT,
> -                                      prange->npages, &hmm_range,
> -                                      false, true, owner);
> -       if (r) {
> -               pr_debug("failed %d to get svm range pages\n", r);
> -               goto unreserve_out;
> -       }
>
> -       r = svm_range_dma_map(prange, ctx.bitmap,
> -                             hmm_range->hmm_pfns);
> -       if (r) {
> -               pr_debug("failed %d to dma map range\n", r);
> -               goto unreserve_out;
> -       }
> +       start = prange->start << PAGE_SHIFT;
> +       end = (prange->last + 1) << PAGE_SHIFT;
> +       for (addr = start; addr < end && !r; ) {
> +               struct vm_area_struct *vma;
> +               unsigned long next;
> +               unsigned long offset;
> +               unsigned long npages;
> +               bool readonly;
>
> -       prange->validated_once = true;
> +               vma = find_vma(mm, addr);
> +               if (!vma || addr < vma->vm_start) {
> +                       r = -EINVAL;

I think -EFAULT would be the appropriate error code here.
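I.e., the check would become (same code as in the patch, only the
error code changed):

                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start) {
                        r = -EFAULT;
                        goto unreserve_out;
                }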
> +                       goto unreserve_out;
> +               }
> +               readonly = !(vma->vm_flags & VM_WRITE);
>
> -       svm_range_lock(prange);
> -       if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> -               pr_debug("hmm update the range, need validate again\n");
> -               r = -EAGAIN;
> -               goto unlock_out;
> -       }
> -       if (!list_empty(&prange->child_list)) {
> -               pr_debug("range split by unmap in parallel, validate again\n");
> -               r = -EAGAIN;
> -               goto unlock_out;
> -       }
> +               next = min(vma->vm_end, end);
> +               npages = (next - addr) / PAGE_SIZE;

Use >> PAGE_SHIFT for consistency.
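I.e.:

                npages = (next - addr) >> PAGE_SHIFT;

and likewise for the offset calculation further down:

                offset = (addr - start) >> PAGE_SHIFT;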
> +               r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> +                                              addr, npages, &hmm_range,
> +                                              readonly, true, owner);
> +               if (r) {
> +                       pr_debug("failed %d to get svm range pages\n", r);
> +                       goto unreserve_out;
> +               }
>
> -       r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
> +               offset = (addr - start) / PAGE_SIZE;

Same here: use >> PAGE_SHIFT (see above).

> +               r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
> +                                     hmm_range->hmm_pfns);
> +               if (r) {
> +                       pr_debug("failed %d to dma map range\n", r);
> +                       goto unreserve_out;
> +               }
> +
> +               svm_range_lock(prange);
> +               if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> +                       pr_debug("hmm update the range, need validate again\n");
> +                       r = -EAGAIN;
> +                       goto unlock_out;
> +               }
> +               if (!list_empty(&prange->child_list)) {
> +                       pr_debug("range split by unmap in parallel, validate again\n");
> +                       r = -EAGAIN;
> +                       goto unlock_out;
> +               }
> +
> +               r = svm_range_map_to_gpus(prange, offset, npages, readonly,
> +                                         ctx.bitmap, wait);
>
>  unlock_out:
> -       svm_range_unlock(prange);
> +               svm_range_unlock(prange);
> +
> +               addr = next;
> +       }
> +
> +       prange->validated_once = true;

Should this be conditional on "!r"? See the sketch at the end of this
mail for what I mean.

Regards,
  Felix

> +
>  unreserve_out:
>         svm_range_unreserve_bos(&ctx);
>
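For the validated_once question above, I had something like this in
mind (just a sketch, untested):

        /* only remember a fully successful validation */
        if (!r)
                prange->validated_once = true;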