Am 2021-10-06 um 10:32 a.m. schrieb Philip Yang:
> migrate_vma_setup may return cpages 0, means 0 page can be migrated,
> treat this as error case to skip the rest of migration steps, and don't
> change prange actual loc, to avoid warning message "VRAM BO missing
> during validation".
>
> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 48 ++++++++++++++----------
>  1 file changed, 29 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 069422337cf7..9b68e3e8f2a1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -409,20 +409,25 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
>  			 r, prange->svms, prange->start, prange->last);
>  		goto out_free;
>  	}
> -	if (migrate.cpages != npages) {
> -		pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
> -			 migrate.cpages,
> -			 npages);
> -	}
>
> -	if (migrate.cpages) {
> -		r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
> -					     scratch);
> -		migrate_vma_pages(&migrate);
> -		svm_migrate_copy_done(adev, mfence);
> -		migrate_vma_finalize(&migrate);
> +	if (migrate.cpages != npages)
> +		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +			 migrate.cpages, npages);
> +	else
> +		pr_debug("0x%lx pages migrated\n", migrate.cpages);
> +
> +	if (!migrate.cpages) {
> +		pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
> +			 prange->start, prange->last);
> +		r = -ENOMEM;

I think just returning an error here is incorrect. This error gets
handled in svm_migrate_ram_to_vram and prevents the following VMAs from
migrating as well (if the range spans multiple VMAs).

Maybe return the number of pages migrated, if successful. Then the
caller can add up all the successful migrations and update
prange->actual_loc only if the total is > 0.

Regards,
  Felix

> +		goto out_free;
>  	}
>
> +	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
> +	migrate_vma_pages(&migrate);
> +	svm_migrate_copy_done(adev, mfence);
> +	migrate_vma_finalize(&migrate);
> +
>  	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>  	svm_range_free_dma_mappings(prange);
>
> @@ -636,19 +641,24 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
>  		goto out_free;
>  	}
>
> -	pr_debug("cpages %ld\n", migrate.cpages);
> +	if (migrate.cpages != npages)
> +		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
> +			 migrate.cpages, npages);
> +	else
> +		pr_debug("0x%lx pages migrated\n", migrate.cpages);
>
> -	if (migrate.cpages) {
> -		r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
> -					    scratch, npages);
> -		migrate_vma_pages(&migrate);
> -		svm_migrate_copy_done(adev, mfence);
> -		migrate_vma_finalize(&migrate);
> -	} else {
> +	if (!migrate.cpages) {
>  		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
>  			 prange->start, prange->last);
> +		r = -ENOMEM;
> +		goto out_free;
>  	}
>
> +	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
> +				    scratch, npages);
> +	migrate_vma_pages(&migrate);
> +	svm_migrate_copy_done(adev, mfence);
> +	migrate_vma_finalize(&migrate);
>  	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
>
>  out_free: