Am 2021-10-12 um 9:55 a.m. schrieb Philip Yang: > No functional change, use pr_debug_ratelimited to avoid per page debug > message overflowing dmesg buf and console log. > > use dev_err to show error message from unexpected situation, to provide > clue to help debug without enabling dynamic debug log. Define dev_fmt to > output function name in error message. > > Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 34 +++++++++++++----------- > drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++++++----- > 2 files changed, 30 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > index f53e17a94ad8..b05c0579d0b9 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > @@ -20,7 +20,6 @@ > * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > * OTHER DEALINGS IN THE SOFTWARE. 
> */ > - > #include <linux/types.h> > #include <linux/hmm.h> > #include <linux/dma-direction.h> > @@ -34,6 +33,11 @@ > #include "kfd_svm.h" > #include "kfd_migrate.h" > > +#ifdef dev_fmt > +#undef dev_fmt > +#endif > +#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__ > + > static uint64_t > svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) > { > @@ -151,14 +155,14 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, > gart_d = svm_migrate_direct_mapping_addr(adev, *vram); > } > if (r) { > - pr_debug("failed %d to create gart mapping\n", r); > + dev_err(adev->dev, "fail %d create gart mapping\n", r); > goto out_unlock; > } > > r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, > NULL, &next, false, true, false); > if (r) { > - pr_debug("failed %d to copy memory\n", r); > + dev_err(adev->dev, "fail %d to copy memory\n", r); > goto out_unlock; > } > > @@ -285,7 +289,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, > > r = svm_range_vram_node_new(adev, prange, true); > if (r) { > - pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); > + dev_err(adev->dev, "fail %d to alloc vram\n", r); > goto out; > } > > @@ -305,7 +309,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, > DMA_TO_DEVICE); > r = dma_mapping_error(dev, src[i]); > if (r) { > - pr_debug("failed %d dma_map_page\n", r); > + dev_err(adev->dev, "fail %d dma_map_page\n", r); > goto out_free_vram_pages; > } > } else { > @@ -325,8 +329,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, > continue; > } > > - pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", > - src[i] >> PAGE_SHIFT, page_to_pfn(spage)); > + pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n", > + src[i] >> PAGE_SHIFT, page_to_pfn(spage)); > > if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) { > r = svm_migrate_copy_memory_gart(adev, src + i - 
j, > @@ -405,8 +409,8 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, > > r = migrate_vma_setup(&migrate); > if (r) { > - pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", > - r, prange->svms, prange->start, prange->last); > + dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, > + prange->start, prange->last); > goto out_free; > } > if (migrate.cpages != npages) { > @@ -506,7 +510,7 @@ static void svm_migrate_page_free(struct page *page) > struct svm_range_bo *svm_bo = page->zone_device_data; > > if (svm_bo) { > - pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref)); > + pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref)); > svm_range_bo_unref(svm_bo); > } > } > @@ -572,12 +576,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, > dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); > r = dma_mapping_error(dev, dst[i]); > if (r) { > - pr_debug("failed %d dma_map_page\n", r); > + dev_err(adev->dev, "fail %d dma_map_page\n", r); > goto out_oom; > } > > - pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n", > - dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); > + pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n", > + dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); > > migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); > migrate->dst[i] |= MIGRATE_PFN_LOCKED; > @@ -631,8 +635,8 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, > > r = migrate_vma_setup(&migrate); > if (r) { > - pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", > - r, prange->svms, prange->start, prange->last); > + dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, > + prange->start, prange->last); > goto out_free; > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > index 425d55deca10..49c92713c2ad 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > @@ -33,6 +33,11 @@ > #include "kfd_svm.h" > #include "kfd_migrate.h" > > +#ifdef dev_fmt > +#undef dev_fmt > +#endif > +#define dev_fmt(fmt) "kfd_svm: %s: " fmt, __func__ > + > #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 > > /* Long enough to ensure no retry fault comes after svm range is restored and > @@ -158,17 +163,17 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, > bo_adev->vm_manager.vram_base_offset - > bo_adev->kfd.dev->pgmap.range.start; > addr[i] |= SVM_RANGE_VRAM_DOMAIN; > - pr_debug("vram address detected: 0x%llx\n", addr[i]); > + pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]); > continue; > } > addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); > r = dma_mapping_error(dev, addr[i]); > if (r) { > - pr_debug("failed %d dma_map_page\n", r); > + dev_err(dev, "failed %d dma_map_page\n", r); > return r; > } > - pr_debug("dma mapping 0x%llx for page addr 0x%lx\n", > - addr[i] >> PAGE_SHIFT, page_to_pfn(page)); > + pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", > + addr[i] >> PAGE_SHIFT, page_to_pfn(page)); > } > return 0; > } > @@ -217,7 +222,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, > for (i = offset; i < offset + npages; i++) { > if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i])) > continue; > - pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); > + pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); > dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); > dma_addr[i] = 0; > } > @@ -1454,7 +1459,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, > /* This should never happen. actual_loc gets set by > * svm_migrate_ram_to_vram after allocating a BO. > */ > - WARN(1, "VRAM BO missing during validation\n"); > + WARN_ONCE(1, "VRAM BO missing during validation\n"); > return -EINVAL; > } >