To support better memory access performance on non-Large BAR devices, use
SDMA copies instead of MM access.

SDMA access is restricted to PAGE_SIZE'd access to account for the PTRACED
process memory r/w operation use case. Any other access size will use MMIO.
Failure to do an SDMA copy will result in a fallback to MM access.

Note: This is an attempt to re-address patch request 'drm/amdgpu: extend
ttm memory access to do sdma copies' with the addition of restrictions and
fallbacks.

Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 98 +++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 23fc57506a20..1cb984252f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1741,6 +1741,92 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
 	}
 }
 
+/**
+ * amdgpu_ttm_access_memory_page_sdma - Read/write page of memory that backs a buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from (must be exactly one page)
+ * @write: true if writing
+ *
+ * This is used to access a page of VRAM that backs a buffer object via SDMA
+ * access for debugging purposes.  @buf is wrapped in a temporary one-page SG
+ * BO so the copy engine can DMA directly to/from it.
+ */
+static int amdgpu_ttm_access_memory_page_sdma(struct ttm_buffer_object *bo,
+					      unsigned long offset, void *buf,
+					      int write)
+{
+	struct amdgpu_bo *dst_bo, *abo = ttm_to_amdgpu_bo(bo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_copy_mem src, dst;
+	struct drm_gem_object *gobj;
+	struct dma_fence *fence;
+	struct page *dst_page;
+	struct ttm_tt *dst_ttm;
+	int ret;
+
+	/* Create an SG BO to dma map the target buffer for direct copy. */
+	ret = amdgpu_gem_object_create(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_CPU,
+				       0, ttm_bo_type_sg, NULL, &gobj);
+	if (ret)
+		return ret;
+
+	dst_bo = gem_to_amdgpu_bo(gobj);
+	dst_ttm = dst_bo->tbo.ttm;
+	dst_ttm->sg = kmalloc(sizeof(*dst_ttm->sg), GFP_KERNEL);
+	if (unlikely(!dst_ttm->sg)) {
+		ret = -ENOMEM;
+		goto free_bo;
+	}
+
+	dst_page = virt_to_page(buf);
+	ret = sg_alloc_table_from_pages(dst_ttm->sg, &dst_page, 1, 0,
+					PAGE_SIZE, GFP_KERNEL);
+	if (unlikely(ret))
+		goto free_sg;
+
+	ret = dma_map_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+	if (unlikely(ret))
+		goto release_sg;
+
+	drm_prime_sg_to_dma_addr_array(dst_ttm->sg, dst_ttm->dma_address, 1);
+
+	/* Move the SG BO into GTT so the copy engine can address it. */
+	amdgpu_bo_placement_from_domain(dst_bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&dst_bo->tbo, &dst_bo->placement, &ctx);
+	if (ret)
+		goto unmap_sg;
+
+	src.bo = bo;
+	src.mem = bo->resource;
+	src.offset = offset;
+	dst.bo = &dst_bo->tbo;
+	dst.mem = dst.bo->resource;
+	dst.offset = 0;
+
+	/* Do the direct copy and wait for fence response. */
+	ret = amdgpu_ttm_copy_mem_to_mem(adev, write ? &dst : &src, write ? &src : &dst,
+					 PAGE_SIZE, amdgpu_bo_encrypted(abo),
+					 bo->base.resv, &fence);
+	if (!ret && fence) {
+		/* dma_fence_wait_timeout() returns 0 on timeout. */
+		if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+			ret = -ETIMEDOUT;
+
+		dma_fence_put(fence);
+	}
+
+unmap_sg:
+	dma_unmap_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+release_sg:
+	sg_free_table(dst_ttm->sg);
+free_sg:
+	kfree(dst_ttm->sg);
+	dst_ttm->sg = NULL;
+free_bo:
+	/* Drop the GEM reference; do not call gobj->funcs->free() directly. */
+	drm_gem_object_put(gobj);
+	return ret;
+}
+
 /**
  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
  *
@@ -1765,7 +1851,19 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->resource->mem_type != TTM_PL_VRAM)
 		return -EIO;
 
+	/*
+	 * Attempt SDMA access over non-visible VRAM first.
+	 * On failure, fall back to MMIO access.
+	 *
+	 * Restrict this to PAGE_SIZE access for PTRACED memory operations.
+	 * Any other access size should use MM access.
+	 */
 	amdgpu_res_first(bo->resource, offset, len, &cursor);
+	if (adev->gmc.visible_vram_size < cursor.start + len && len == PAGE_SIZE &&
+	    !amdgpu_in_reset(adev) &&
+	    !amdgpu_ttm_access_memory_page_sdma(bo, offset, buf, write))
+		return len;
+
 	while (cursor.remaining) {
 		size_t count, size = cursor.size;
 		loff_t pos = cursor.start;
-- 
2.25.1