To improve memory access performance on non-Large-BAR devices, use SDMA
copies instead of MMIO access.
SDMA access is restricted to PAGE_SIZE accesses to match the ptraced
process memory read/write use case, where /proc/<pid>/mem issues
page-sized accesses. Any other access size uses MMIO.
If the SDMA copy fails, fall back to MMIO access.
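For reference, a minimal debugger-side sketch of that use case
(illustrative only, not part of this patch): the tracer attaches with
ptrace and reads one page of the tracee's mapped VRAM through
/proc/<pid>/mem, which delivers the transfer as PAGE_SIZE accesses by
the time it reaches the TTM access_memory callback.

  /* Illustrative sketch: read one PAGE_SIZE chunk of a tracee's GPU mapping. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <unistd.h>

  static int read_gpu_page(pid_t pid, off_t gpu_va, void *out, size_t page_size)
  {
      char path[64];
      ssize_t n;
      int fd;

      ptrace(PTRACE_ATTACH, pid, NULL, NULL);
      waitpid(pid, NULL, 0);

      snprintf(path, sizeof(path), "/proc/%d/mem", pid);
      fd = open(path, O_RDONLY);
      /* A page-aligned gpu_va and PAGE_SIZE count arrive as one page-sized access. */
      n = pread(fd, out, page_size, gpu_va);
      close(fd);

      ptrace(PTRACE_DETACH, pid, NULL, NULL);
      return n == (ssize_t)page_size ? 0 : -1;
  }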
Note: This is an attempt to readdress the patch request
'drm/amdgpu: extend ttm memory access to do sdma copies'
with the addition of restrictions and fallbacks.
Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 97 +++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 23fc57506a20..1cb984252f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1741,6 +1741,91 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
}
}
+/**
+ * amdgpu_ttm_access_memory_page_sdma - Read/write a page of memory that backs a buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from
+ * @write: true if writing
+ *
+ * This is used to access a page of VRAM that backs a buffer object via SDMA
+ * access for debugging purposes.
+ */
+static int amdgpu_ttm_access_memory_page_sdma(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int write)
+{
+ struct amdgpu_bo *dst_bo, *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_copy_mem src, dst;
+ struct drm_gem_object *gobj;
+ struct dma_fence *fence;
+ struct page *dst_page;
+ struct ttm_tt *dst_ttm;
+ int ret;
+
+ /* Create an SG BO to dma map the target buffer for direct copy. */
+ ret = amdgpu_gem_object_create(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_CPU,
+ 0, ttm_bo_type_sg, NULL, &gobj);
+ if (ret)
+ return ret;
+
+ dst_bo = gem_to_amdgpu_bo(gobj);
+ dst_ttm = dst_bo->tbo.ttm;
+ dst_ttm->sg = kmalloc(sizeof(*dst_ttm->sg), GFP_KERNEL);
+ if (unlikely(!dst_ttm->sg)) {
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+
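+ /*
+ * Assumes @buf is the page-aligned kernel bounce buffer used by the
+ * ptrace (/proc/<pid>/mem) access path; the PAGE_SIZE restriction in
+ * the caller keeps other access sizes on the MMIO path.
+ */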
+ dst_page = virt_to_page(buf);
+ ret = sg_alloc_table_from_pages(dst_ttm->sg, &dst_page, 1, 0,
+ 1 << PAGE_SHIFT, GFP_KERNEL);
+ if (unlikely(ret))
+ goto free_sg;
+
+ ret = dma_map_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+ if (unlikely(ret))
+ goto release_sg;
+
+ drm_prime_sg_to_dma_addr_array(dst_ttm->sg, dst_ttm->dma_address, 1);
+
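+ /*
+ * Validate into GTT so the imported page gets bound into the GART and
+ * the SDMA engine can reach it through a GPU address.
+ */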
+ amdgpu_bo_placement_from_domain(dst_bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&dst_bo->tbo, &dst_bo->placement, &ctx);
+ if (ret)
+ goto unmap_sg;
+
+ src.bo = bo;
+ src.mem = bo->resource;
+ src.offset = offset;
+ dst.bo = &dst_bo->tbo;
+ dst.mem = dst.bo->resource;
+ dst.offset = 0;
+
+ /* Do the direct copy and wait for fence response. */
+ ret = amdgpu_ttm_copy_mem_to_mem(adev, write ? &dst : &src, write ? &src : &dst,
+ 1 << PAGE_SHIFT, amdgpu_bo_encrypted(abo),
+ bo->base.resv, &fence);
+ if (!ret && fence) {
+ if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+ ret = -ETIMEDOUT;
+
+ dma_fence_put(fence);
+ }
+
+unmap_sg:
+ dma_unmap_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+release_sg:
+ sg_free_table(dst_ttm->sg);
+free_sg:
+ kfree(dst_ttm->sg);
+ dst_ttm->sg = NULL;
+free_bo:
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
/**
* amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
@@ -1765,7 +1850,19 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->resource->mem_type != TTM_PL_VRAM)
return -EIO;
+ /*
+ * Attempt SDMA access over non-visible VRAM first.
+ * On failure, fall back to MMIO access.
+ *
+ * Restrict this to PAGE_SIZE accesses for ptraced process memory
+ * operations. Any other access size uses MMIO access.
+ */
amdgpu_res_first(bo->resource, offset, len, &cursor);
+ if (adev->gmc.visible_vram_size < cursor.start + len && len == PAGE_SIZE &&
+ !amdgpu_in_reset(adev) &&
+ !amdgpu_ttm_access_memory_page_sdma(bo, offset, buf, write))
+ return len;
+
while (cursor.remaining) {
size_t count, size = cursor.size;
loff_t pos = cursor.start;