To improve memory access performance on non-Large-BAR devices, use SDMA
copies instead of MMIO access.
SDMA access is restricted to PAGE_SIZE accesses to match the ptraced
process memory read/write use case, where /proc/<pid>/mem issues
page-sized accesses. Any other access size uses MMIO.
If the SDMA copy fails, fall back to MMIO access.
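For reference, a minimal debugger-side sketch of that use case
(illustrative only, not part of this patch): the tracer attaches with
ptrace and reads one page of the tracee's mapped VRAM through
/proc/<pid>/mem, which delivers the transfer as PAGE_SIZE accesses by
the time it reaches the TTM access_memory callback.

  /* Illustrative sketch: read one PAGE_SIZE chunk of a tracee's GPU mapping. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>
  #include <sys/wait.h>
  #include <unistd.h>

  static int read_gpu_page(pid_t pid, off_t gpu_va, void *out, size_t page_size)
  {
      char path[64];
      ssize_t n;
      int fd;

      ptrace(PTRACE_ATTACH, pid, NULL, NULL);
      waitpid(pid, NULL, 0);

      snprintf(path, sizeof(path), "/proc/%d/mem", pid);
      fd = open(path, O_RDONLY);
      /* A page-aligned gpu_va and PAGE_SIZE count arrive as one page-sized access. */
      n = pread(fd, out, page_size, gpu_va);
      close(fd);

      ptrace(PTRACE_DETACH, pid, NULL, NULL);
      return n == (ssize_t)page_size ? 0 : -1;
  }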
Note: This is an attempt to readdress the patch request
'drm/amdgpu: extend ttm memory access to do sdma copies'
with the addition of restrictions and fallbacks.
Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 97 +++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 23fc57506a20..1cb984252f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1741,6 +1741,91 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
}
}
+/**
+ * amdgpu_ttm_access_memory_page_sdma - Read/write a page of memory that backs a buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from
+ * @write: true if writing
+ *
+ * This is used to access a page of VRAM that backs a buffer object via SDMA
+ * access for debugging purposes.
+ */
+static int amdgpu_ttm_access_memory_page_sdma(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int write)
+{
+ struct amdgpu_bo *dst_bo, *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_copy_mem src, dst;
+ struct drm_gem_object *gobj;
+ struct dma_fence *fence;
+ struct page *dst_page;
+ struct ttm_tt *dst_ttm;
+ int ret;
+
+ /* Create an SG BO to dma map the target buffer for direct copy. */
+ ret = amdgpu_gem_object_create(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_CPU,
+ 0, ttm_bo_type_sg, NULL, &gobj);
+ if (ret)
+ return ret;
+
+ dst_bo = gem_to_amdgpu_bo(gobj);
+ dst_ttm = dst_bo->tbo.ttm;
+ dst_ttm->sg = kmalloc(sizeof(*dst_ttm->sg), GFP_KERNEL);
+ if (unlikely(!dst_ttm->sg)) {
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+
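+ /*
+ * Assumes @buf is the page-aligned kernel bounce buffer used by the
+ * ptrace (/proc/<pid>/mem) access path; the PAGE_SIZE restriction in
+ * the caller keeps other access sizes on the MMIO path.
+ */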
+ dst_page = virt_to_page(buf);
+ ret = sg_alloc_table_from_pages(dst_ttm->sg, &dst_page, 1, 0,
+ 1 << PAGE_SHIFT, GFP_KERNEL);
+ if (unlikely(ret))
+ goto free_sg;
+
+ ret = dma_map_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+ if (unlikely(ret))
+ goto release_sg;
+
+ drm_prime_sg_to_dma_addr_array(dst_ttm->sg, dst_ttm->dma_address, 1);
+
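+ /*
+ * Validate into GTT so the imported page gets bound into the GART and
+ * the SDMA engine can reach it through a GPU address.
+ */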
+ amdgpu_bo_placement_from_domain(dst_bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&dst_bo->tbo, &dst_bo->placement, &ctx);
+ if (ret)
+ goto unmap_sg;
+
+ src.bo = bo;
+ src.mem = bo->resource;
+ src.offset = offset;
+ dst.bo = &dst_bo->tbo;
+ dst.mem = dst.bo->resource;
+ dst.offset = 0;
+
+ /* Do the direct copy and wait for fence response. */
+ ret = amdgpu_ttm_copy_mem_to_mem(adev, write ? &dst : &src, write ? &src : &dst,
+ 1 << PAGE_SHIFT, amdgpu_bo_encrypted(abo),
+ bo->base.resv, &fence);
+ if (!ret && fence) {
+ if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+ ret = -ETIMEDOUT;
+
+ dma_fence_put(fence);
+ }
+
+unmap_sg:
+ dma_unmap_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+release_sg:
+ sg_free_table(dst_ttm->sg);
+free_sg:
+ kfree(dst_ttm->sg);
+ dst_ttm->sg = NULL;
+free_bo:
+ drm_gem_object_put(gobj);
+ return ret;
+}
+
/**
* amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
@@ -1765,7 +1850,19 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->resource->mem_type != TTM_PL_VRAM)
return -EIO;
+ /*
+ * Attempt SDMA access over non-visible VRAM first.
+ * On failure, fall back to MMIO access.
+ *
+ * Restrict this to PAGE_SIZE accesses for ptraced process memory
+ * operations. Any other access size uses MMIO access.
+ */
amdgpu_res_first(bo->resource, offset, len, &cursor);
+ if (adev->gmc.visible_vram_size < cursor.start + len && len == PAGE_SIZE &&
+ !amdgpu_in_reset(adev) &&
+ !amdgpu_ttm_access_memory_page_sdma(bo, offset, buf, write))
+ return len;
+
while (cursor.remaining) {
size_t count, size = cursor.size;
loff_t pos = cursor.start;