On 2024-08-12 02:59, Samuel Zhang wrote:
The requested access range may span two adjacent buddy blocks of a
BO. In this case, two SDMA copy commands must be issued to fully access
the data range. But the current implementation issues only one SDMA copy
command, which results in incomplete access.
The fix is to loop over the res cursor when emitting copy commands so that
multiple (two) copy commands are issued when necessary.
Signed-off-by: Samuel Zhang <guoqing.zhang@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 26 ++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a6e90eada367..c423574acd5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1484,7 +1484,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
struct dma_fence *fence;
uint64_t src_addr, dst_addr;
unsigned int num_dw;
- int r, idx;
+ int r, idx, count = 0;
if (len > PAGE_SIZE)
return -EINVAL;
@@ -1498,7 +1498,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
if (write)
memcpy(adev->mman.sdma_access_ptr, buf, len);
- num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw * 2, 8);
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
@@ -1507,15 +1507,19 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
goto out;
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
- src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
- src_mm.start;
- dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
- if (write)
- swap(src_addr, dst_addr);
-
- amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- len, 0);
-
+ while (src_mm.remaining) {
+ src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+ src_mm.start;
+ dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo) + count;
+ if (write)
+ swap(src_addr, dst_addr);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+ src_mm.size, 0);
+
+ count += src_mm.size;
You could just increment dst_addr instead of tracking a separate count, and
move the initialization of dst_addr outside the loop. Other than that, this patch is
Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>
+ amdgpu_res_next(&src_mm, src_mm.size);
+ }
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);