[AMD Official Use Only - General]

Hi Alex,

Sure, I will merge the fix and update the kerneldoc above each function.

Best Regards,
Frank

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Alex Deucher
Sent: Sunday, April 14, 2024 6:42 AM
To: Min, Frank <Frank.Min@xxxxxxx>
Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Gao, Likun <Likun.Gao@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>
Subject: Re: [PATCH] drm/amdgpu: replace tmz flag into buffer flag

On Fri, Apr 12, 2024 at 8:17 AM Min, Frank <Frank.Min@xxxxxxx> wrote:
>
> [AMD Official Use Only - General]
>
> From: Frank Min <Frank.Min@xxxxxxx>
>
> Replace tmz flag into buffer flag to make it easier to understand and
> extend
>
> Signed-off-by: Likun Gao <Likun.Gao@xxxxxxx>
> Signed-off-by: Frank Min <Frank.Min@xxxxxxx>

Before you push this to amd-staging-drm-next, can you squash in the si_dma.c fix and update the kerneldoc above each function to reflect the new parameter?
Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 4 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 +++++++++++------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +++- > drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 5 +++-- > drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++-- > 15 files changed, 36 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c > index edc6377ec5ff..199693369c7c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c > @@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, > for (i = 0; i < n; i++) { > struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, > - false, false, false); > + false, false, 0); > if (r) > goto exit_do_move; > r = dma_fence_wait(fence, false); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 38742ff0ff49..abb1505c82ec 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -791,7 +791,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo > *shadow, struct dma_fence **fence) > > return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, > amdgpu_bo_size(shadow), NULL, fence, > - true, false, false); > + true, false, 0); > } > > /** > diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > index a22c6446817b..b5bde6652838 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h > @@ -136,7 +136,7 @@ struct amdgpu_buffer_funcs { > uint64_t dst_offset, > /* number of byte to transfer */ > uint32_t byte_count, > - bool tmz); > + uint32_t copy_flags); > > /* maximum bytes in a single operation */ > uint32_t fill_max_bytes; > @@ -154,7 +154,7 @@ struct amdgpu_buffer_funcs { > uint32_t byte_count); }; > > -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) > (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) > +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, f) > +(adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (f)) > #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) > (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) > > struct amdgpu_sdma_instance * > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index f0fffbf2bdd5..d58ab879e125 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -267,7 +267,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, > dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); > dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, > - dst_addr, num_bytes, false); > + dst_addr, num_bytes, 0); > > amdgpu_ring_pad_ib(ring, &job->ibs[0]); > WARN_ON(job->ibs[0].length_dw > num_dw); @@ -327,6 +327,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, > struct dma_fence *fence = NULL; > int r = 0; > > + uint32_t copy_flags = 0; > + > if (!adev->mman.buffer_funcs_enabled) { > DRM_ERROR("Trying to move memory with ring turned off.\n"); > return -EINVAL; > @@ -354,8 +356,11 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, > if (r) > goto error; > > - r = 
amdgpu_copy_buffer(ring, from, to, cur_size, > - resv, &next, false, true, tmz); > + if (tmz) > + copy_flags |= AMDGPU_COPY_FLAGS_TMZ; > + > + r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, > + &next, false, true, > + copy_flags); > if (r) > goto error; > > @@ -1782,7 +1787,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, > swap(src_addr, dst_addr); > > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, > - PAGE_SIZE, false); > + PAGE_SIZE, 0); > > amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); > WARN_ON(job->ibs[0].length_dw > num_dw); @@ -2596,7 +2601,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, > uint64_t dst_offset, uint32_t byte_count, > struct dma_resv *resv, > struct dma_fence **fence, bool direct_submit, > - bool vm_needs_flush, bool tmz) > + bool vm_needs_flush, uint32_t copy_flags) > { > struct amdgpu_device *adev = ring->adev; > unsigned int num_loops, num_dw; @@ -2622,8 +2627,7 @@ int > amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, > uint32_t cur_size_in_bytes = min(byte_count, > max_bytes); > > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, > - dst_offset, cur_size_in_bytes, tmz); > - > + dst_offset, cur_size_in_bytes, > + copy_flags); > src_offset += cur_size_in_bytes; > dst_offset += cur_size_in_bytes; > byte_count -= cur_size_in_bytes; diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index fe37697a76b9..22cdbb52ba4e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -121,6 +121,8 @@ struct amdgpu_copy_mem { > unsigned long offset; > }; > > +#define AMDGPU_COPY_FLAGS_TMZ (1 << 0) > + > int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); @@ -158,7 +160,7 @@ int amdgpu_copy_buffer(struct 
amdgpu_ring *ring, uint64_t src_offset, > uint64_t dst_offset, uint32_t byte_count, > struct dma_resv *resv, > struct dma_fence **fence, bool direct_submit, > - bool vm_needs_flush, bool tmz); > + bool vm_needs_flush, uint32_t copy_flags); > int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, > const struct amdgpu_copy_mem *src, > const struct amdgpu_copy_mem *dst, diff > --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > index ee5dce6f6043..ab1f112a0cbd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > @@ -1305,7 +1305,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); > ib->ptr[ib->length_dw++] = byte_count; diff --git > a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > index 7ffaaaf1fcdd..93e933915e86 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > @@ -1191,7 +1191,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > index c5ea32687eb5..45ccd12bd857 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > @@ -1631,7 +1631,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > 
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index 7ae5f134f09b..90b936545dc9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -2468,11 +2468,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | > - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); > + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & > +AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); > ib->ptr[ib->length_dw++] = byte_count - 1; > ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ > ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff > --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > index fec5a3d1c4bc..7d1d65dca627 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c > @@ -1981,11 +1981,11 @@ static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | > - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); > + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & > +AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); > ib->ptr[ib->length_dw++] = byte_count - 1; > ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ > ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff > --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > index c1ff5eda8961..6a669613d028 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > @@ -1836,11 +1836,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | > - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); > + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & > +AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); > ib->ptr[ib->length_dw++] = byte_count - 1; > ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ > ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff > --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > index c441a20a41a3..aa0901135f30 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c > @@ -1778,11 +1778,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | > - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); > + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & > +AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); > ib->ptr[ib->length_dw++] = byte_count - 1; > ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ > ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff > --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > index f423cc6cc9d2..8a3889a20c60 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c > @@ -1604,11 +1604,11 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | > - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); > + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & > +AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); > ib->ptr[ib->length_dw++] = byte_count - 1; > ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ > ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff > --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > index 33f1a549e7b5..0245b0374f65 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c > @@ -1606,11 +1606,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, > uint64_t src_offset, > uint64_t dst_offset, > uint32_t byte_count, > - bool tmz) > + uint32_t copy_flags) > { > ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | > SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | > - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); > + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & > +AMDGPU_COPY_FLAGS_TMZ) ? 
1 : 0); > + > ib->ptr[ib->length_dw++] = byte_count - 1; > ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ > ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff > --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > index 0ed275dd6ecf..c96e10b5b86f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c > @@ -77,7 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, > uint64_t npages, > > dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); > amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, > - dst_addr, num_bytes, false); > + dst_addr, num_bytes, 0); > > amdgpu_ring_pad_ib(ring, &job->ibs[0]); > WARN_ON(job->ibs[0].length_dw > num_dw); @@ -153,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, > } > > r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, > - NULL, &next, false, true, false); > + NULL, &next, false, true, 0); > if (r) { > dev_err(adev->dev, "fail %d to copy memory\n", r); > goto out_unlock; > -- > 2.34.1 >