Re: [PATCH v1 1/3] drm/amdgpu: optimize the padding for gfx12

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 31, 2024 at 9:13 AM Sunil Khatri <sunil.khatri@xxxxxxx> wrote:
>
> Adding NOP packets one by one in the ring
> does not use the CP efficiently.
>
> Solution:
> Use the CP optimization when adding NOP packets so that the PFP
> can discard them based on the count given in the packet
> header instead of fetching all NOP packets
> one by one.
>
> Cc: Christian König <christian.koenig@xxxxxxx>
> Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>
> Cc: Tvrtko Ursulin <tursulin@xxxxxxxxxx>
> Cc: Marek Olšák <marek.olsak@xxxxxxx>
> Signed-off-by: Sunil Khatri <sunil.khatri@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index f932c7ff85e3..29b3bf1b29b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5005,6 +5005,24 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
>         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
>  }
>
> +static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
> +{
> +       int i;
> +
> +       /* Header itself is a NOP packet */
> +       if (num_nop == 1) {
> +               amdgpu_ring_write(ring, ring->funcs->nop);
> +               return;
> +       }
> +
> +       /* HW optimization covers a count of at most 0x3ffe; the remaining NOPs follow one at a time */
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
> +
> +       /* Header is at index 0, followed by num_nop - 1 NOP packets */
> +       for (i = 1; i < num_nop; i++)
> +               amdgpu_ring_write(ring, ring->funcs->nop);

This loop should be removed. I explained the reason in the gfx10 commit.

Marek

> +}
> +
>  static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
>  {
>         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -5186,7 +5204,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
>         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
>         .test_ring = gfx_v12_0_ring_test_ring,
>         .test_ib = gfx_v12_0_ring_test_ib,
> -       .insert_nop = amdgpu_ring_insert_nop,
> +       .insert_nop = gfx_v12_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
>         .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
>         .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
> @@ -5224,7 +5242,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
>         .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
>         .test_ring = gfx_v12_0_ring_test_ring,
>         .test_ib = gfx_v12_0_ring_test_ib,
> -       .insert_nop = amdgpu_ring_insert_nop,
> +       .insert_nop = gfx_v12_ring_insert_nop,
>         .pad_ib = amdgpu_ring_generic_pad_ib,
>         .emit_wreg = gfx_v12_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
> --
> 2.34.1
>




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux