Re: [PATCH] drm/amd/amdgpu: Fix MES init sequence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 10, 2025 at 1:58 PM Shaoyun Liu <shaoyun.liu@xxxxxxx> wrote:
>
> When MES is being used, the set_hw_resource_1 API is required to
> initialize the MES internal context correctly.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h  |  6 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  6 +--
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c   | 52 +++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/mes_v12_0.c   | 40 ++++++++----------
>  4 files changed, 48 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 4391b3383f0c..78362a838212 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -143,9 +143,9 @@ struct amdgpu_mes {
>         const struct amdgpu_mes_funcs   *funcs;
>
>         /* mes resource_1 bo*/
> -       struct amdgpu_bo    *resource_1;
> -       uint64_t            resource_1_gpu_addr;
> -       void                *resource_1_addr;
> +       struct amdgpu_bo    *resource_1[AMDGPU_MAX_MES_PIPES];
> +       uint64_t            resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
> +       void                *resource_1_addr[AMDGPU_MAX_MES_PIPES];
>
>  };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index ab7e73d0e7b1..980dfb8935b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -614,10 +614,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
>         vf2pf_info->decode_usage = 0;
>
>         vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
> -       vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr;
> +       vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr[0];
>
> -       if (adev->mes.resource_1) {
> -               vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size;
> +       if (adev->mes.resource_1[0]) {
> +               vf2pf_info->mes_info_size = adev->mes.resource_1[0]->tbo.base.size;
>         }
>         vf2pf_info->checksum =
>                 amd_sriov_msg_checksum(
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index a569d09a1a74..299f17868822 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -751,10 +751,10 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
>         mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
>         mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
>         mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
> -       mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr;
> +       mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr[0];
>         mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
>         mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr =
> -               mes->resource_1_gpu_addr + MES11_HW_RESOURCE_1_SIZE;
> +               mes->resource_1_gpu_addr[0] + MES11_HW_RESOURCE_1_SIZE;

This offset will need to be adjusted if the MES11_HW_RESOURCE_1_SIZE
region at the start of the BO is only allocated under SR-IOV.  See below.

>
>         return mes_v11_0_submit_pkt_and_poll_completion(mes,
>                         &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
> @@ -1392,7 +1392,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
>  static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
>  {
>         struct amdgpu_device *adev = ip_block->adev;
> -       int pipe, r;
> +       int pipe, r, bo_size;
>
>         adev->mes.funcs = &mes_v11_0_funcs;
>         adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
> @@ -1427,19 +1427,21 @@ static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
>         if (r)
>                 return r;
>
> -       if (amdgpu_sriov_is_mes_info_enable(adev) ||
> -           adev->gfx.enable_cleaner_shader) {
> -               r = amdgpu_bo_create_kernel(adev,
> -                                           MES11_HW_RESOURCE_1_SIZE + AMDGPU_GPU_PAGE_SIZE,
> -                                           PAGE_SIZE,
> -                                           AMDGPU_GEM_DOMAIN_VRAM,
> -                                           &adev->mes.resource_1,
> -                                           &adev->mes.resource_1_gpu_addr,
> -                                           &adev->mes.resource_1_addr);
> -               if (r) {
> -                       dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
> -                       return r;
> -               }
> +       bo_size = AMDGPU_GPU_PAGE_SIZE;
> +       if (amdgpu_sriov_is_mes_info_enable(adev)
> +               bo_size += MES11_HW_RESOURCE_1_SIZE;

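If you make the size depend on amdgpu_sriov_is_mes_info_enable(), it
will break the address for
mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr above when SR-IOV is
not enabled: the fence address is still computed as
resource_1_gpu_addr[0] + MES11_HW_RESOURCE_1_SIZE, but the BO is then
only AMDGPU_GPU_PAGE_SIZE bytes.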

> +
> +       /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/
> +       r = amdgpu_bo_create_kernel(adev,
> +                                   bo_size,
> +                                   PAGE_SIZE,
> +                                   AMDGPU_GEM_DOMAIN_VRAM,
> +                                   &adev->mes.resource_1[0],
> +                                   &adev->mes.resource_1_gpu_addr[0],
> +                                   &adev->mes.resource_1_addr[0]);
> +       if (r) {
> +               dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
> +               return r;
>         }
>
>         return 0;
> @@ -1450,11 +1452,8 @@ static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
>         struct amdgpu_device *adev = ip_block->adev;
>         int pipe;
>
> -       if (amdgpu_sriov_is_mes_info_enable(adev) ||
> -           adev->gfx.enable_cleaner_shader) {
> -               amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr,
> -                                     &adev->mes.resource_1_addr);
> -       }
> +       amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
> +                             &adev->mes.resource_1_addr[0]);
>
>         for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
>                 kfree(adev->mes.mqd_backup[pipe]);
> @@ -1643,13 +1642,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
>         if (r)
>                 goto failure;
>
> -       if (amdgpu_sriov_is_mes_info_enable(adev) ||
> -           adev->gfx.enable_cleaner_shader) {
> -               r = mes_v11_0_set_hw_resources_1(&adev->mes);
> -               if (r) {
> -                       DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
> -                       goto failure;
> -               }
> +       r = mes_v11_0_set_hw_resources_1(&adev->mes);
> +       if (r) {
> +               DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
> +               goto failure;
>         }
>
>         r = mes_v11_0_query_sched_status(&adev->mes);
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 96336652d14c..abe8592170b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -687,7 +687,7 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
>         mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
>         mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa;
>         mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr =
> -               mes->resource_1_gpu_addr;
> +               mes->resource_1_gpu_addr[pipe];
>
>         return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
>                         &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
> @@ -1530,21 +1530,19 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
>
>                 if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
>                         r = mes_v12_0_kiq_ring_init(adev);
> -               else
> +               else {

Per kernel coding style, the if branch of this statement also needs {}
once you add them to the else branch.

Alex

>                         r = mes_v12_0_ring_init(adev, pipe);
> -               if (r)
> -                       return r;
> -       }
> -
> -       if (adev->enable_uni_mes) {
> -               r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
> -                                           AMDGPU_GEM_DOMAIN_VRAM,
> -                                           &adev->mes.resource_1,
> -                                           &adev->mes.resource_1_gpu_addr,
> -                                           &adev->mes.resource_1_addr);
> -               if (r) {
> -                       dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
> -                       return r;
> +                       if (r)
> +                               return r;
> +                       r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
> +                                                   AMDGPU_GEM_DOMAIN_VRAM,
> +                                                   &adev->mes.resource_1[pipe],
> +                                                   &adev->mes.resource_1_gpu_addr[pipe],
> +                                                   &adev->mes.resource_1_addr[pipe]);
> +                       if (r) {
> +                               dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
> +                               return r;
> +                       }
>                 }
>         }
>
> @@ -1556,12 +1554,11 @@ static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
>         struct amdgpu_device *adev = ip_block->adev;
>         int pipe;
>
> -       if (adev->enable_uni_mes)
> -               amdgpu_bo_free_kernel(&adev->mes.resource_1,
> -                                     &adev->mes.resource_1_gpu_addr,
> -                                     &adev->mes.resource_1_addr);
> -
>         for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
> +               amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
> +                                     &adev->mes.resource_1_gpu_addr[pipe],
> +                                     &adev->mes.resource_1_addr[pipe]);
> +
>                 kfree(adev->mes.mqd_backup[pipe]);
>
>                 amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
> @@ -1760,8 +1757,7 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
>         if (r)
>                 goto failure;
>
> -       if (adev->enable_uni_mes)
> -               mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
> +       mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
>
>         mes_v12_0_init_aggregated_doorbell(&adev->mes);
>
> --
> 2.34.1
>




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux