Re: [PATCH v5 05/10] drm/amdgpu: create context space for usermode queue

Alex Deucher <alexdeucher@xxxxxxxxx> · Thu, 6 Jul 2023 12:44:40 -0400

On Thu, Jul 6, 2023 at 8:36 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
>
> The FW expects us to allocate at least one page as context
> space to process gang, process, GDS and FW related work.
> This patch creates a joint object for the same, and calculates
> GPU space offsets for each of these spaces.
>
> V1: Addressed review comments on RFC patch:
>     Alex: Make this function IP specific
>
> V2: Addressed review comments from Christian
>     - Allocate only one object for total FW space, and calculate
>       offsets for each of these objects.
>
> V3: Integration with doorbell manager
>
> V4: Review comments:
>     - Remove shadow from FW space list from cover letter (Alex)
>     - Alignment of macro (Luben)
>
> V5: Merged patches 5 and 6 into this single patch
>     Addressed review comments:
>     - Use lower_32_bits instead of mask (Christian)
>     - gfx_v11_0 instead of gfx_v11 in function names (Alex)
>     - Shadow and GDS objects are now coming from userspace (Christian,
>       Alex)
>
> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> Cc: Christian Koenig <christian.koenig@xxxxxxx>
> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
> Signed-off-by: Arvind Yadav <arvind.yadav@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c        | 62 +++++++++++++++++++
>  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  4 ++
>  2 files changed, 66 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index e76e1b86b434..7d3b19e08bbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -61,6 +61,9 @@
>  #define regCGTT_WD_CLK_CTRL_BASE_IDX   1
>  #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1  0x4e7e
>  #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
> +#define AMDGPU_USERQ_PROC_CTX_SZ   PAGE_SIZE
> +#define AMDGPU_USERQ_GANG_CTX_SZ   PAGE_SIZE
> +#define AMDGPU_USERQ_FW_CTX_SZ     PAGE_SIZE
>
>  MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
>  MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
> @@ -6488,6 +6491,57 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
>         .funcs = &gfx_v11_0_ip_funcs,
>  };
>
> +static void gfx_v11_0_userq_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
> +                                             struct amdgpu_usermode_queue *queue)
> +{
> +       /* Free the BO that backs this queue's FW context space (queue->fw_obj). */
> +       amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr,
> +                             &queue->fw_obj.cpu_ptr);
> +}
> +
> +static int gfx_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
> +                                           struct amdgpu_usermode_queue *queue,
> +                                           struct drm_amdgpu_userq_mqd_gfx_v11_0 *mqd_user)
> +{
> +       struct amdgpu_device *adev = uq_mgr->adev;
> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> +       struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr;
> +       int r, size;
> +
> +       /*
> +        * The FW expects at least one page each for the process, gang
> +        * and FW contexts. Back all three with a single GTT object,
> +        * laid out in that order. Returns 0 or the allocation error.
> +        */
> +       size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_FW_CTX_SZ +
> +              AMDGPU_USERQ_GANG_CTX_SZ;
> +       r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
> +                                   AMDGPU_GEM_DOMAIN_GTT,
> +                                   &ctx->obj,
> +                                   &ctx->gpu_addr,
> +                                   &ctx->cpu_ptr);
> +       if (r) {
> +               DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
> +               return r;
> +       }
> +
> +       queue->proc_ctx_gpu_addr = ctx->gpu_addr;
> +       queue->gang_ctx_gpu_addr = queue->proc_ctx_gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
> +       queue->fw_ctx_gpu_addr = queue->gang_ctx_gpu_addr + AMDGPU_USERQ_GANG_CTX_SZ;
> +
> +       mqd->fw_work_area_base_lo = lower_32_bits(queue->fw_ctx_gpu_addr);
> +       mqd->fw_work_area_base_hi = upper_32_bits(queue->fw_ctx_gpu_addr);
> +
> +       /* Shadow and GDS objects come directly from userspace */
> +       mqd->shadow_base_lo = lower_32_bits(mqd_user->shadow_va);
> +       mqd->shadow_base_hi = upper_32_bits(mqd_user->shadow_va);
> +
> +       mqd->gds_bkup_base_lo = lower_32_bits(mqd_user->gds_va);
> +       mqd->gds_bkup_base_hi = upper_32_bits(mqd_user->gds_va);
> +
> +       return 0;
> +}
> +
>  static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
>                                       struct drm_amdgpu_userq_in *args_in,
>                                       struct amdgpu_usermode_queue *queue)
> @@ -6540,6 +6594,13 @@ static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
>                 goto free_mqd;
>         }
>
> +       /* Create BO for FW operations */
> +       r = gfx_v11_0_userq_create_ctx_space(uq_mgr, queue, &mqd_user);
> +       if (r) {
> +               DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
> +               goto free_mqd;
> +       }
> +
>         return 0;
>
>  free_mqd:
> @@ -6552,6 +6613,7 @@ gfx_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userm
>  {
>         struct amdgpu_userq_obj *mqd = &queue->mqd;
>
> +       gfx_v11_0_userq_destroy_ctx_space(uq_mgr, queue);
>         amdgpu_bo_free_kernel(&mqd->obj, &mqd->gpu_addr, &mqd->cpu_ptr);
>  }
>
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> index 240f92796f00..a5cdb319193d 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> @@ -40,10 +40,14 @@ struct amdgpu_usermode_queue {
>         uint64_t                doorbell_handle;
>         uint64_t                doorbell_index;
>         uint64_t                flags;
> +       uint64_t                proc_ctx_gpu_addr;
> +       uint64_t                gang_ctx_gpu_addr;
> +       uint64_t                fw_ctx_gpu_addr;

Is there a way we could store these in some gfx11 structure?  These
are specific to gfx11 and other IPs may have other metadata buffers
they need to allocate.  Maybe subclass a gfx11 userq structure or add
a priv ptr off of the userq structure so IPs can add their
implementation details there.

Alex

>         struct amdgpu_mqd_prop  *userq_prop;
>         struct amdgpu_userq_mgr *userq_mgr;
>         struct amdgpu_vm        *vm;
>         struct amdgpu_userq_obj mqd;
> +       struct amdgpu_userq_obj fw_obj;
>  };
>
>  struct amdgpu_userq_funcs {
> --
> 2.40.1
>