Re: [PATCH v6 5/9] drm/amdgpu: create context space for usermode queue

Alex Deucher <alexdeucher@xxxxxxxxx> · Wed, 4 Oct 2023 09:13:32 -0400

On Fri, Sep 29, 2023 at 1:50 PM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
>
>
> On 20/09/2023 17:21, Alex Deucher wrote:
> > On Fri, Sep 8, 2023 at 12:45 PM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
> >> The FW expects us to allocate at least one page as context
> >> space to process gang, process, GDS and FW-related work.
> >> This patch creates a joint object for the same, and calculates
> >> GPU space offsets of these spaces.
> >>
> >> V1: Addressed review comments on RFC patch:
> >>      Alex: Make this function IP specific
> >>
> >> V2: Addressed review comments from Christian
> >>      - Allocate only one object for total FW space, and calculate
> >>        offsets for each of these objects.
> >>
> >> V3: Integration with doorbell manager
> >>
> >> V4: Review comments:
> >>      - Remove shadow from FW space list from cover letter (Alex)
> >>      - Alignment of macro (Luben)
> >>
> >> V5: Merged patches 5 and 6 into this single patch
> >>      Addressed review comments:
> >>      - Use lower_32_bits instead of mask (Christian)
> >>      - gfx_v11_0 instead of gfx_v11 in function names (Alex)
> >>      - Shadow and GDS objects are now coming from userspace (Christian,
> >>        Alex)
> >>
> >> V6:
> >>      - Add a comment to replace amdgpu_bo_create_kernel() with
> >>        amdgpu_bo_create() during fw_ctx object creation (Christian).
> >>      - Move proc_ctx_gpu_addr, gang_ctx_gpu_addr and fw_ctx_gpu_addr out
> >>        of generic queue structure and make it gen11 specific (Alex).
> >>
> >> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> >> Cc: Christian Koenig <christian.koenig@xxxxxxx>
> >> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
> >> Signed-off-by: Arvind Yadav <arvind.yadav@xxxxxxx>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c        | 61 +++++++++++++++++++
> >>   .../gpu/drm/amd/include/amdgpu_userqueue.h    |  1 +
> >>   2 files changed, 62 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >> index 6760abda08df..8ffb5dee72a9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >> @@ -61,6 +61,9 @@
> >>   #define regCGTT_WD_CLK_CTRL_BASE_IDX   1
> >>   #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1  0x4e7e
> >>   #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
> >> +#define AMDGPU_USERQ_PROC_CTX_SZ   PAGE_SIZE
> >> +#define AMDGPU_USERQ_GANG_CTX_SZ   PAGE_SIZE
> >> +#define AMDGPU_USERQ_FW_CTX_SZ     PAGE_SIZE
> >>
> >>   MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
> >>   MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
> >> @@ -6424,6 +6427,56 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
> >>          .funcs = &gfx_v11_0_ip_funcs,
> >>   };
> >>
> >> +static void gfx_v11_0_userq_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
> >> +                                             struct amdgpu_usermode_queue *queue)
> >> +{
> >> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> >> +
> >> +       amdgpu_bo_free_kernel(&ctx->obj, &ctx->gpu_addr, &ctx->cpu_ptr);
> >> +}
> >> +
> >> +static int gfx_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
> >> +                                           struct amdgpu_usermode_queue *queue,
> >> +                                           struct drm_amdgpu_userq_mqd_gfx_v11_0 *mqd_user)
> >> +{
> >> +       struct amdgpu_device *adev = uq_mgr->adev;
> >> +       struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> >> +       struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr;
> >> +       uint64_t fw_ctx_gpu_addr;
> >> +       int r, size;
> >> +
> >> +       /*
> >> +        * The FW expects at least one page space allocated for
> >> +        * process ctx, gang ctx and fw ctx each. Create an object
> >> +        * for the same.
> >> +        */
> >> +       size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_FW_CTX_SZ +
> >> +              AMDGPU_USERQ_GANG_CTX_SZ;
> >> +       r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
> >> +                                   AMDGPU_GEM_DOMAIN_GTT,
> >> +                                   &ctx->obj,
> >> +                                   &ctx->gpu_addr,
> >> +                                   &ctx->cpu_ptr);
> >> +       if (r) {
> >> +               DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
> >> +               return r;
> >> +       }
> >> +
> >> +       fw_ctx_gpu_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ +
> >> +                         AMDGPU_USERQ_GANG_CTX_SZ;
> >> +       mqd->fw_work_area_base_lo = lower_32_bits(fw_ctx_gpu_addr);
> >> +       mqd->fw_work_area_base_hi = upper_32_bits(fw_ctx_gpu_addr);
> >> +
> >> +       /* Shadow and GDS objects come directly from userspace */
> >> +       mqd->shadow_base_lo = lower_32_bits(mqd_user->shadow_va);
> >> +       mqd->shadow_base_hi = upper_32_bits(mqd_user->shadow_va);
> >> +
> >> +       mqd->gds_bkup_base_lo = lower_32_bits(mqd_user->gds_va);
> >> +       mqd->gds_bkup_base_hi = upper_32_bits(mqd_user->gds_va);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >>   static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> >>                                        struct drm_amdgpu_userq_in *args_in,
> >>                                        struct amdgpu_usermode_queue *queue)
> >> @@ -6480,6 +6533,13 @@ static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> >>                  goto free_mqd;
> >>          }
> >>
> >> +       /* Create BO for FW operations */
> >> +       r = gfx_v11_0_userq_create_ctx_space(uq_mgr, queue, &mqd_user);
> >> +       if (r) {
> >> +               DRM_ERROR("Failed to allocate BO for userqueue (%d)\n", r);
> >> +               goto free_mqd;
> >> +       }
> >> +
> >>          return 0;
> >>
> >>   free_mqd:
> >> @@ -6492,6 +6552,7 @@ gfx_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userm
> >>   {
> >>          struct amdgpu_userq_obj *mqd = &queue->mqd;
> >>
> >> +       gfx_v11_0_userq_destroy_ctx_space(uq_mgr, queue);
> >>          amdgpu_bo_free_kernel(&mqd->obj, &mqd->gpu_addr, &mqd->cpu_ptr);
> >>   }
> >>
> >> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> >> index 240f92796f00..34e20daa06c8 100644
> >> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> >> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> >> @@ -44,6 +44,7 @@ struct amdgpu_usermode_queue {
> >>          struct amdgpu_userq_mgr *userq_mgr;
> >>          struct amdgpu_vm        *vm;
> >>          struct amdgpu_userq_obj mqd;
> >> +       struct amdgpu_userq_obj fw_obj;
> > Since this is gfx 11 specific, I feel like this would be better stored
> > in some gfx 11 structure rather than the generic user queue structure.
> > Maybe a driver private pointer here would make more sense, then each
> > IP can hang whatever structure they want here for IP specific
> > metadata.
>
>
> I was thinking more on this, and to me it seems like it's the size of
> this FW space which is going to be specific to an IP, but some object
> space probably will always be required, as MES will always need some
> space to save its process and gang ctx. So if this is not a big concern
> for you, I would like to keep it here and see how this space requirement
> evolves over time.

Sure.  We can revisit this later.

Alex

>
> - Shashank
>
> >
> > Alex
> >
> >
> >>   };
> >>
> >>   struct amdgpu_userq_funcs {
> >> --
> >> 2.42.0
> >>