On Thu, Jul 6, 2023 at 8:36 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote: > > The FW expects us to allocate at least one page as context > space to process gang, process, GDS and FW related work. > This patch creates a joint object for the same, and calculates > GPU space offsets for each of these spaces. > > V1: Addressed review comments on RFC patch: > Alex: Make this function IP specific > > V2: Addressed review comments from Christian > - Allocate only one object for total FW space, and calculate > offsets for each of these objects. > > V3: Integration with doorbell manager > > V4: Review comments: > - Remove shadow from FW space list from cover letter (Alex) > - Alignment of macro (Luben) > > V5: Merged patches 5 and 6 into this single patch > Addressed review comments: > - Use lower_32_bits instead of mask (Christian) > - gfx_v11_0 instead of gfx_v11 in function names (Alex) > - Shadow and GDS objects are now coming from userspace (Christian, > Alex) > > Cc: Alex Deucher <alexander.deucher@xxxxxxx> > Cc: Christian Koenig <christian.koenig@xxxxxxx> > Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx> > Signed-off-by: Arvind Yadav <arvind.yadav@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 62 +++++++++++++++++++ > .../gpu/drm/amd/include/amdgpu_userqueue.h | 4 ++ > 2 files changed, 66 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > index e76e1b86b434..7d3b19e08bbb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > @@ -61,6 +61,9 @@ > #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 > #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e > #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 > +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE > +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE > +#define AMDGPU_USERQ_FW_CTX_SZ PAGE_SIZE > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); > MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); > @@ -6488,6 
+6491,57 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block = > .funcs = &gfx_v11_0_ip_funcs, > }; > > +static void gfx_v11_0_userq_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr, > + struct amdgpu_usermode_queue *queue) > +{ > + struct amdgpu_userq_obj *ctx = &queue->fw_obj; > + > + amdgpu_bo_free_kernel(&ctx->obj, &ctx->gpu_addr, &ctx->cpu_ptr); > +} > + > +static int gfx_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, > + struct amdgpu_usermode_queue *queue, > + struct drm_amdgpu_userq_mqd_gfx_v11_0 *mqd_user) > +{ > + struct amdgpu_device *adev = uq_mgr->adev; > + struct amdgpu_userq_obj *ctx = &queue->fw_obj; > + struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; > + int r, size; > + > + /* > + * The FW expects at least one page space allocated for > + * process ctx, gang ctx and fw ctx each. Create an object > + * for the same. > + */ > + size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_FW_CTX_SZ + > + AMDGPU_USERQ_GANG_CTX_SZ; > + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, > + AMDGPU_GEM_DOMAIN_GTT, > + &ctx->obj, > + &ctx->gpu_addr, > + &ctx->cpu_ptr); > + if (r) { > + DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); > + return r; > + } > + > + queue->proc_ctx_gpu_addr = ctx->gpu_addr; > + queue->gang_ctx_gpu_addr = queue->proc_ctx_gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; > + queue->fw_ctx_gpu_addr = queue->gang_ctx_gpu_addr + AMDGPU_USERQ_GANG_CTX_SZ; > + > + mqd->fw_work_area_base_lo = lower_32_bits(queue->fw_ctx_gpu_addr); > + mqd->fw_work_area_base_lo = upper_32_bits(queue->fw_ctx_gpu_addr); > + > + /* Shadow and GDS objects come directly from userspace */ > + mqd->shadow_base_lo = lower_32_bits(mqd_user->shadow_va); > + mqd->shadow_base_hi = upper_32_bits(mqd_user->shadow_va); > + > + mqd->gds_bkup_base_lo = lower_32_bits(mqd_user->gds_va); > + mqd->gds_bkup_base_hi = upper_32_bits(mqd_user->gds_va); > + > + return 0; > +} > + > static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, > struct 
drm_amdgpu_userq_in *args_in, > struct amdgpu_usermode_queue *queue) > @@ -6540,6 +6594,13 @@ static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, > goto free_mqd; > } > > + /* Create BO for FW operations */ > + r = gfx_v11_0_userq_create_ctx_space(uq_mgr, queue, &mqd_user); > + if (r) { > + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); > + goto free_mqd; > + } > + > return 0; > > free_mqd: > @@ -6552,6 +6613,7 @@ gfx_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userm > { > struct amdgpu_userq_obj *mqd = &queue->mqd; > > + gfx_v11_0_userq_destroy_ctx_space(uq_mgr, queue); > amdgpu_bo_free_kernel(&mqd->obj, &mqd->gpu_addr, &mqd->cpu_ptr); > } > > diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h > index 240f92796f00..a5cdb319193d 100644 > --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h > +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h > @@ -40,10 +40,14 @@ struct amdgpu_usermode_queue { > uint64_t doorbell_handle; > uint64_t doorbell_index; > uint64_t flags; > + uint64_t proc_ctx_gpu_addr; > + uint64_t gang_ctx_gpu_addr; > + uint64_t fw_ctx_gpu_addr; Is there a way we could store these in some gfx11 structure? These are specific to gfx11 and other IPs may have other metadata buffers they need to allocate. Maybe subclass a gfx11 userq structure or add a priv ptr off of the userq structure so IPs can add their implementation details there. Alex > struct amdgpu_mqd_prop *userq_prop; > struct amdgpu_userq_mgr *userq_mgr; > struct amdgpu_vm *vm; > struct amdgpu_userq_obj mqd; > + struct amdgpu_userq_obj fw_obj; > }; > > struct amdgpu_userq_funcs { > -- > 2.40.1 >