Re: [PATCH v3 1/9] drm/amdgpu: UAPI for user queue management

Alex Deucher <alexdeucher@xxxxxxxxx> · Wed, 29 Mar 2023 15:46:03 -0400

On Wed, Mar 29, 2023 at 3:21 PM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
>
> Hey Alex,
>
> On 29/03/2023 19:57, Alex Deucher wrote:
> > On Wed, Mar 29, 2023 at 12:05 PM Shashank Sharma
> > <shashank.sharma@xxxxxxx> wrote:
> >> From: Alex Deucher <alexander.deucher@xxxxxxx>
> >>
> >> This patch intorduces new UAPI/IOCTL for usermode graphics
> >> queue. The userspace app will fill this structure and request
> >> the graphics driver to add a graphics work queue for it. The
> >> output of this UAPI is a queue id.
> >>
> >> This UAPI maps the queue into GPU, so the graphics app can start
> >> submitting work to the queue as soon as the call returns.
> >>
> >> V2: Addressed review comments from Alex and Christian
> >>      - Make the doorbell offset's comment clearer
> >>      - Change the output parameter name to queue_id
> >> V3: Integration with doorbell manager
> >>
> >> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> >> Cc: Christian Koenig <christian.koenig@xxxxxxx>
> >> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
> >> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
> >> ---
> >>   include/uapi/drm/amdgpu_drm.h | 55 +++++++++++++++++++++++++++++++++++
> >>   1 file changed, 55 insertions(+)
> >>
> >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >> index cc5d551abda5..e4943099b9d2 100644
> >> --- a/include/uapi/drm/amdgpu_drm.h
> >> +++ b/include/uapi/drm/amdgpu_drm.h
> >> @@ -54,6 +54,7 @@ extern "C" {
> >>   #define DRM_AMDGPU_VM                  0x13
> >>   #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
> >>   #define DRM_AMDGPU_SCHED               0x15
> >> +#define DRM_AMDGPU_USERQ               0x16
> >>
> >>   #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> >>   #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> >> @@ -71,6 +72,7 @@ extern "C" {
> >>   #define DRM_IOCTL_AMDGPU_VM            DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
> >>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >>   #define DRM_IOCTL_AMDGPU_SCHED         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >> +#define DRM_IOCTL_AMDGPU_USERQ         DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> >>
> >>   /**
> >>    * DOC: memory domains
> >> @@ -307,6 +309,59 @@ union drm_amdgpu_ctx {
> >>          union drm_amdgpu_ctx_out out;
> >>   };
> >>
> >> +/* user queue IOCTL */
> >> +#define AMDGPU_USERQ_OP_CREATE 1
> >> +#define AMDGPU_USERQ_OP_FREE   2
> >> +
> >> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
> >> +#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)
> >> +
> >> +struct drm_amdgpu_userq_mqd {
> >> +       /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
> >> +       __u32   flags;
> >> +       /** IP type: AMDGPU_HW_IP_* */
> >> +       __u32   ip_type;
> >> +       /** GEM object handle */
> >> +       __u32   doorbell_handle;
> >> +       /** Doorbell's offset in the doorbell bo */
> >> +       __u32   doorbell_offset;
> >> +       /** GPU virtual address of the queue */
> >> +       __u64   queue_va;
> >> +       /** Size of the queue in bytes */
> >> +       __u64   queue_size;
> >> +       /** GPU virtual address of the rptr */
> >> +       __u64   rptr_va;
> >> +       /** GPU virtual address of the wptr */
> >> +       __u64   wptr_va;
> >> +       /** GPU virtual address of the shadow context space */
> >> +       __u64   shadow_va;
> >> +};
> > We may want to make the MQD engine specific.  E.g., shadow is gfx
> > specific.  We also probably need the csa and gds buffers for gfx as
> > well.  Other engines may have their own additional buffer
> > requirements.
> >
> > Alex
>
> Sure, we can call it drm_amdgpu_userq_mqd_gfx to clarify that this MQD
> is specific to GFX engine.

We can make it a union and then add additional entries for SDMA,
compute, and VCN.  We should also move the IP type into struct
drm_amdgpu_userq_in so we know how to interpret the union.  Or make it
a u64 and handle it similarly to the chunks interface in
drm_amdgpu_cs_chunk.

Alex

>
> - Shashank
>
> >
> >
> >> +
> >> +struct drm_amdgpu_userq_in {
> >> +       /** AMDGPU_USERQ_OP_* */
> >> +       __u32   op;
> >> +       /** Flags */
> >> +       __u32   flags;
> >> +       /** Queue handle to associate the queue free call with,
> >> +        * unused for queue create calls */
> >> +       __u32   queue_id;
> >> +       __u32   pad;
> >> +       /** Queue descriptor */
> >> +       struct drm_amdgpu_userq_mqd mqd;
> >> +};
> >> +
> >> +struct drm_amdgpu_userq_out {
> >> +       /** Queue handle */
> >> +       __u32   queue_id;
> >> +       /** Flags */
> >> +       __u32   flags;
> >> +};
> >> +
> >> +union drm_amdgpu_userq {
> >> +       struct drm_amdgpu_userq_in in;
> >> +       struct drm_amdgpu_userq_out out;
> >> +};
> >> +
> >>   /* vm ioctl */
> >>   #define AMDGPU_VM_OP_RESERVE_VMID      1
> >>   #define AMDGPU_VM_OP_UNRESERVE_VMID    2
> >> --
> >> 2.40.0
> >>