Re: [PATCH v9 01/14] drm/amdgpu: UAPI for user queue management

"Sharma, Shashank" <shashank.sharma@xxxxxxx> · Thu, 2 May 2024 14:53:19 +0200

On 02/05/2024 07:23, Sharma, Shashank wrote:
Hey Alex,

On 01/05/2024 22:39, Alex Deucher wrote:
On Fri, Apr 26, 2024 at 10:07 AM Shashank Sharma
<shashank.sharma@xxxxxxx> wrote:
From: Alex Deucher <alexander.deucher@xxxxxxx>

This patch introduces a new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

V2: Addressed review comments from Alex and Christian
     - Make the doorbell offset's comment clearer
     - Change the output parameter name to queue_id

V3: Integration with doorbell manager

V4:
     - Updated the UAPI doc (Pierre-Eric)
     - Created a Union for engine specific MQDs (Alex)
     - Added Christian's R-B
V5:
     - Add variables for GDS and CSA in MQD structure (Alex)
     - Make MQD data a ptr-size pair instead of union (Alex)

V9:
    - renamed struct drm_amdgpu_userq_mqd_gfx_v11 to struct
      drm_amdgpu_userq_mqd as it's being used for SDMA and
      compute queues as well

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Reviewed-by: Christian König <christian.koenig@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
  include/uapi/drm/amdgpu_drm.h | 110 
++++++++++++++++++++++++++++++++++
  1 file changed, 110 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h
index 96e32dafd4f0..22f56a30f7cb 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
  #define DRM_AMDGPU_VM                  0x13
  #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
  #define DRM_AMDGPU_SCHED               0x15
+#define DRM_AMDGPU_USERQ               0x16

  #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
  #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
  #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
DRM_AMDGPU_VM, union drm_amdgpu_vm)
  #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE 
+ DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
  #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
DRM_AMDGPU_USERQ, union drm_amdgpu_userq)

  /**
   * DOC: memory domains
@@ -317,6 +319,114 @@ union drm_amdgpu_ctx {
         union drm_amdgpu_ctx_out out;
  };

+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE   2
+
+/* Flag to indicate secure buffer related workload, unused for now */
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
+/* Flag to indicate AQL workload, unused for now */
+#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)
+
+/*
+ * MQD (memory queue descriptor) is a set of parameters which allow
+ * the GPU to uniquely define and identify a usermode queue. This
+ * structure defines the MQD for GFX-V11 IP ver 0.
+ */
+struct drm_amdgpu_userq_mqd {
Maybe rename this to drm_amdgpu_gfx_userq_mqd since it's gfx specific.
Then we can add different MQDs for SDMA, compute, etc. as they have
different metadata.  E.g., the shadow and CSA are gfx only.


Actually this was named drm_amdgpu_userq_mqd_gfx_v11_0 until the last 
patchset, but then I realized that apart from the objects (gds/shadow 
va) nothing is gfx specific; it's actually required for every userqueue 
IP which is MES based, so I thought it would be overkill to create 
multiple structures for almost the same data. If you feel strongly about 
this, I can change it again.

- Shashank


Please ignore my last comment, I understand what you are mentioning, and 
I have reformatted the patches accordingly. Now, I am keeping everything 
required for MES in one basic structure (drm_amdgpu_userq_in) and creating 
drm_amdgpu_userq_mqd_gfx_v11 for GFX specific things (like CSA, Shadow 
and GDS areas). Now there will be one separate patch which will enable 
GFX_IP on MES code, just like how we have separate patches for SDMA and 
Compute IP in this series. I will send the V10 patches with this 
reformatting in some time.

- Shashank


Alex


+       /**
+        * @queue_va: Virtual address of the GPU memory which holds 
the queue
+        * object. The queue holds the workload packets.
+        */
+       __u64   queue_va;
+       /**
+        * @queue_size: Size of the queue in bytes, this needs to be 
256-byte
+        * aligned.
+        */
+       __u64   queue_size;
+       /**
+        * @rptr_va : Virtual address of the GPU memory which holds 
the ring RPTR.
+        * This object must be at least 8 bytes in size and aligned 
to 8-byte offset.
+        */
+       __u64   rptr_va;
+       /**
+        * @wptr_va : Virtual address of the GPU memory which holds 
the ring WPTR.
+        * This object must be at least 8 bytes in size and aligned 
to 8-byte offset.
+        *
+        * Queue, RPTR and WPTR can come from the same object, as 
long as the size
+        * and alignment related requirements are met.
+        */
+       __u64   wptr_va;
+       /**
+        * @shadow_va: Virtual address of the GPU memory to hold the 
shadow buffer.
+        * This must be from a separate GPU object, and must be at 
least 4-page
+        * sized.
+        */
+       __u64   shadow_va;
+       /**
+        * @gds_va: Virtual address of the GPU memory to hold the 
GDS buffer.
+        * This must be from a separate GPU object, and must be at 
least 1-page
+        * sized.
+        */
+       __u64   gds_va;
+       /**
+        * @csa_va: Virtual address of the GPU memory to hold the 
CSA buffer.
+        * This must be from a separate GPU object, and must be at 
least 1-page
+        * sized.
+        */
+       __u64   csa_va;
+};
+
+struct drm_amdgpu_userq_in {
+       /** AMDGPU_USERQ_OP_* */
+       __u32   op;
+       /** Queue handle for USERQ_OP_FREE */
+       __u32   queue_id;
+       /** the target GPU engine to execute workload 
(AMDGPU_HW_IP_*) */
+       __u32   ip_type;
+       /**
+        * @flags: flags to indicate special function for queue like 
secure
+        * buffer (TMZ). Unused for now.
+        */
+       __u32   flags;
+       /**
+        * @doorbell_handle: the handle of doorbell GEM object
+        * associated to this client.
+        */
+       __u32   doorbell_handle;
+       /**
+        * @doorbell_offset: 32-bit offset of the doorbell in the 
doorbell bo.
+        * Kernel will generate absolute doorbell offset using 
doorbell_handle
+        * and doorbell_offset in the doorbell bo.
+        */
+       __u32   doorbell_offset;
+       /**
+        * @mqd: Queue descriptor for USERQ_OP_CREATE
+        * MQD data can be of different size for different GPU 
IP/engine and
+        * their respective versions/revisions, so this points to a 
__u64 *
+        * which holds MQD of this usermode queue.
+        */
+       __u64 mqd;
+       /**
+        * @mqd_size: size of MQD data in bytes, it must match the MQD 
structure
+        * size of the respective engine/revision defined in UAPI 
for ex, for
+        * gfx_v11 workloads, size = 
sizeof(drm_amdgpu_userq_mqd_gfx_v11).
+        */
+       __u64 mqd_size;
+};
+
+struct drm_amdgpu_userq_out {
+       /** Queue handle */
+       __u32   queue_id;
+       /** Flags */
+       __u32   flags;
+};
+
+union drm_amdgpu_userq {
+       struct drm_amdgpu_userq_in in;
+       struct drm_amdgpu_userq_out out;
+};
+
  /* vm ioctl */
  #define AMDGPU_VM_OP_RESERVE_VMID      1
  #define AMDGPU_VM_OP_UNRESERVE_VMID    2
--
2.43.2