Re: [PATCH 1/1] drm/amdgpu: rework context priority handling

Christian König <christian.koenig@xxxxxxx> · Wed, 25 Aug 2021 14:29:09 +0200

Am 25.08.21 um 14:20 schrieb Lazar, Lijo:
On 8/25/2021 4:52 PM, Nirmoy Das wrote:
To get a hardware queue priority for a context, we are currently
mapping AMDGPU_CTX_PRIORITY_* to DRM_SCHED_PRIORITY_* and then
to hardware queue priority, which is not the right way to do that
as DRM_SCHED_PRIORITY_* is software scheduler's priority and it is
independent from a hardware queue priority.

Use userspace provided context priority, AMDGPU_CTX_PRIORITY_* to
map a context to proper hardware queue priority.

Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 127 ++++++++++++++++------
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   |   8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c |  44 ++------
  3 files changed, 105 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e7a010b7ca1f..c88c5c6c54a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -43,14 +43,61 @@ const unsigned int 
amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
      [AMDGPU_HW_IP_VCN_JPEG]    =    1,
  };
  +bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
+{
+    switch (ctx_prio) {
+    case AMDGPU_CTX_PRIORITY_UNSET:
+    case AMDGPU_CTX_PRIORITY_VERY_LOW:
+    case AMDGPU_CTX_PRIORITY_LOW:
+    case AMDGPU_CTX_PRIORITY_NORMAL:
+    case AMDGPU_CTX_PRIORITY_HIGH:
+    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static enum drm_sched_priority
+amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
+{
+    switch (ctx_prio) {
+    case AMDGPU_CTX_PRIORITY_UNSET:
+        return DRM_SCHED_PRIORITY_UNSET;
+
+    case AMDGPU_CTX_PRIORITY_VERY_LOW:
+        return DRM_SCHED_PRIORITY_MIN;
+
+    case AMDGPU_CTX_PRIORITY_LOW:
+        return DRM_SCHED_PRIORITY_MIN;
+
+    case AMDGPU_CTX_PRIORITY_NORMAL:
+        return DRM_SCHED_PRIORITY_NORMAL;
+
+    case AMDGPU_CTX_PRIORITY_HIGH:
+        return DRM_SCHED_PRIORITY_HIGH;
+
+    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+        return DRM_SCHED_PRIORITY_HIGH;
+
+    /* This should not happen as we sanitized userspace provided 
priority
+     * already, WARN if this happens.
+     */
+    default:
+        WARN(1, "Invalid context priority %d\n", ctx_prio);
+        return DRM_SCHED_PRIORITY_NORMAL;
+    }
+
+}
+
  static int amdgpu_ctx_priority_permit(struct drm_file *filp,
-                      enum drm_sched_priority priority)
+                      int32_t priority)
  {
-    if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
+    if (!amdgpu_ctx_priority_is_valid(priority))
          return -EINVAL;
        /* NORMAL and below are accessible by everyone */
-    if (priority <= DRM_SCHED_PRIORITY_NORMAL)
+    if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
          return 0;
        if (capable(CAP_SYS_NICE))
@@ -62,26 +109,35 @@ static int amdgpu_ctx_priority_permit(struct 
drm_file *filp,
      return -EACCES;
  }
  -static enum gfx_pipe_priority 
amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
+static enum gfx_pipe_priority 
amdgpu_ctx_prio_to_compute_prio(int32_t prio)
  {
      switch (prio) {
-    case DRM_SCHED_PRIORITY_HIGH:
-    case DRM_SCHED_PRIORITY_KERNEL:
+    case AMDGPU_CTX_PRIORITY_HIGH:
+    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
          return AMDGPU_GFX_PIPE_PRIO_HIGH;
      default:
          return AMDGPU_GFX_PIPE_PRIO_NORMAL;
      }
  }
  -static unsigned int amdgpu_ctx_prio_sched_to_hw(struct 
amdgpu_device *adev,
-                         enum drm_sched_priority prio,
-                         u32 hw_ip)
+static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, 
u32 hw_ip)
  {
+    struct amdgpu_device *adev = ctx->adev;
+    int32_t ctx_prio;
      unsigned int hw_prio;
  -    hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
-            amdgpu_ctx_sched_prio_to_compute_prio(prio) :
-            AMDGPU_RING_PRIO_DEFAULT;
+    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+            ctx->init_priority : ctx->override_priority;
+
+    switch (hw_ip) {
+    case AMDGPU_HW_IP_COMPUTE:
+        hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
+        break;
+    default:
+        hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+        break;
+    }
+
      hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
      if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
          hw_prio = AMDGPU_RING_PRIO_DEFAULT;
@@ -89,15 +145,17 @@ static unsigned int 
amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
      return hw_prio;
  }
  +
  static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
-                   const u32 ring)
+                  const u32 ring)
  {
      struct amdgpu_device *adev = ctx->adev;
      struct amdgpu_ctx_entity *entity;
      struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
      unsigned num_scheds = 0;
+    int32_t ctx_prio;
      unsigned int hw_prio;
-    enum drm_sched_priority priority;
+    enum drm_sched_priority drm_prio;
      int r;
        entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
@@ -105,10 +163,11 @@ static int amdgpu_ctx_init_entity(struct 
amdgpu_ctx *ctx, u32 hw_ip,
      if (!entity)
          return  -ENOMEM;
  +    ctx_prio = (ctx->override_priority == 
AMDGPU_CTX_PRIORITY_UNSET) ?
+            ctx->init_priority : ctx->override_priority;
      entity->sequence = 1;
-    priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
-                ctx->init_priority : ctx->override_priority;
-    hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);
+    hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+    drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
        hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
      scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
@@ -124,7 +183,7 @@ static int amdgpu_ctx_init_entity(struct 
amdgpu_ctx *ctx, u32 hw_ip,
          num_scheds = 1;
      }
  -    r = drm_sched_entity_init(&entity->entity, priority, scheds, 
num_scheds,
+    r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, 
num_scheds,
                    &ctx->guilty);
      if (r)
          goto error_free_entity;
@@ -139,7 +198,7 @@ static int amdgpu_ctx_init_entity(struct 
amdgpu_ctx *ctx, u32 hw_ip,
  }
    static int amdgpu_ctx_init(struct amdgpu_device *adev,
-               enum drm_sched_priority priority,
+               int32_t priority,
                 struct drm_file *filp,
                 struct amdgpu_ctx *ctx)
  {
@@ -161,7 +220,7 @@ static int amdgpu_ctx_init(struct amdgpu_device 
*adev,
      ctx->reset_counter_query = ctx->reset_counter;
      ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
      ctx->init_priority = priority;
-    ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+    ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
        return 0;
  }
@@ -234,7 +293,7 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, 
u32 hw_ip, u32 instance,
  static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
                  struct amdgpu_fpriv *fpriv,
                  struct drm_file *filp,
-                enum drm_sched_priority priority,
+                int32_t priority,
                  uint32_t *id)
  {
      struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
@@ -397,19 +456,19 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, 
void *data,
  {
      int r;
      uint32_t id;
-    enum drm_sched_priority priority;
+    int32_t priority;
        union drm_amdgpu_ctx *args = data;
      struct amdgpu_device *adev = drm_to_adev(dev);
      struct amdgpu_fpriv *fpriv = filp->driver_priv;
        id = args->in.ctx_id;
-    r = amdgpu_to_sched_priority(args->in.priority, &priority);
+    priority = args->in.priority;
        /* For backwards compatibility reasons, we need to accept
       * ioctls with garbage in the priority field */
-    if (r == -EINVAL)
-        priority = DRM_SCHED_PRIORITY_NORMAL;
+    if (!amdgpu_ctx_priority_is_valid(priority))
+        priority = AMDGPU_CTX_PRIORITY_NORMAL;
        switch (args->in.op) {
      case AMDGPU_CTX_OP_ALLOC_CTX:
@@ -515,9 +574,9 @@ struct dma_fence *amdgpu_ctx_get_fence(struct 
amdgpu_ctx *ctx,
  }
    static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
-                        struct amdgpu_ctx_entity *aentity,
-                        int hw_ip,
-                        enum drm_sched_priority priority)
+                       struct amdgpu_ctx_entity *aentity,
+                       int hw_ip,
+                       int32_t priority)
  {
      struct amdgpu_device *adev = ctx->adev;
      unsigned int hw_prio;
@@ -525,12 +584,12 @@ static void 
amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
      unsigned num_scheds;
        /* set sw priority */
-    drm_sched_entity_set_priority(&aentity->entity, priority);
+    drm_sched_entity_set_priority(&aentity->entity,
+                      amdgpu_ctx_to_drm_sched_prio(priority));
        /* set hw priority */
      if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
-        hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
-                              AMDGPU_HW_IP_COMPUTE);
+        hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
          hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);

Not related to this patch, but this kind of logic may break some day. 
There is a HWIP specific priority and there is another RING_PRIO which 
is unmapped to HWIP priority Ex: when a new priority is added for VCN 
which is higher than any of the existing priorities.

Yes, that's something I've noticed as well.

Either we should leave the exact mapping to the engine or have a global 
definition of possible hw priorities (the later is preferable I think).

Christian.


Thanks,
Lijo

          scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
          num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
@@ -540,14 +599,14 @@ static void 
amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
  }
    void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-                  enum drm_sched_priority priority)
+                  int32_t priority)
  {
-    enum drm_sched_priority ctx_prio;
+    int32_t ctx_prio;
      unsigned i, j;
        ctx->override_priority = priority;
  -    ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+    ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
              ctx->init_priority : ctx->override_priority;
      for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
          for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 14db16bc3322..a44b8b8ed39c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -47,8 +47,8 @@ struct amdgpu_ctx {
      spinlock_t            ring_lock;
      struct amdgpu_ctx_entity 
*entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
      bool                preamble_presented;
-    enum drm_sched_priority        init_priority;
-    enum drm_sched_priority        override_priority;
+    int32_t                init_priority;
+    int32_t                override_priority;
      struct mutex            lock;
      atomic_t            guilty;
      unsigned long            ras_counter_ce;
@@ -75,8 +75,8 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
  struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
                         struct drm_sched_entity *entity,
                         uint64_t seq);
-void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-                  enum drm_sched_priority priority);
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int32_t 
ctx_prio);
    int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
               struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index b7d861ed5284..e9b45089a28a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -32,37 +32,9 @@
  #include "amdgpu_sched.h"
  #include "amdgpu_vm.h"
  -int amdgpu_to_sched_priority(int amdgpu_priority,
-                 enum drm_sched_priority *prio)
-{
-    switch (amdgpu_priority) {
-    case AMDGPU_CTX_PRIORITY_VERY_HIGH:
-        *prio = DRM_SCHED_PRIORITY_HIGH;
-        break;
-    case AMDGPU_CTX_PRIORITY_HIGH:
-        *prio = DRM_SCHED_PRIORITY_HIGH;
-        break;
-    case AMDGPU_CTX_PRIORITY_NORMAL:
-        *prio = DRM_SCHED_PRIORITY_NORMAL;
-        break;
-    case AMDGPU_CTX_PRIORITY_LOW:
-    case AMDGPU_CTX_PRIORITY_VERY_LOW:
-        *prio = DRM_SCHED_PRIORITY_MIN;
-        break;
-    case AMDGPU_CTX_PRIORITY_UNSET:
-        *prio = DRM_SCHED_PRIORITY_UNSET;
-        break;
-    default:
-        WARN(1, "Invalid context priority %d\n", amdgpu_priority);
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
  static int amdgpu_sched_process_priority_override(struct 
amdgpu_device *adev,
                            int fd,
-                          enum drm_sched_priority priority)
+                          int32_t priority)
  {
      struct fd f = fdget(fd);
      struct amdgpu_fpriv *fpriv;
@@ -89,7 +61,7 @@ static int 
amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
  static int amdgpu_sched_context_priority_override(struct 
amdgpu_device *adev,
                            int fd,
                            unsigned ctx_id,
-                          enum drm_sched_priority priority)
+                          int32_t priority)
  {
      struct fd f = fdget(fd);
      struct amdgpu_fpriv *fpriv;
@@ -124,7 +96,6 @@ int amdgpu_sched_ioctl(struct drm_device *dev, 
void *data,
  {
      union drm_amdgpu_sched *args = data;
      struct amdgpu_device *adev = drm_to_adev(dev);
-    enum drm_sched_priority priority;
      int r;
        /* First check the op, then the op's argument.
@@ -138,21 +109,22 @@ int amdgpu_sched_ioctl(struct drm_device *dev, 
void *data,
          return -EINVAL;
      }
  -    r = amdgpu_to_sched_priority(args->in.priority, &priority);
-    if (r)
-        return r;
+    if (!amdgpu_ctx_priority_is_valid(args->in.priority)) {
+        WARN(1, "Invalid context priority %d\n", args->in.priority);
+        return -EINVAL;
+    }
        switch (args->in.op) {
      case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
          r = amdgpu_sched_process_priority_override(adev,
                                 args->in.fd,
-                               priority);
+                               args->in.priority);
          break;
      case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
          r = amdgpu_sched_context_priority_override(adev,
                                 args->in.fd,
                                 args->in.ctx_id,
-                               priority);
+                               args->in.priority);
          break;
      default:
          /* Impossible.