On 2017-04-14 03:15 PM, Andres Rodriguez wrote:
> MrCooper just pointed me to the following EGL extension last night:
> https://www.khronos.org/registry/EGL/extensions/IMG/EGL_IMG_context_priority.txt
>
> I think it might be a good idea to add an AMDGPU_CTX_PRIORITY_LOW to the
> kernel ABI in this patch in order to accommodate that extension.
>

Also, the query mechanism can be implemented fully in usermode. I don't
think we would require any kernel API to return the real priority. If a
user requests an unsupported priority, we can simply return an error,
and usermode can retry creating the context with a different priority
(rough sketch at the bottom of this mail).

> Regards,
> Andres
>
> On 2017-04-13 06:30 PM, Andres Rodriguez wrote:
>> Add a new context creation parameter to express a global context
>> priority.
>>
>> Contexts allocated with AMDGPU_CTX_PRIORITY_HIGH will receive higher
>> priority to schedule their work than AMDGPU_CTX_PRIORITY_NORMAL
>> (default) contexts.
>>
>> v2: Instead of using flags, repurpose __pad
>> v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
>> v4: Validate usermode priority and store it
>> v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
>> v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN
>> v7: remove ctx->priority
>>
>> Reviewed-by: Emil Velikov <emil.l.velikov at gmail.com>
>> Reviewed-by: Christian König <christian.koenig at amd.com>
>> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       | 36 ++++++++++++++++++++++++---
>>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  1 +
>>  include/uapi/drm/amdgpu_drm.h                 |  8 +++++-
>>  3 files changed, 40 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 1969f27..df6fc9d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -8,67 +8,75 @@
>>   * and/or sell copies of the Software, and to permit persons to whom the
>>   * Software is furnished to do so, subject to the following conditions:
>>   *
>>   * The above copyright notice and this permission notice shall be included in
>>   * all copies or substantial portions of the Software.
>>   *
>>   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>>   * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>>   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>>   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>>   * OTHER DEALINGS IN THE SOFTWARE.
>>   *
>>   * Authors: monk liu <monk.liu at amd.com>
>>   */
>>
>>  #include <drm/drmP.h>
>>  #include "amdgpu.h"
>>
>> -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
>> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
>> +			   enum amd_sched_priority priority,
>> +			   struct amdgpu_ctx *ctx)
>>  {
>>  	unsigned i, j;
>>  	int r;
>>
>> +	if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
>> +		return -EINVAL;
>> +
>> +	if (priority == AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_ADMIN))
>> +		return -EACCES;
>> +
>>  	memset(ctx, 0, sizeof(*ctx));
>>  	ctx->adev = adev;
>>  	kref_init(&ctx->refcount);
>>  	spin_lock_init(&ctx->ring_lock);
>>  	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
>>  			      sizeof(struct dma_fence*), GFP_KERNEL);
>>  	if (!ctx->fences)
>>  		return -ENOMEM;
>>
>>  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>  		ctx->rings[i].sequence = 1;
>>  		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
>>  	}
>>
>>  	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
>>
>>  	/* create context entity for each ring */
>>  	for (i = 0; i < adev->num_rings; i++) {
>>  		struct amdgpu_ring *ring = adev->rings[i];
>>  		struct amd_sched_rq *rq;
>>
>> -		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
>> +		rq = &ring->sched.sched_rq[priority];
>>  		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
>>  					  rq, amdgpu_sched_jobs);
>>  		if (r)
>>  			goto failed;
>>  	}
>>
>>  	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
>>  	if (r)
>>  		goto failed;
>>
>>  	return 0;
>>
>> failed:
>>  	for (j = 0; j < i; j++)
>>  		amd_sched_entity_fini(&adev->rings[j]->sched,
>>  				      &ctx->rings[j].entity);
>>  	kfree(ctx->fences);
>>  	ctx->fences = NULL;
>>  	return r;
>>  }
>> @@ -79,59 +87,61 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>>  	unsigned i, j;
>>
>>  	if (!adev)
>>  		return;
>>
>>  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
>>  		for (j = 0; j < amdgpu_sched_jobs; ++j)
>>  			dma_fence_put(ctx->rings[i].fences[j]);
>>  	kfree(ctx->fences);
>>  	ctx->fences = NULL;
>>
>>  	for (i = 0; i < adev->num_rings; i++)
>>  		amd_sched_entity_fini(&adev->rings[i]->sched,
>>  				      &ctx->rings[i].entity);
>>
>>  	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
>>  }
>>
>>  static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>  			    struct amdgpu_fpriv *fpriv,
>> +			    enum amd_sched_priority priority,
>>  			    uint32_t *id)
>>  {
>>  	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
>>  	struct amdgpu_ctx *ctx;
>>  	int r;
>>
>>  	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
>>  	if (!ctx)
>>  		return -ENOMEM;
>>
>>  	mutex_lock(&mgr->lock);
>>  	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
>>  	if (r < 0) {
>>  		mutex_unlock(&mgr->lock);
>>  		kfree(ctx);
>>  		return r;
>>  	}
>> +
>>  	*id = (uint32_t)r;
>> -	r = amdgpu_ctx_init(adev, ctx);
>> +	r = amdgpu_ctx_init(adev, priority, ctx);
>>  	if (r) {
>>  		idr_remove(&mgr->ctx_handles, *id);
>>  		*id = 0;
>>  		kfree(ctx);
>>  	}
>>  	mutex_unlock(&mgr->lock);
>>  	return r;
>>  }
>>
>>  static void amdgpu_ctx_do_release(struct kref *ref)
>>  {
>>  	struct amdgpu_ctx *ctx;
>>
>>  	ctx = container_of(ref, struct amdgpu_ctx, refcount);
>>
>>  	amdgpu_ctx_fini(ctx);
>>
>>  	kfree(ctx);
>>  }
>>
>> @@ -167,56 +177,74 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
>>  		return -EINVAL;
>>  	}
>>
>>  	/* TODO: these two are always zero */
>>  	out->state.flags = 0x0;
>>  	out->state.hangs = 0x0;
>>
>>  	/* determine if a GPU reset has occured since the last call */
>>  	reset_counter = atomic_read(&adev->gpu_reset_counter);
>>  	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
>>  	if (ctx->reset_counter == reset_counter)
>>  		out->state.reset_status = AMDGPU_CTX_NO_RESET;
>>  	else
>>  		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
>>  	ctx->reset_counter = reset_counter;
>>
>>  	mutex_unlock(&mgr->lock);
>>  	return 0;
>>  }
>>
>> +static enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority)
>> +{
>> +	switch (amdgpu_priority) {
>> +	case AMDGPU_CTX_PRIORITY_HIGH:
>> +		return AMD_SCHED_PRIORITY_HIGH;
>> +	case AMDGPU_CTX_PRIORITY_NORMAL:
>> +		return AMD_SCHED_PRIORITY_NORMAL;
>> +	default:
>> +		WARN(1, "Invalid context priority %d\n", amdgpu_priority);
>> +		return AMD_SCHED_PRIORITY_NORMAL;
>> +	}
>> +}
>> +
>>  int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>>  		     struct drm_file *filp)
>>  {
>>  	int r;
>>  	uint32_t id;
>> +	enum amd_sched_priority priority;
>>
>>  	union drm_amdgpu_ctx *args = data;
>>  	struct amdgpu_device *adev = dev->dev_private;
>>  	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>
>>  	r = 0;
>>  	id = args->in.ctx_id;
>> +	priority = amdgpu_to_sched_priority(args->in.priority);
>> +
>> +	if (priority >= AMD_SCHED_PRIORITY_MAX)
>> +		return -EINVAL;
>>
>>  	switch (args->in.op) {
>>  	case AMDGPU_CTX_OP_ALLOC_CTX:
>> -		r = amdgpu_ctx_alloc(adev, fpriv, &id);
>> +		r = amdgpu_ctx_alloc(adev, fpriv, priority, &id);
>>  		args->out.alloc.ctx_id = id;
>>  		break;
>>  	case AMDGPU_CTX_OP_FREE_CTX:
>>  		r = amdgpu_ctx_free(fpriv, id);
>>  		break;
>>  	case AMDGPU_CTX_OP_QUERY_STATE:
>>  		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
>>  		break;
>>  	default:
>>  		return -EINVAL;
>>  	}
>>
>>  	return r;
>>  }
>>
>>  struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
>>  {
>>  	struct amdgpu_ctx *ctx;
>>  	struct amdgpu_ctx_mgr *mgr;
>>
>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>> index 0255c7f..e266e1e 100644
>> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>> @@ -93,40 +93,41 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct dma_fence *f)
>>  	if (f->ops == &amd_sched_fence_ops_finished)
>>  		return container_of(f, struct amd_sched_fence, finished);
>>
>>  	return NULL;
>>  }
>>
>>  /**
>>   * Define the backend operations called by the scheduler,
>>   * these functions should be implemented in driver side
>>   */
>>  struct amd_sched_backend_ops {
>>  	struct dma_fence *(*dependency)(struct amd_sched_job *sched_job);
>>  	struct dma_fence *(*run_job)(struct amd_sched_job *sched_job);
>>  	void (*timedout_job)(struct amd_sched_job *sched_job);
>>  	void (*free_job)(struct amd_sched_job *sched_job);
>>  };
>>
>>  enum amd_sched_priority {
>>  	AMD_SCHED_PRIORITY_MIN,
>>  	AMD_SCHED_PRIORITY_NORMAL = AMD_SCHED_PRIORITY_MIN,
>> +	AMD_SCHED_PRIORITY_HIGH,
>>  	AMD_SCHED_PRIORITY_KERNEL,
>>  	AMD_SCHED_PRIORITY_MAX
>>  };
>>
>>  /**
>>   * One scheduler is implemented for each hardware ring
>>   */
>>  struct amd_gpu_scheduler {
>>  	const struct amd_sched_backend_ops	*ops;
>>  	uint32_t		hw_submission_limit;
>>  	long			timeout;
>>  	const char		*name;
>>  	struct amd_sched_rq	sched_rq[AMD_SCHED_PRIORITY_MAX];
>>  	wait_queue_head_t	wake_up_worker;
>>  	wait_queue_head_t	job_scheduled;
>>  	atomic_t		hw_rq_count;
>>  	atomic64_t		job_id_count;
>>  	struct task_struct	*thread;
>>  	struct list_head	ring_mirror_list;
>>  	spinlock_t		job_list_lock;
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 95260e5..a334598 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -143,47 +143,53 @@ struct drm_amdgpu_bo_list_out {
>>
>>  union drm_amdgpu_bo_list {
>>  	struct drm_amdgpu_bo_list_in in;
>>  	struct drm_amdgpu_bo_list_out out;
>>  };
>>
>>  /* context related */
>>  #define AMDGPU_CTX_OP_ALLOC_CTX	1
>>  #define AMDGPU_CTX_OP_FREE_CTX	2
>>  #define AMDGPU_CTX_OP_QUERY_STATE	3
>>
>>  /* GPU reset status */
>>  #define AMDGPU_CTX_NO_RESET		0
>>  /* this the context caused it */
>>  #define AMDGPU_CTX_GUILTY_RESET	1
>>  /* some other context caused it */
>>  #define AMDGPU_CTX_INNOCENT_RESET	2
>>  /* unknown cause */
>>  #define AMDGPU_CTX_UNKNOWN_RESET	3
>>
>> +/* Context priority level */
>> +#define AMDGPU_CTX_PRIORITY_NORMAL	0
>> +/* Selecting a priority above NORMAL requires CAP_SYS_ADMIN */
>> +#define AMDGPU_CTX_PRIORITY_HIGH	1
>> +#define AMDGPU_CTX_PRIORITY_NUM	2
>> +
>>  struct drm_amdgpu_ctx_in {
>>  	/** AMDGPU_CTX_OP_* */
>>  	__u32	op;
>>  	/** For future use, no flags defined so far */
>>  	__u32	flags;
>>  	__u32	ctx_id;
>> -	__u32	_pad;
>> +	__u32	priority;
>>  };
>>
>>  union drm_amdgpu_ctx_out {
>>  	struct {
>>  		__u32	ctx_id;
>>  		__u32	_pad;
>>  	} alloc;
>>
>>  	struct {
>>  		/** For future use, no flags defined so far */
>>  		__u64	flags;
>>  		/** Number of resets caused by this context so far. */
>>  		__u32	hangs;
>>  		/** Reset status since the last call of the ioctl. */
>>  		__u32	reset_status;
>>  	} state;
>>  };
>>
>>  union drm_amdgpu_ctx {
>>  	struct drm_amdgpu_ctx_in in;
>>  	union drm_amdgpu_ctx_out out;
>>  };
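
To expand on the EGL_IMG_context_priority point above, the Mesa-side
mapping I'm picturing is roughly the following. Note that
AMDGPU_CTX_PRIORITY_LOW is only the proposed addition and does not
exist in this patch; the define below is a hypothetical placeholder
just to illustrate the shape of the mapping, not ABI:

    #include <stdint.h>

    #include <EGL/egl.h>
    #include <EGL/eglext.h>
    #include <drm/amdgpu_drm.h>

    /* Hypothetical: AMDGPU_CTX_PRIORITY_LOW is only proposed above.
     * The value here is a placeholder, not ABI. */
    #ifndef AMDGPU_CTX_PRIORITY_LOW
    #define AMDGPU_CTX_PRIORITY_LOW 2
    #endif

    /* Map EGL_IMG_context_priority levels onto amdgpu context
     * priorities. MEDIUM is the extension's default, so it maps to
     * NORMAL. */
    static uint32_t egl_to_amdgpu_ctx_priority(EGLint egl_level)
    {
    	switch (egl_level) {
    	case EGL_CONTEXT_PRIORITY_HIGH_IMG:
    		return AMDGPU_CTX_PRIORITY_HIGH;
    	case EGL_CONTEXT_PRIORITY_LOW_IMG:
    		return AMDGPU_CTX_PRIORITY_LOW;
    	case EGL_CONTEXT_PRIORITY_MEDIUM_IMG:
    	default:
    		return AMDGPU_CTX_PRIORITY_NORMAL;
    	}
    }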
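
And for the usermode retry mentioned above, a minimal sketch against
the new ioctl interface. This assumes an already-open DRM fd and a
HIGH-then-NORMAL fallback order chosen purely as an example; a real
implementation would go through libdrm and handle EINTR restarts:

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>

    #include <drm/amdgpu_drm.h>

    /* Allocate a context, falling back to a lower priority if the
     * kernel rejects the requested one (-EACCES without CAP_SYS_ADMIN,
     * -EINVAL on a kernel that doesn't know the value). Returns 0 and
     * stores the new context id in *ctx_id, or -errno on failure. */
    static int amdgpu_ctx_alloc_with_fallback(int fd, uint32_t *ctx_id)
    {
    	static const uint32_t try_order[] = {
    		AMDGPU_CTX_PRIORITY_HIGH,
    		AMDGPU_CTX_PRIORITY_NORMAL,
    	};
    	unsigned int i;

    	for (i = 0; i < sizeof(try_order) / sizeof(try_order[0]); i++) {
    		union drm_amdgpu_ctx args;

    		memset(&args, 0, sizeof(args));
    		args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
    		args.in.priority = try_order[i];

    		if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args) == 0) {
    			*ctx_id = args.out.alloc.ctx_id;
    			return 0;
    		}
    		if (errno != EACCES && errno != EINVAL)
    			break;
    		/* priority unsupported or unprivileged: try the next one */
    	}
    	return -errno;
    }

Regards,
Andres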