[PATCH] drm/amdgpu: add parameter to allocate high priority contexts v9

nhaehnle@xxxxxxxxx (Nicolai Hähnle) · Sat, 29 Apr 2017 19:45:10 +0200

On 29.04.2017 18:30, Andres Rodriguez wrote:
>
>
> On 2017-04-29 04:34 AM, Nicolai HÃ¤hnle wrote:
>> Thanks for the update!
>>
>>
>> On 26.04.2017 03:10, Andres Rodriguez wrote:
>>> Add a new context creation parameter to express a global context
>>> priority.
>>>
>>> The priority ranking in descending order is as follows:
>>>  * AMDGPU_CTX_PRIOR ITY_HIGH
>>>  * AMDGPU_CTX_PRIORITY_NORMAL
>>>  * AMDGPU_CTX_PRIORITY_LOW
>>>
>>> The driver will attempt to schedule work to the hardware according to
>>> the priorities. No latency or throughput guarantees are provided by
>>> this patch.
>>>
>>> This interface intends to service the EGL_IMG_context_priority
>>> extension, and vulkan equivalents.
>>>
>>> v2: Instead of using flags, repurpose __pad
>>> v3: Swap enum values of _NORMAL _HIGH for backwards compatibility
>>> v4: Validate usermode priority and store it
>>> v5: Move priority validation into amdgpu_ctx_ioctl(), headline reword
>>> v6: add UAPI note regarding priorities requiring CAP_SYS_ADMIN
>>> v7: remove ctx->priority
>>> v8: added AMDGPU_CTX_PRIORITY_LOW, s/CAP_SYS_ADMIN/CAP_SYS_NICE
>>> v9: change the priority parameter to __s32
>>>
>>> Reviewed-by: Emil Velikov <emil.l.velikov at gmail.com>
>>> Reviewed-by: Christian KÃ¶nig <christian.koenig at amd.com>
>>> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       | 38
>>> ++++++++++++++++++++++++---
>>>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  4 ++-
>>>  include/uapi/drm/amdgpu_drm.h                 |  8 +++++-
>>>  3 files changed, 44 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index b4bbbb3..af75571 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -25,11 +25,19 @@
>>>  #include <drm/drmP.h>
>>>  #include "amdgpu.h"
>>>
>>> -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct
>>> amdgpu_ctx *ctx)
>>> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>> +               enum amd_sched_priority priority,
>>> +               struct amdgpu_ctx *ctx)
>>>  {
>>>      unsigned i, j;
>>>      int r;
>>>
>>> +    if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX)
>>> +        return -EINVAL;
>>> +
>>> +    if (priority >= AMD_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_NICE))
>>> +        return -EACCES;
>>> +
>>>      memset(ctx, 0, sizeof(*ctx));
>>>      ctx->adev = adev;
>>>      kref_init(&ctx->refcount);
>>> @@ -51,7 +59,7 @@ static int amdgpu_ctx_init(struct amdgpu_device
>>> *adev, struct amdgpu_ctx *ctx)
>>>          struct amdgpu_ring *ring = adev->rings[i];
>>>          struct amd_sched_rq *rq;
>>>
>>> -        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
>>> +        rq = &ring->sched.sched_rq[priority];
>>>          r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
>>>                        rq, amdgpu_sched_jobs);
>>>          if (r)
>>> @@ -90,6 +98,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>>>
>>>  static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>>                  struct amdgpu_fpriv *fpriv,
>>> +                enum amd_sched_priority priority,
>>>                  uint32_t *id)
>>>  {
>>>      struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
>>> @@ -107,8 +116,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device
>>> *adev,
>>>          kfree(ctx);
>>>          return r;
>>>      }
>>> +
>>>      *id = (uint32_t)r;
>>> -    r = amdgpu_ctx_init(adev, ctx);
>>> +    r = amdgpu_ctx_init(adev, priority, ctx);
>>>      if (r) {
>>>          idr_remove(&mgr->ctx_handles, *id);
>>>          *id = 0;
>>> @@ -182,11 +192,27 @@ static int amdgpu_ctx_query(struct amdgpu_device
>>> *adev,
>>>      return 0;
>>>  }
>>>
>>> +static enum amd_sched_priority amdgpu_to_sched_priority(int
>>> amdgpu_priority)
>>> +{
>>> +    switch (amdgpu_priority) {
>>> +    case AMDGPU_CTX_PRIORITY_HIGH:
>>> +        return AMD_SCHED_PRIORITY_HIGH;
>>> +    case AMDGPU_CTX_PRIORITY_NORMAL:
>>> +        return AMD_SCHED_PRIORITY_NORMAL;
>>> +    case AMDGPU_CTX_PRIORITY_LOW:
>>> +        return AMD_SCHED_PRIORITY_LOW;
>>
>> This needs to be changed now to support the range.
>>
>>
>
> I actually don't intend on the priority parameter to behave like a
> range. libdrm is expected to pass in HIGH/NORMAL/LOW, and nothing in
> between.

Okay, makes sense.

> The current version of the hardware only supports a handful of discrete
> priority configurations. So I would rather avoid creating the illusion
> that a priority of -333 is any different than 0.
>
> What I like about your suggestion of spreading out the values further
> apart (-1023, 0, 1023 vs -1, 0, +1), is that it gives us the option to
> add new priority values and keep everything ordered. Or, we could also
> expand into ranges and still maintain backwards compatibility (if the HW
> supports it).
>
> The EGL extension and the vulkan draft extension for context priorities
> also use discrete values. So I don't really see a case for pursuing
> range based priorities when the APIs and the HW don't support it.
>
>
>>> +    default:
>>> +        WARN(1, "Invalid context priority %d\n", amdgpu_priority);
>>> +        return AMD_SCHED_PRIORITY_NORMAL;
>>> +    }
>>> +}
>>> +
>>>  int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>>>               struct drm_file *filp)
>>>  {
>>>      int r;
>>>      uint32_t id;
>>> +    enum amd_sched_priority priority;
>>>
>>>      union drm_amdgpu_ctx *args = data;
>>>      struct amdgpu_device *adev = dev->dev_private;
>>> @@ -194,10 +220,14 @@ int amdgpu_ctx_ioctl(struct drm_device *dev,
>>> void *data,
>>>
>>>      r = 0;
>>>      id = args->in.ctx_id;
>>> +    priority = amdgpu_to_sched_priority(args->in.priority);
>>> +
>>> +    if (priority >= AMD_SCHED_PRIORITY_MAX)
>>> +        return -EINVAL;
>>
>> We check ioctl parameters before using them. In this case the range
>> check should happen before all this. Misbehaving user-space programs
>> shouldn't be able to run into the WARN in amdgpu_to_sched_priority that
>> easily, and most of all they shouldn't silently have their ioctls
>> succeed. Otherwise, we limit our ability to evolve the interface.
>>
>
> The checking is intended to happen in amdgpu_to_sched priority. The
> check is non-fatal in case a usermode program used to have _pad filled
> with garbage.
>
> I agree silently succeeding here is non ideal. My stab at providing some
> feedback is the WARN, so that someone may at least notice that their
> program is faulty.
>
> The solution I have isn't perfect for backwards compat either. If the
> faulty app used to have 1024 as their _pad, then they would fail with
> this patch, but work correctly without it. I think that case is
> statistically unlikely, so we should be okay *fingers crossed*. Note
> that libdrm, which accounts for most clients of the amdgpu ioctl
> interface has always memsetted this field to 0. And that accounts for a
> majority of the clients of this interface.
>
> If there are any suggestions for a better approach to deal with this
> situation don't hesitate to let me know. I honestly don't like silently
> changing usermode's intentions, but I also don't want to get an angry
> email saying "RULE#1: WE DON'T BREAK USERSPACE!".

Oh wow, I didn't realize there was no checking of _pad. Looks like 
there's no check on flags either? That sucks :/

I agree, there's just no really clean solution to this without adding a 
new ioctl, so I think your approach is fine.

Reviewed-by: Nicolai HÃ¤hnle <nicolai.haehnle at amd.com>

> Thanks for the feedback,
> Andres
>
>> Cheers,
>> Nicolai
>>
>>
>>>
>>>      switch (args->in.op) {
>>>      case AMDGPU_CTX_OP_ALLOC_CTX:
>>> -        r = amdgpu_ctx_alloc(adev, fpriv, &id);
>>> +        r = amdgpu_ctx_alloc(adev, fpriv, priority, &id);
>>>          args->out.alloc.ctx_id = id;
>>>          break;
>>>      case AMDGPU_CTX_OP_FREE_CTX:
>>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> index 8cb41d3..613e682 100644
>>> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> @@ -109,7 +109,9 @@ struct amd_sched_backend_ops {
>>>
>>>  enum amd_sched_priority {
>>>      AMD_SCHED_PRIORITY_MIN,
>>> -    AMD_SCHED_PRIORITY_NORMAL = AMD_SCHED_PRIORITY_MIN,
>>> +    AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
>>> +    AMD_SCHED_PRIORITY_NORMAL,
>>> +    AMD_SCHED_PRIORITY_HIGH,
>>>      AMD_SCHED_PRIORITY_KERNEL,
>>>      AMD_SCHED_PRIORITY_MAX
>>>  };
>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h
>>> index d76d525..6a2d97c 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -162,13 +162,19 @@ union drm_amdgpu_bo_list {
>>>  /* unknown cause */
>>>  #define AMDGPU_CTX_UNKNOWN_RESET    3
>>>
>>> +/* Context priority level */
>>> +#define AMDGPU_CTX_PRIORITY_LOW         -1023
>>> +#define AMDGPU_CTX_PRIORITY_NORMAL      0
>>> +/* Selecting a priority above NORMAL requires CAP_SYS_ADMIN */
>>> +#define AMDGPU_CTX_PRIORITY_HIGH        1023
>>> +
>>>  struct drm_amdgpu_ctx_in {
>>>      /** AMDGPU_CTX_OP_* */
>>>      __u32    op;
>>>      /** For future use, no flags defined so far */
>>>      __u32    flags;
>>>      __u32    ctx_id;
>>> -    __u32    _pad;
>>> +    __s32    priority;
>>>  };
>>>
>>>  union drm_amdgpu_ctx_out {
>>>
>>
>>

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.