On Tue, 18 Feb 2020 at 20:23, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
>
> While we know that the waiters cannot disappear as we walk our list
> (only that they might be added), the same cannot be said for our
> signalers as they may be completed by the HW and retired as we process
> this request. Ergo we need to use rcu to protect the list iteration and
> remember to mark up the list_del_rcu.
>
> Fixes: 793c22617367 ("drm/i915/gt: Protect execlists_hold/unhold from new waiters")
> Fixes: 32ff621fd744 ("drm/i915/gt: Allow temporary suspension of inflight requests")
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c   | 16 ++++++++++------
>  drivers/gpu/drm/i915/i915_scheduler.c |  4 ++--
>  2 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index ba31cbe8c68e..47561dc29304 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1668,9 +1668,9 @@ last_active(const struct intel_engine_execlists *execlists)
>                                               wait_link)
>
>  #define for_each_signaler(p__, rq__) \
> -       list_for_each_entry_lockless(p__, \
> -                                    &(rq__)->sched.signalers_list, \
> -                                    signal_link)
> +       list_for_each_entry_rcu(p__, \
> +                               &(rq__)->sched.signalers_list, \
> +                               signal_link)
>
>  static void defer_request(struct i915_request *rq, struct list_head * const pl)
>  {
> @@ -2533,11 +2533,13 @@ static bool execlists_hold(struct intel_engine_cs *engine,
>  static bool hold_request(const struct i915_request *rq)
>  {
>         struct i915_dependency *p;
> +       bool result = false;
>
>         /*
>          * If one of our ancestors is on hold, we must also be on hold,
>          * otherwise we will bypass it and execute before it.
>          */
> +       rcu_read_lock();
>         for_each_signaler(p, rq) {
>                 const struct i915_request *s =
>                         container_of(p->signaler, typeof(*s), sched);
> @@ -2545,11 +2547,13 @@ static bool hold_request(const struct i915_request *rq)
>                 if (s->engine != rq->engine)
>                         continue;
>
> -               if (i915_request_on_hold(s))
> -                       return true;
> +               result = i915_request_on_hold(s);
> +               if (result)
> +                       break;
>         }
> +       rcu_read_unlock();
>
> -       return false;
> +       return result;
>  }
>
>  static void __execlists_unhold(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index e19a37a83397..4e48757e9de6 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -486,7 +486,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>         list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
>                 GEM_BUG_ON(!list_empty(&dep->dfs_link));
>
> -               list_del(&dep->wait_link);
> +               list_del_rcu(&dep->wait_link);
>                 if (dep->flags & I915_DEPENDENCY_ALLOC)
>                         i915_dependency_free(dep);
>         }
> @@ -497,7 +497,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>                 GEM_BUG_ON(dep->signaler != node);
>                 GEM_BUG_ON(!list_empty(&dep->dfs_link));
>
> -               list_del(&dep->signal_link);
> +               list_del_rcu(&dep->signal_link);
>                 if (dep->flags & I915_DEPENDENCY_ALLOC)
>                         i915_dependency_free(dep);

Is this not a potential UAF? Do we not have to wait for the grace period
before doing the free, or what?
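Naively I would have expected the free to be deferred until after the
grace period, e.g. something along these lines. This is only a sketch,
and it assumes struct i915_dependency grows an rcu_head member (call it
"rcu"; purely hypothetical, I don't think it has one today):

/*
 * Hypothetical sketch only: assumes an rcu_head ("rcu") is added to
 * struct i915_dependency, so the actual free is deferred until any
 * concurrent RCU walkers of the signalers/waiters lists have exited
 * their read-side critical sections.
 */
static void i915_dependency_free_rcu(struct rcu_head *rcu)
{
        struct i915_dependency *dep = container_of(rcu, typeof(*dep), rcu);

        i915_dependency_free(dep);
}

        ...
                list_del_rcu(&dep->signal_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        call_rcu(&dep->rcu, i915_dependency_free_rcu);

Or is the dependency's lifetime already covered some other way here
(e.g. a SLAB_TYPESAFE_BY_RCU cache) that I'm missing?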
> }
> --
> 2.25.0