On Fri, 2 Feb 2024 at 10:06, Danilo Krummrich <dakr@xxxxxxxxxx> wrote:
>
> nouveau_abi16_ioctl_channel_alloc() and nouveau_cli_init() simply call
> their corresponding *_fini() counterparts. This can lead to
> nouveau_sched_fini() being called without struct nouveau_sched ever
> being initialized in the first place.
>
> Instead of embedding struct nouveau_sched into struct nouveau_cli and
> struct nouveau_chan_abi16, allocate struct nouveau_sched separately,
> such that we can check whether the corresponding pointer is NULL in the
> particular *_fini() functions.
>
> It makes sense to allocate struct nouveau_sched separately anyway, since
> in a subsequent commit we can also avoid allocating a struct
> nouveau_sched in nouveau_abi16_ioctl_channel_alloc() at all if the
> VM_BIND uAPI has been disabled due to the legacy uAPI being used.

Looks good, for the series

Reviewed-by: Dave Airlie <airlied@xxxxxxxxxx>

>
> Fixes: 5f03a507b29e ("drm/nouveau: implement 1:1 scheduler - entity relationship")
> Reported-by: Timur Tabi <ttabi@xxxxxxxxxx>
> Closes: https://lore.kernel.org/nouveau/20240131213917.1545604-1-ttabi@xxxxxxxxxx/
> Signed-off-by: Danilo Krummrich <dakr@xxxxxxxxxx>
> ---
>  drivers/gpu/drm/nouveau/nouveau_abi16.c | 10 ++++---
>  drivers/gpu/drm/nouveau/nouveau_abi16.h |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_drm.c   |  7 +++--
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_exec.c  |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_sched.c | 38 +++++++++++++++++++++++--
>  drivers/gpu/drm/nouveau/nouveau_sched.h |  6 ++--
>  drivers/gpu/drm/nouveau/nouveau_uvmm.c  |  2 +-
>  8 files changed, 53 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
> index a04156ca8390..ca4b5ab3e59e 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
> @@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
>          struct nouveau_abi16_ntfy *ntfy, *temp;
>
>          /* Cancel all jobs from the entity's queue. */
> -        drm_sched_entity_fini(&chan->sched.entity);
> +        if (chan->sched)
> +                drm_sched_entity_fini(&chan->sched->entity);
>
>          if (chan->chan)
>                  nouveau_channel_idle(chan->chan);
>
> -        nouveau_sched_fini(&chan->sched);
> +        if (chan->sched)
> +                nouveau_sched_destroy(&chan->sched);
>
>          /* cleanup notifier state */
>          list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
> @@ -337,8 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
>          if (ret)
>                  goto done;
>
> -        ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
> -                                 chan->chan->dma.ib_max);
> +        ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
> +                                   chan->chan->dma.ib_max);
>          if (ret)
>                  goto done;
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
> index 1f5e243c0c75..11c8c4a80079 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
> @@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
>          struct nouveau_bo *ntfy;
>          struct nouveau_vma *ntfy_vma;
>          struct nvkm_mm heap;
> -        struct nouveau_sched sched;
> +        struct nouveau_sched *sched;
>  };
>
>  struct nouveau_abi16 {
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 6f6c31a9937b..a947e1d5f309 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
>          WARN_ON(!list_empty(&cli->worker));
>
>          usif_client_fini(cli);
> -        nouveau_sched_fini(&cli->sched);
> +        if (cli->sched)
> +                nouveau_sched_destroy(&cli->sched);
>          if (uvmm)
>                  nouveau_uvmm_fini(uvmm);
>          nouveau_vmm_fini(&cli->svm);
> @@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
>          cli->mem = &mems[ret];
>
>          /* Don't pass in the (shared) sched_wq in order to let
> -         * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
> +         * nouveau_sched_create() create a dedicated one for VM_BIND jobs.
>           *
>           * This is required to ensure that for VM_BIND jobs free_job() work and
>           * run_job() work can always run concurrently and hence, free_job() work
> @@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
>           * locks which indirectly or directly are held for allocations
>           * elsewhere.
>           */
> -        ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
> +        ret = nouveau_sched_create(&cli->sched, drm, NULL, 1);
>          if (ret)
>                  goto done;
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index 8a6d94c8b163..e239c6bf4afa 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -98,7 +98,7 @@ struct nouveau_cli {
>                  bool disabled;
>          } uvmm;
>
> -        struct nouveau_sched sched;
> +        struct nouveau_sched *sched;
>
>          const struct nvif_mclass *mem;
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
> index bc5d71b79ab2..e65c0ef23bc7 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_exec.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
> @@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
>          if (ret)
>                  goto out;
>
> -        args.sched = &chan16->sched;
> +        args.sched = chan16->sched;
>          args.file_priv = file_priv;
>          args.chan = chan;
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> index dd98f6910f9c..32fa2e273965 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> @@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = {
>          .free_job = nouveau_sched_free_job,
>  };
>
> -int
> +static int
>  nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
>                     struct workqueue_struct *wq, u32 credit_limit)
>  {
> @@ -453,7 +453,30 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
>          return ret;
>  }
>
> -void
> +int
> +nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
> +                     struct workqueue_struct *wq, u32 credit_limit)
> +{
> +        struct nouveau_sched *sched;
> +        int ret;
> +
> +        sched = kzalloc(sizeof(*sched), GFP_KERNEL);
> +        if (!sched)
> +                return -ENOMEM;
> +
> +        ret = nouveau_sched_init(sched, drm, wq, credit_limit);
> +        if (ret) {
> +                kfree(sched);
> +                return ret;
> +        }
> +
> +        *psched = sched;
> +
> +        return 0;
> +}
> +
> +
> +static void
>  nouveau_sched_fini(struct nouveau_sched *sched)
>  {
>          struct drm_gpu_scheduler *drm_sched = &sched->base;
> @@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched)
>          if (sched->wq)
>                  destroy_workqueue(sched->wq);
>  }
> +
> +void
> +nouveau_sched_destroy(struct nouveau_sched **psched)
> +{
> +        struct nouveau_sched *sched = *psched;
> +
> +        nouveau_sched_fini(sched);
> +        kfree(sched);
> +
> +        *psched = NULL;
> +}
> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
> index a6528f5981e6..e1f01a23e6f6 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_sched.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
> @@ -111,8 +111,8 @@ struct nouveau_sched {
>          } job;
>  };
>
> -int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
> -                       struct workqueue_struct *wq, u32 credit_limit);
> -void nouveau_sched_fini(struct nouveau_sched *sched);
> +int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
> +                         struct workqueue_struct *wq, u32 credit_limit);
> +void nouveau_sched_destroy(struct nouveau_sched **psched);
>
>  #endif
> diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> index 4f223c972c6a..0a0a11dc9ec0 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
> @@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
>          if (ret)
>                  return ret;
>
> -        args.sched = &cli->sched;
> +        args.sched = cli->sched;
>          args.file_priv = file_priv;
>
>          ret = nouveau_uvmm_vm_bind(&args);
>
> base-commit: 041261ac4c365e03b07427569d6735f8adfd21c8
> --
> 2.43.0
>