On Thu, Nov 14, 2013 at 7:49 PM, Thomas Hellstrom <thellstrom@xxxxxxxxxx> wrote:
> Addresses
> "[BUG] completely bonkers use of set_need_resched + VM_FAULT_NOPAGE".
>
> In the first occurrence it was used to try to be nice while releasing the
> mmap_sem and retrying the fault to work around a locking inversion.
> The second occurrence was never used.
>
> There has been some discussion whether we should change the locking order to
> mmap_sem -> bo_reserve. This patch doesn't address that issue, and leaves
> that locking order undefined. The solution that we release the mmap_sem if
> tryreserve fails and wait for the buffer to become unreserved is something
> we want in any case, and follows how the core vm system waits for pages
> to become unlocked while releasing the mmap_sem.
>
> The code also outlines what needs to be changed if we want to establish the
> locking order as mmap_sem -> bo::reserve.
>
> One slight issue that remains with this code is that the fault handler might
> be prone to starvation if another thread continuously reserves the buffer.
> IMO that usage pattern is highly unlikely.
>
> Signed-off-by: Thomas Hellstrom <thellstrom@xxxxxxxxxx>

Reviewed-by: Jakob Bornecrantz <jakob@xxxxxxxxxx>

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c    | 35 ++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/ttm/ttm_bo_vm.c | 26 ++++++++++++++++++++------
>  include/drm/ttm/ttm_bo_api.h    |  4 +++-
>  3 files changed, 57 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8d5a646..07e02c4 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -151,7 +151,7 @@ static void ttm_bo_release_list(struct kref *list_kref)
>          atomic_dec(&bo->glob->bo_count);
>          if (bo->resv == &bo->ttm_resv)
>                  reservation_object_fini(&bo->ttm_resv);
> -
> +        mutex_destroy(&bo->wu_mutex);
>          if (bo->destroy)
>                  bo->destroy(bo);
>          else {
> @@ -1123,6 +1123,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
>          INIT_LIST_HEAD(&bo->ddestroy);
>          INIT_LIST_HEAD(&bo->swap);
>          INIT_LIST_HEAD(&bo->io_reserve_lru);
> +        mutex_init(&bo->wu_mutex);
>          bo->bdev = bdev;
>          bo->glob = bdev->glob;
>          bo->type = type;
> @@ -1704,3 +1705,35 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>                  ;
>  }
>  EXPORT_SYMBOL(ttm_bo_swapout_all);
> +
> +/**
> + * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> + * unreserved
> + *
> + * @bo: Pointer to buffer
> + */
> +int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> +{
> +        int ret;
> +
> +        /*
> +         * In the absense of a wait_unlocked API,
> +         * Use the bo::wu_mutex to avoid triggering livelocks due to
> +         * concurrent use of this function. Note that this use of
> +         * bo::wu_mutex can go away if we change locking order to
> +         * mmap_sem -> bo::reserve.
> +         */
> +        ret = mutex_lock_interruptible(&bo->wu_mutex);
> +        if (unlikely(ret != 0))
> +                return -ERESTARTSYS;
> +        if (!ww_mutex_is_locked(&bo->resv->lock))
> +                goto out_unlock;
> +        ret = ttm_bo_reserve_nolru(bo, true, false, false, NULL);
> +        if (unlikely(ret != 0))
> +                goto out_unlock;
> +        ww_mutex_unlock(&bo->resv->lock);
> +
> +out_unlock:
> +        mutex_unlock(&bo->wu_mutex);
> +        return ret;
> +}
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index ac617f3..b249ab9 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -107,13 +107,28 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>          /*
>           * Work around locking order reversal in fault / nopfn
>           * between mmap_sem and bo_reserve: Perform a trylock operation
> -         * for reserve, and if it fails, retry the fault after scheduling.
> +         * for reserve, and if it fails, retry the fault after waiting
> +         * for the buffer to become unreserved.
>           */
> -
> -        ret = ttm_bo_reserve(bo, true, true, false, 0);
> +        ret = ttm_bo_reserve(bo, true, true, false, NULL);
>          if (unlikely(ret != 0)) {
> -                if (ret == -EBUSY)
> -                        set_need_resched();
> +                if (ret != -EBUSY)
> +                        return VM_FAULT_NOPAGE;
> +
> +                if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> +                        if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> +                                up_read(&vma->vm_mm->mmap_sem);
> +                                (void) ttm_bo_wait_unreserved(bo);
> +                        }
> +
> +                        return VM_FAULT_RETRY;
> +                }
> +
> +                /*
> +                 * If we'd want to change locking order to
> +                 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> +                 * instead of retrying the fault...
> +                 */
>                  return VM_FAULT_NOPAGE;
>          }
>
> @@ -123,7 +138,6 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>          case 0:
>                  break;
>          case -EBUSY:
> -                set_need_resched();
>          case -ERESTARTSYS:
>                  retval = VM_FAULT_NOPAGE;
>                  goto out_unlock;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 751eaff..ee127ec 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -169,6 +169,7 @@ struct ttm_tt;
>   * @offset: The current GPU offset, which can have different meanings
>   * depending on the memory type. For SYSTEM type memory, it should be 0.
>   * @cur_placement: Hint of current placement.
> + * @wu_mutex: Wait unreserved mutex.
>   *
>   * Base class for TTM buffer object, that deals with data placement and CPU
>   * mappings. GPU mappings are really up to the driver, but for simpler GPUs
> @@ -250,6 +251,7 @@ struct ttm_buffer_object {
>
>          struct reservation_object *resv;
>          struct reservation_object ttm_resv;
> +        struct mutex wu_mutex;
>  };
>
>  /**
> @@ -702,5 +704,5 @@ extern ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>                          size_t count, loff_t *f_pos, bool write);
>
>  extern void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> -
> +extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>  #endif
> --
> 1.7.10.4

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel
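[Editorial note] For readers skimming the archive, here is a minimal userspace sketch of the retry pattern the patch implements: trylock the reservation while holding mmap_sem for read, and on contention drop mmap_sem, block until the lock is free, and report a retry so the fault is rerun. This is not kernel or TTM code; mmap_sem, reserve, wu_mutex, fault_handler and the enum values are stand-in names chosen for illustration only.

    /* Build with: cc -pthread retry_sketch.c */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER; /* models mm->mmap_sem  */
    static pthread_mutex_t reserve  = PTHREAD_MUTEX_INITIALIZER;   /* models bo::reserve   */
    static pthread_mutex_t wu_mutex = PTHREAD_MUTEX_INITIALIZER;   /* models bo::wu_mutex  */

    enum fault_status { FAULT_DONE, FAULT_RETRY };

    /* Block until the reserve lock is momentarily free, like ttm_bo_wait_unreserved(). */
    static void wait_unreserved(void)
    {
            pthread_mutex_lock(&wu_mutex);   /* serialize waiters so they don't all hammer the lock */
            pthread_mutex_lock(&reserve);    /* blocking acquire ...                                 */
            pthread_mutex_unlock(&reserve);  /* ... then release immediately; we only wanted to wait */
            pthread_mutex_unlock(&wu_mutex);
    }

    /* Model of the fault handler: trylock reserve with "mmap_sem" held for read. */
    static enum fault_status fault_handler(void)
    {
            pthread_rwlock_rdlock(&mmap_sem);

            if (pthread_mutex_trylock(&reserve) != 0) {
                    /* Contended: drop mmap_sem, wait, and ask the caller to retry the fault. */
                    pthread_rwlock_unlock(&mmap_sem);
                    wait_unreserved();
                    return FAULT_RETRY;
            }

            /* ... fault work would happen here, with both locks held ... */
            pthread_mutex_unlock(&reserve);
            pthread_rwlock_unlock(&mmap_sem);
            return FAULT_DONE;
    }

    /* A second thread that holds the reserve lock briefly, to force one retry. */
    static void *contender(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&reserve);
            usleep(100 * 1000);
            pthread_mutex_unlock(&reserve);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            enum fault_status st;

            pthread_create(&t, NULL, contender, NULL);
            usleep(10 * 1000);               /* let the contender grab the lock first */

            do {
                    st = fault_handler();
                    printf("fault_handler -> %s\n", st == FAULT_RETRY ? "RETRY" : "DONE");
            } while (st == FAULT_RETRY);

            pthread_join(t, NULL);
            return 0;
    }

The wu_mutex in the sketch plays the same role the patch gives bo::wu_mutex: it keeps many concurrent waiters from repeatedly contending the reservation lock at once, which is the livelock the in-code comment refers to.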