On Wed, Feb 26, 2025 at 04:33:43PM +0100, Thomas Hellström wrote:
> Fix fault mode invalidation racing with unbind leading to the
> PTE zapping potentially traversing an invalid page-table tree.
> Do this by holding the notifier lock across PTE zapping. This
> might transfer any contention waiting on the notifier seqlock
> read side to the notifier lock read side, but that shouldn't be
> a major problem.
>
> At the same time get rid of the open-coded invalidation in the bind
> code by relying on the notifier even when the vma bind is not
> yet committed.
>
> Finally let userptr invalidation call a dedicated xe_vm function
> performing a full invalidation.
>
> Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job")
> Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> Cc: Matthew Brost <matthew.brost@xxxxxxxxx>

Reviewed-by: Matthew Brost <matthew.brost@xxxxxxxxx>

> Cc: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx> # v6.12+
> Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/xe/xe_pt.c       | 38 ++++------------
>  drivers/gpu/drm/xe/xe_vm.c       | 78 ++++++++++++++++++++------------
>  drivers/gpu/drm/xe/xe_vm.h       |  8 ++++
>  drivers/gpu/drm/xe/xe_vm_types.h |  4 +-
>  4 files changed, 68 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 1ddcc7e79a93..12a627a23eb4 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1213,42 +1213,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
>  		return 0;
>
>  	uvma = to_userptr_vma(vma);
> -	notifier_seq = uvma->userptr.notifier_seq;
> +	if (xe_pt_userptr_inject_eagain(uvma))
> +		xe_vma_userptr_force_invalidate(uvma);
>
> -	if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm))
> -		return 0;
> +	notifier_seq = uvma->userptr.notifier_seq;
>
>  	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
> -				     notifier_seq) &&
> -	    !xe_pt_userptr_inject_eagain(uvma))
> +				     notifier_seq))
>  		return 0;
>
> -	if (xe_vm_in_fault_mode(vm)) {
> +	if (xe_vm_in_fault_mode(vm))
>  		return -EAGAIN;
> -	} else {
> -		spin_lock(&vm->userptr.invalidated_lock);
> -		list_move_tail(&uvma->userptr.invalidate_link,
> -			       &vm->userptr.invalidated);
> -		spin_unlock(&vm->userptr.invalidated_lock);
> -
> -		if (xe_vm_in_preempt_fence_mode(vm)) {
> -			struct dma_resv_iter cursor;
> -			struct dma_fence *fence;
> -			long err;
> -
> -			dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
> -					    DMA_RESV_USAGE_BOOKKEEP);
> -			dma_resv_for_each_fence_unlocked(&cursor, fence)
> -				dma_fence_enable_sw_signaling(fence);
> -			dma_resv_iter_end(&cursor);
> -
> -			err = dma_resv_wait_timeout(xe_vm_resv(vm),
> -						    DMA_RESV_USAGE_BOOKKEEP,
> -						    false, MAX_SCHEDULE_TIMEOUT);
> -			XE_WARN_ON(err <= 0);
> -		}
> -	}
>
> +	/*
> +	 * Just continue the operation since exec or rebind worker
> +	 * will take care of rebinding.
> +	 */
>  	return 0;
>  }
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 4c1ca47667ad..37d773c0b729 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -580,51 +580,26 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  	trace_xe_vm_rebind_worker_exit(vm);
>  }
>
> -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> -				   const struct mmu_notifier_range *range,
> -				   unsigned long cur_seq)
> +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
>  {
> -	struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
> -	struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
> +	struct xe_userptr *userptr = &uvma->userptr;
>  	struct xe_vma *vma = &uvma->vma;
> -	struct xe_vm *vm = xe_vma_vm(vma);
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  	long err;
>
> -	xe_assert(vm->xe, xe_vma_is_userptr(vma));
> -	trace_xe_vma_userptr_invalidate(vma);
> -
> -	if (!mmu_notifier_range_blockable(range))
> -		return false;
> -
> -	vm_dbg(&xe_vma_vm(vma)->xe->drm,
> -	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
> -	       xe_vma_start(vma), xe_vma_size(vma));
> -
> -	down_write(&vm->userptr.notifier_lock);
> -	mmu_interval_set_seq(mni, cur_seq);
> -
> -	/* No need to stop gpu access if the userptr is not yet bound. */
> -	if (!userptr->initial_bind) {
> -		up_write(&vm->userptr.notifier_lock);
> -		return true;
> -	}
> -
>  	/*
>  	 * Tell exec and rebind worker they need to repin and rebind this
>  	 * userptr.
>  	 */
>  	if (!xe_vm_in_fault_mode(vm) &&
> -	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
> +	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
>  		spin_lock(&vm->userptr.invalidated_lock);
>  		list_move_tail(&userptr->invalidate_link,
>  			       &vm->userptr.invalidated);
>  		spin_unlock(&vm->userptr.invalidated_lock);
>  	}
>
> -	up_write(&vm->userptr.notifier_lock);
> -
>  	/*
>  	 * Preempt fences turn into schedule disables, pipeline these.
>  	 * Note that even in fault mode, we need to wait for binds and
> @@ -642,11 +617,35 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
>  					    false, MAX_SCHEDULE_TIMEOUT);
>  	XE_WARN_ON(err <= 0);
>
> -	if (xe_vm_in_fault_mode(vm)) {
> +	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
>  		err = xe_vm_invalidate_vma(vma);
>  		XE_WARN_ON(err);
>  	}
> +}
> +
> +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> +				   const struct mmu_notifier_range *range,
> +				   unsigned long cur_seq)
> +{
> +	struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
> +	struct xe_vma *vma = &uvma->vma;
> +	struct xe_vm *vm = xe_vma_vm(vma);
> +
> +	xe_assert(vm->xe, xe_vma_is_userptr(vma));
> +	trace_xe_vma_userptr_invalidate(vma);
> +
> +	if (!mmu_notifier_range_blockable(range))
> +		return false;
>
> +	vm_dbg(&xe_vma_vm(vma)->xe->drm,
> +	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
> +	       xe_vma_start(vma), xe_vma_size(vma));
> +
> +	down_write(&vm->userptr.notifier_lock);
> +	mmu_interval_set_seq(mni, cur_seq);
> +
> +	__vma_userptr_invalidate(vm, uvma);
> +	up_write(&vm->userptr.notifier_lock);
>  	trace_xe_vma_userptr_invalidate_complete(vma);
>
>  	return true;
> @@ -656,6 +655,27 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
>  	.invalidate = vma_userptr_invalidate,
>  };
>
> +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> +/**
> + * xe_vma_userptr_force_invalidate() - force invalidate a userptr
> + * @uvma: The userptr vma to invalidate
> + *
> + * Perform a forced userptr invalidation for testing purposes.
> + */
> +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
> +{
> +	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
> +
> +	lockdep_assert_held_write(&vm->lock);
> +	lockdep_assert_held(&vm->userptr.notifier_lock);
> +
> +	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
> +				     uvma->userptr.notifier_seq))
> +		uvma->userptr.notifier_seq -= 2;
> +	__vma_userptr_invalidate(vm, uvma);
> +}
> +#endif
> +
>  int xe_vm_userptr_pin(struct xe_vm *vm)
>  {
>  	struct xe_userptr_vma *uvma, *next;
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 7c8e39049223..f5d835271350 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -287,4 +287,12 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
>  void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
>  void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
>  void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
> +
> +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
> +#else
> +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
> +{
> +}
> +#endif
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 52467b9b5348..1fe79bf23b6b 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -228,8 +228,8 @@ struct xe_vm {
>  		 * up for revalidation. Protected from access with the
>  		 * @invalidated_lock. Removing items from the list
>  		 * additionally requires @lock in write mode, and adding
> -		 * items to the list requires the @userptr.notifer_lock in
> -		 * write mode.
> +		 * items to the list requires either the @userptr.notifer_lock in
> +		 * write mode, OR @lock in write mode.
>  		 */
>  		struct list_head invalidated;
>  	} userptr;
> --
> 2.48.1
>
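For readers less familiar with the locking scheme the fix relies on, below is a
minimal sketch of the generic mmu_interval_notifier retry pattern it builds on:
the invalidation callback takes, in write mode, the same lock the bind path
holds in read mode while it checks the sequence number and touches page tables,
so a racing invalidation is either seen via mmu_interval_read_retry() or
serialized behind the lock. This is illustrative only, not Xe code; the my_*
names are hypothetical.

/*
 * Illustrative sketch (hypothetical my_* names, not Xe code) of the
 * standard mmu_interval_notifier retry pattern.
 */
#include <linux/mmu_notifier.h>
#include <linux/rwsem.h>

struct my_userptr {
	struct mmu_interval_notifier notifier;
	unsigned long notifier_seq;
};

/* Plays the role of vm->userptr.notifier_lock in the patch above. */
static DECLARE_RWSEM(my_notifier_lock);

static bool my_invalidate(struct mmu_interval_notifier *mni,
			  const struct mmu_notifier_range *range,
			  unsigned long cur_seq)
{
	if (!mmu_notifier_range_blockable(range))
		return false;

	/*
	 * Write-locking the lock the bind path read-locks while walking or
	 * zapping page tables serializes invalidation against bind/unbind.
	 */
	down_write(&my_notifier_lock);
	mmu_interval_set_seq(mni, cur_seq);
	/* ... zap PTEs / queue the range for rebind here ... */
	up_write(&my_notifier_lock);

	return true;
}

static int my_bind(struct my_userptr *u)
{
	/* Sample the notifier sequence before (re)pinning the pages. */
	u->notifier_seq = mmu_interval_read_begin(&u->notifier);

	/* ... pin the user pages here ... */

	down_read(&my_notifier_lock);
	if (mmu_interval_read_retry(&u->notifier, u->notifier_seq)) {
		/* Raced with an invalidation; the caller retries the bind. */
		up_read(&my_notifier_lock);
		return -EAGAIN;
	}
	/* ... commit the binding / update page tables here ... */
	up_read(&my_notifier_lock);

	return 0;
}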