[PATCH] drm/amdgpu: fix amdgpu_amdkfd_remove_eviction_fence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[+Harish]

I think this looks good for the most part. See one comment inline below.

But bear with me while I'm trying to understand what was wrong with the
old code. Please correct me if I get this wrong or point out anything
I'm missing.

The reservation_object_list looks to be protected by a combination of
three mechanism:

  * Holding the reservation object
  * RCU
  * seqcount

Updating the fence list requires holding the reservation object. But
there are some readers that can be called without holding that lock
(reservation_object_copy_fences, reservation_object_get_fences_rcu,
reservation_object_wait_timeout_rcu,
reservation_object_test_signaled_rcu). They rely on RCU to work on a
copy and seqcount to make sure they had the most up-to-date information.
So any function updating the fence lists needs to do RCU and seqcount
correctly to prevent breaking those readers.

As I understand it, RCU with seqcount retry just means that readers will
spin retrying while there are writers, and writers are never blocked by
readers. Writers are blocked by each other, because they need to hold
the reservation.

The code you added looks a lot like
reservation_object_add_shared_replace, which removes fences that have
signalled, and atomically replaces obj->fences with a new
reservation_fence_list. That atomicity is important because each pointer
in the obj->fences->shared array is separately protected by RCU, but not
the array as a whole or the shared_count.

See one comment inline.

Regards,
  Felix

On 2018-08-14 03:39 AM, Christian König wrote:
> Fix quite a number of bugs here. Unfortunately only compile tested.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 103 ++++++++++-------------
>  1 file changed, 46 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index fa38a960ce00..dece31516dc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -206,11 +206,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>  					struct amdgpu_amdkfd_fence ***ef_list,
>  					unsigned int *ef_count)
>  {
> -	struct reservation_object_list *fobj;
> -	struct reservation_object *resv;
> -	unsigned int i = 0, j = 0, k = 0, shared_count;
> -	unsigned int count = 0;
> -	struct amdgpu_amdkfd_fence **fence_list;
> +	struct reservation_object *resv = bo->tbo.resv;
> +	struct reservation_object_list *old, *new;
> +	unsigned int i, j, k;
>  
>  	if (!ef && !ef_list)
>  		return -EINVAL;
> @@ -220,76 +218,67 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>  		*ef_count = 0;
>  	}
>  
> -	resv = bo->tbo.resv;
> -	fobj = reservation_object_get_list(resv);
> -
> -	if (!fobj)
> +	old = reservation_object_get_list(resv);
> +	if (!old)
>  		return 0;
>  
> -	preempt_disable();
> -	write_seqcount_begin(&resv->seq);
> +	new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]),
> +		      GFP_KERNEL);
> +	if (!new)
> +		return -ENOMEM;
>  
> -	/* Go through all the shared fences in the resevation object. If
> -	 * ef is specified and it exists in the list, remove it and reduce the
> -	 * count. If ef is not specified, then get the count of eviction fences
> -	 * present.
> +	/* Go through all the shared fences in the resevation object and sort
> +	 * the interesting ones to the end of the list.
>  	 */
> -	shared_count = fobj->shared_count;
> -	for (i = 0; i < shared_count; ++i) {
> +	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
>  		struct dma_fence *f;
>  
> -		f = rcu_dereference_protected(fobj->shared[i],
> +		f = rcu_dereference_protected(old->shared[i],
>  					      reservation_object_held(resv));
>  
> -		if (ef) {
> -			if (f->context == ef->base.context) {
> -				dma_fence_put(f);
> -				fobj->shared_count--;
> -			} else {
> -				RCU_INIT_POINTER(fobj->shared[j++], f);
> -			}
> -		} else if (to_amdgpu_amdkfd_fence(f))
> -			count++;
> +		if ((ef && f->context == ef->base.context) ||
> +		    (!ef && to_amdgpu_amdkfd_fence(f)))
> +			RCU_INIT_POINTER(new->shared[--j], f);
> +		else
> +			RCU_INIT_POINTER(new->shared[k++], f);
>  	}
> -	write_seqcount_end(&resv->seq);
> -	preempt_enable();
> +	new->shared_max = old->shared_max;
> +	new->shared_count = k;
>  
> -	if (ef || !count)
> -		return 0;
> +	if (!ef) {
> +		unsigned int count = old->shared_count - j;
>  
> -	/* Alloc memory for count number of eviction fence pointers. Fill the
> -	 * ef_list array and ef_count
> -	 */
> -	fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
> -			     GFP_KERNEL);
> -	if (!fence_list)
> -		return -ENOMEM;
> +		/* Alloc memory for count number of eviction fence pointers. Fill the
> +		 * ef_list array and ef_count
> +		 */
> +		*ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
> +		*ef_count = count;
>  
> +		if (!*ef_list) {
> +			kfree(new);
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	/* Install the new fence list, seqcount provides the barriers */
> +	preempt_disable();
> +	write_seqcount_begin(&resv->seq);
> +	RCU_INIT_POINTER(resv->fence, new);
>  	preempt_disable();
>  	write_seqcount_begin(&resv->seq);

You're disabling preemption and calling write_seqcount_begin twice. I
think this is meant to be

	write_seqcount_end(&resv->seq);
	preempt_enable();


>  
> -	j = 0;
> -	for (i = 0; i < shared_count; ++i) {
> +	/* Drop the references to the removed fences or move them to ef_list */
> +	for (i = j, k = 0; i < old->shared_count; ++i) {
>  		struct dma_fence *f;
> -		struct amdgpu_amdkfd_fence *efence;
>  
> -		f = rcu_dereference_protected(fobj->shared[i],
> -			reservation_object_held(resv));
> -
> -		efence = to_amdgpu_amdkfd_fence(f);
> -		if (efence) {
> -			fence_list[k++] = efence;
> -			fobj->shared_count--;
> -		} else {
> -			RCU_INIT_POINTER(fobj->shared[j++], f);
> -		}
> +		f = rcu_dereference_protected(new->shared[i],
> +					      reservation_object_held(resv));
> +		if (!ef)
> +			(*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
> +		else
> +			dma_fence_put(f);
>  	}
> -
> -	write_seqcount_end(&resv->seq);
> -	preempt_enable();
> -
> -	*ef_list = fence_list;
> -	*ef_count = k;
> +	kfree_rcu(old, rcu);
>  
>  	return 0;
>  }



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux