[PATCH 5/6] drm/amdgpu: use fence-array for ctx release

david1.zhou@xxxxxxx (zhoucm1) · Mon, 22 Aug 2016 10:24:35 +0800



On 2016年08月21日 14:42, Edward O'Callaghan wrote:
>
> On 08/18/2016 05:50 PM, Chunming Zhou wrote:
>> benefits:
>> 1. don't block userspace release at all.
>> 2. make sure userspace can look up dependency if fence isn't signaled.
>> If they cannot find ctx, that means the dependency is signaled.
>>
>> Change-Id: I9184a7bb4f5bb6858c2dd49cfb113eeee159cf71
>> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  3 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 51 ++++++++++++++++++++++++++++++++-
>>   2 files changed, 53 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 6d770c2..b6320e8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -35,6 +35,7 @@
>>   #include <linux/interval_tree.h>
>>   #include <linux/hashtable.h>
>>   #include <linux/fence.h>
>> +#include <linux/fence-array.h>
>>   
>>   #include <ttm/ttm_bo_api.h>
>>   #include <ttm/ttm_bo_driver.h>
>> @@ -1025,6 +1026,8 @@ struct amdgpu_ctx_ring {
>>   
>>   struct amdgpu_ctx {
>>   	struct kref		refcount;
>> +	struct fence_cb         cb;
>> +	struct work_struct      release_work;
>>   	struct amdgpu_device    *adev;
>>   	unsigned		reset_counter;
>>   	spinlock_t		ring_lock;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 01d5612..23afe92 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -27,6 +27,7 @@
>>   
>>   static DEFINE_MUTEX(amdgpu_ctx_lock);
>>   extern struct idr amdgpu_ctx_idr;
>> +static void amdgpu_ctx_release_work(struct work_struct *work);
>>   
>>   static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
>>   {
>> @@ -37,6 +38,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
>>   	ctx->adev = adev;
>>   	kref_init(&ctx->refcount);
>>   	spin_lock_init(&ctx->ring_lock);
>> +	INIT_WORK(&ctx->release_work, amdgpu_ctx_release_work);
>>   	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
>>   			      sizeof(struct fence*), GFP_KERNEL);
>>   	if (!ctx->fences)
>> @@ -120,13 +122,60 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>   	return r;
>>   }
>>   
>> +static void amdgpu_ctx_release_work(struct work_struct *work)
>> +{
>> +	struct amdgpu_ctx *ctx = container_of(work, struct amdgpu_ctx,
>> +					      release_work);
>> +	amdgpu_ctx_fini(ctx);
>> +}
>> +
>> +static void amdgpu_ctx_release_cb(struct fence *f, struct fence_cb *cb)
>> +{
>> +	struct amdgpu_ctx *ctx = container_of(cb, struct amdgpu_ctx,
>> +					      cb);
>> +	schedule_work(&ctx->release_work);
>> +	fence_put(f);
>> +}
>> +
>>   static void amdgpu_ctx_do_release(struct kref *ref)
>>   {
>>   	struct amdgpu_ctx *ctx;
>> +	struct fence **fences;
>> +	struct fence_array *array;
>> +	int i, j, k = 0, r;
>>   
>>   	ctx = container_of(ref, struct amdgpu_ctx, refcount);
>>   
>> -	amdgpu_ctx_fini(ctx);
>> +	fences = kmalloc_array(sizeof(void *), AMDGPU_MAX_RINGS *
>> +			       amdgpu_sched_jobs,
>> +			       GFP_KERNEL);
>> +	if (!fences)
>> +		return;
>> +	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>> +		for (j = 0; j < amdgpu_sched_jobs; ++j) {
>> +			if (ctx->rings[i].fences[j])
>> +				fences[k++] = fence_get(ctx->rings[i].fences[j]);
>> +		}
>> +	}
>> +	if (k == 0) {
>> +		amdgpu_ctx_release_cb(NULL, &ctx->cb);
>> +		kfree(fences);
>> +		return;
>> +	}
>> +
>> +	array = fence_array_create(k, fences, fence_context_alloc(1),
>> +				   1, false);
>> +	if (!array) {
>> +		for (j = 0; j < k; ++j)
>> +			fence_put(fences[j]);
>> +		kfree(fences);
>> +		return;
>> +	}
>> +	r = fence_add_callback(&array->base, &ctx->cb, amdgpu_ctx_release_cb);
>> +	if (r == -ENOENT)
>> +		amdgpu_ctx_release_cb(&array->base, &ctx->cb);
>> +	else if (r)
> Could be wrong but should this be (r < 0) ?
I don't think so; it's different from fence_wait_timeout, which returns 
the remaining jiffies.
This one (fence_add_callback) will only return 0 or a negative value.

Regards,
David Zhou
>
> Kind Regards,
> Edward.
>
>> +		DRM_ERROR("fence add callback failed (%d)\n",  r);
>>   }
>>   
>>   static int amdgpu_ctx_free(uint32_t id)
>>