Re: [PATCH 2/2] drm/amdgpu: allocate entities on demand

Luben Tuikov <luben.tuikov@xxxxxxx> · Tue, 21 Jan 2020 19:07:03 -0500

On 2020-01-21 11:50 a.m., Nirmoy Das wrote:
> Currently we pre-allocate entities and fences for all the HW IPs on
> context creation and some of which are might never be used.
> 
> This patch tries to resolve entity/fences wastage by creating entities
> for a HW IP only when it is required.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 163 +++++++++++++-----------
>  1 file changed, 92 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 91638a2a5163..43f1266b1b2e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -58,64 +58,37 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
>  	return -EACCES;
>  }
>  
> -static int amdgpu_ctx_init(struct amdgpu_device *adev,
> -			   enum drm_sched_priority priority,
> -			   struct drm_file *filp,
> -			   struct amdgpu_ctx *ctx)
> +static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip)

I believe we can set "hw_ip" to "const u32 hw_ip", to protect it
from accidental changes.

>  {
> -	unsigned i, j;
> -	int r;
> +	struct amdgpu_device *adev = ctx->adev;
> +	struct drm_gpu_scheduler **scheds;
> +	struct drm_gpu_scheduler *sched;
> +	unsigned num_scheds = 0;
> +	enum drm_sched_priority priority;
> +	int j, r;
>  
> -	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> -		return -EINVAL;
> +	ctx->entities[hw_ip] = kcalloc(amdgpu_ctx_num_entities[hw_ip],
> +			sizeof(struct amdgpu_ctx_entity), GFP_KERNEL);
>  
> -	r = amdgpu_ctx_priority_permit(filp, priority);
> -	if (r)
> -		return r;
> +	if (!ctx->entities[hw_ip])
> +		return  -ENOMEM;

There seem to be two spaces here.

>  
> -	memset(ctx, 0, sizeof(*ctx));
> -	ctx->adev = adev;
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
>  
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		ctx->entities[i] = kcalloc(amdgpu_ctx_num_entities[i],
> -				   sizeof(struct amdgpu_ctx_entity),
> -				   GFP_KERNEL);
> +		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>  
> -		if (!ctx->entities[0]) {
> -			r =  -ENOMEM;
> -			goto error_cleanup_entity_memory;
> +		entity->sequence = 1;
> +		entity->fences = kcalloc(amdgpu_sched_jobs,
> +				sizeof(struct dma_fence*), GFP_KERNEL);

The indent here seems wrong...

> +		if (!entity->fences) {
> +			r = -ENOMEM;
> +			goto error_cleanup_memory;
>  		}
>  	}
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> -
> -			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> -
> -			entity->sequence = 1;
> -			entity->fences = kcalloc(amdgpu_sched_jobs,
> -					 sizeof(struct dma_fence*), GFP_KERNEL);
> -			if (!entity->fences) {
> -				r = -ENOMEM;
> -				goto error_cleanup_memory;
> -			}
> -		}
>  
> -	kref_init(&ctx->refcount);
> -	spin_lock_init(&ctx->ring_lock);
> -	mutex_init(&ctx->lock);
> -
> -	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> -	ctx->reset_counter_query = ctx->reset_counter;
> -	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> -	ctx->init_priority = priority;
> -	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> -
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		struct drm_gpu_scheduler **scheds;
> -		struct drm_gpu_scheduler *sched;
> -		unsigned num_scheds = 0;
> -
> -		switch (i) {
> +	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
> +				ctx->init_priority : ctx->override_priority;

You don't need parentheses around the equality comparison used here.
"==" has higher precedence than the ternary conditional in which it is embedded.

> +	switch (hw_ip) {
>  		case AMDGPU_HW_IP_GFX:
>  			sched = &adev->gfx.gfx_ring[0].sched;
>  			scheds = &sched;
> @@ -156,12 +129,12 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  			scheds = adev->jpeg.jpeg_sched;
>  			num_scheds =  adev->jpeg.num_jpeg_sched;
>  			break;
> -		}
> +	}
>  
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> -			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
> -						  priority, scheds,
> -						  num_scheds, &ctx->guilty);
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
> +		r = drm_sched_entity_init(&ctx->entities[hw_ip][j].entity,
> +				priority, scheds,
> +				num_scheds, &ctx->guilty);

The indent here seems off...

>  		if (r)
>  			goto error_cleanup_entities;
>  	}
> @@ -169,28 +142,54 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>  	return 0;
>  
>  error_cleanup_entities:
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> -			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j)
> +		drm_sched_entity_destroy(&ctx->entities[hw_ip][j].entity);
>  
>  error_cleanup_memory:
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> -			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> +	for (j = 0; j < amdgpu_ctx_num_entities[hw_ip]; ++j) {
> +		struct amdgpu_ctx_entity *entity = &ctx->entities[hw_ip][j];
>  
> -			kfree(entity->fences);
> -			entity->fences = NULL;
> -		}
> -
> -error_cleanup_entity_memory:
> -	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
> -		kfree(ctx->entities[i]);
> -		ctx->entities[i] = NULL;
> +		kfree(entity->fences);
> +		entity->fences = NULL;
>  	}
>  
> +	kfree(ctx->entities[hw_ip]);
> +	ctx->entities[hw_ip] = NULL;
> +
>  	return r;
>  }
>  
> +static int amdgpu_ctx_init(struct amdgpu_device *adev,
> +			   enum drm_sched_priority priority,
> +			   struct drm_file *filp,
> +			   struct amdgpu_ctx *ctx)

The indent of the argument list here seems off...

> +{
> +	int r;
> +
> +	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
> +		return -EINVAL;

This shouldn't be possible since it is an enum...
But why not do this check in "amdgpu_ctx_priority_permit()"
which is introduced by this patchset and used in the immediately
following line?

Or perhaps fix up amdgpu_to_sched_priority() where it is massaged
from the ioctl argument which is an integer.

On a side note: I noticed that the enum drm_sched_priority
has no DRM_SCHED_PRIORITY_NONE.

I've found value in setting the first value of an enum to
"_NONE" (unless zero actually has a meaning as set by HW/etc., anyway).
Enum drm_sched_priority starts with "_MIN" and "_LOW" which
are both set to the same (zero) value.

So a DRM_SCHED_PRIORITY_NONE value could be used to denote
that no priority was given and any is fine, or, if the
priority given was out of range, it could be set to "none"
to mean pick any.

> +
> +	r = amdgpu_ctx_priority_permit(filp, priority);
> +	if (r)
> +		return r;
> +
> +	memset(ctx, 0, sizeof(*ctx));
> +	ctx->adev = adev;
> +
> +	kref_init(&ctx->refcount);
> +	spin_lock_init(&ctx->ring_lock);
> +	mutex_init(&ctx->lock);
> +
> +	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
> +	ctx->reset_counter_query = ctx->reset_counter;
> +	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
> +	ctx->init_priority = priority;
> +	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
> +
> +	return 0;
> +
> +}
> +
>  static void amdgpu_ctx_fini(struct kref *ref)
>  {
>  	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
> @@ -201,10 +200,18 @@ static void amdgpu_ctx_fini(struct kref *ref)
>  		return;
>  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
>  		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> -			struct amdgpu_ctx_entity *entity = &ctx->entities[i][j];
> +			struct amdgpu_ctx_entity *entity;
> +
> +			if (!ctx->entities[i])
> +				continue;
> +
> +			entity = &ctx->entities[i][j];
> +			if (!entity->fences)
> +				continue;
>  
> -			for (k = 0; k < amdgpu_sched_jobs; ++k)
> +			for (k = 0; k < amdgpu_sched_jobs; ++k) {
>  				dma_fence_put(entity->fences[k]);
> +			}

LKCS (Linux kernel coding style): a single non-compound statement as the
body of a loop doesn't warrant braces. So you can leave this as it was.

>  
>  			kfree(entity->fences);
>  		}
> @@ -237,6 +244,11 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
>  		return -EINVAL;
>  	}
>  
> +	if (ctx->entities[hw_ip] == NULL) {
> +		amdgpu_ctx_init_entity(ctx, hw_ip);
> +	}

No need for braces.

> +
> +
>  	*entity = &ctx->entities[hw_ip][ring].entity;
>  	return 0;
>  }
> @@ -281,8 +293,11 @@ static void amdgpu_ctx_do_release(struct kref *ref)
>  
>  	ctx = container_of(ref, struct amdgpu_ctx, refcount);
>  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
> -		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
> +		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
> +			if (!ctx->entities[i])
> +				continue;
>  			drm_sched_entity_destroy(&ctx->entities[i][j].entity);
> +		}
>  
>  	amdgpu_ctx_fini(ref);
>  }
> @@ -573,6 +588,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
>  			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  				struct drm_sched_entity *entity;
>  
> +				if (!ctx->entities[i])
> +					continue;
> +
>  				entity = &ctx->entities[i][j].entity;
>  				timeout = drm_sched_entity_flush(entity, timeout);
>  		}
> @@ -598,6 +616,9 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
>  			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  				struct drm_sched_entity *entity;
>  
> +				if (!ctx->entities[i])
> +					continue;
> +
>  				entity = &ctx->entities[i][j].entity;
>  				drm_sched_entity_fini(entity);
>  			}
> 

I think these changes are good and in the right direction.

Regards,
Luben
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx