[PATCH] drm/amdgpu: Allow to create BO lists in CS ioctl.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 11.07.2018 um 16:51 schrieb Andrey Grodzovsky:
> This change is to support MESA performace optimization.
> Modify CS IOCTL to allow its input as command buffer and an array of
> buffer handles to create a temporay bo list and then destroy it
> when IOCTL completes.
> This saves on calling for BO_LIST create and destry IOCTLs in MESA
> and by this improves performance.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 12 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 62 ++++++++++++++++++-----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 52 +++++++++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>   include/uapi/drm/amdgpu_drm.h               |  1 +
>   5 files changed, 101 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8eaba0f..e082eba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -732,6 +732,16 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>   			     struct list_head *validated);
>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param);
> +
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +				 struct drm_file *filp,
> +				 struct drm_amdgpu_bo_list_entry *info,
> +				 unsigned num_entries,
> +				 int *id);
> +
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>   
>   /*
>    * GFX stuff
> @@ -1048,6 +1058,8 @@ struct amdgpu_cs_parser {
>   
>   	unsigned num_post_dep_syncobjs;
>   	struct drm_syncobj **post_dep_syncobjs;
> +
> +	bool destroy_bo_list;

I think you don't need this. IIRC the bo_list structure was reference 
counted.

So all you need to do is to make sure that the temporary bo_list you 
create has a reference count of 1 and so is destroyed when the CS IOCTL 
calls amdgpu_bo_list_put() on it.

That would simplify the patch quite a bit.

Apart from that looks really good to me, especially that you only need a 
new chunk ID for the UAPI is quite nice.

Thanks,
Christian.

>   };
>   
>   #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 92be7f6..9acdacc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -55,7 +55,7 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
>   	kfree_rcu(list, rhead);
>   }
>   
> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   				 struct drm_file *filp,
>   				 struct drm_amdgpu_bo_list_entry *info,
>   				 unsigned num_entries,
> @@ -91,7 +91,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>   {
>   	struct amdgpu_bo_list *list;
>   
> @@ -263,49 +263,64 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>   	kfree(list);
>   }
>   
> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> -				struct drm_file *filp)
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param)
>   {
> -	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
> -
> -	struct amdgpu_device *adev = dev->dev_private;
> -	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> -	union drm_amdgpu_bo_list *args = data;
> -	uint32_t handle = args->in.list_handle;
> -	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
> -
>   	struct drm_amdgpu_bo_list_entry *info;
> -	struct amdgpu_bo_list *list;
> -
>   	int r;
> +	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
> +	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>   
> -	info = kvmalloc_array(args->in.bo_number,
> +	info = kvmalloc_array(in->bo_number,
>   			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
>   	if (!info)
>   		return -ENOMEM;
>   
>   	/* copy the handle array from userspace to a kernel buffer */
>   	r = -EFAULT;
> -	if (likely(info_size == args->in.bo_info_size)) {
> -		unsigned long bytes = args->in.bo_number *
> -			args->in.bo_info_size;
> +	if (likely(info_size == in->bo_info_size)) {
> +		unsigned long bytes = in->bo_number *
> +			in->bo_info_size;
>   
>   		if (copy_from_user(info, uptr, bytes))
>   			goto error_free;
>   
>   	} else {
> -		unsigned long bytes = min(args->in.bo_info_size, info_size);
> +		unsigned long bytes = min(in->bo_info_size, info_size);
>   		unsigned i;
>   
> -		memset(info, 0, args->in.bo_number * info_size);
> -		for (i = 0; i < args->in.bo_number; ++i) {
> +		memset(info, 0, in->bo_number * info_size);
> +		for (i = 0; i < in->bo_number; ++i) {
>   			if (copy_from_user(&info[i], uptr, bytes))
>   				goto error_free;
>   
> -			uptr += args->in.bo_info_size;
> +			uptr += in->bo_info_size;
>   		}
>   	}
>   
> +	*info_param = info;
> +	return 0;
> +
> +error_free:
> +	kvfree(info);
> +	return r;
> +}
> +
> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> +				struct drm_file *filp)
> +{
> +	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	union drm_amdgpu_bo_list *args = data;
> +	uint32_t handle = args->in.list_handle;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +	struct amdgpu_bo_list *list;
> +	int r;
> +
> +	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
> +	if (r)
> +		goto error_free;
> +
>   	switch (args->in.operation) {
>   	case AMDGPU_BO_LIST_OP_CREATE:
>   		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
> @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   
>   error_free:
> -	kvfree(info);
> +	if (info)
> +		kvfree(info);
>   	return r;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9881a1e..1b17f6b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>   	return 0;
>   }
>   
> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
> +				      struct drm_amdgpu_bo_list_in *data,
> +				      int *id)
> +{
> +	int r;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +
> +	r = amdgpu_bo_create_list_entry_array(data, &info);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, id);
> +	if (r)
> +		goto error_free;
> +
> +	kvfree(info);
> +	return 0;
> +
> +error_free:
> +	if (info)
> +		kvfree(info);
> +
> +	return r;
> +}
> +
> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
>   {
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
> -	union drm_amdgpu_cs *cs = data;
>   	uint64_t *chunk_array_user;
>   	uint64_t *chunk_array;
>   	unsigned size, num_ibs = 0;
> @@ -164,6 +188,20 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   
>   			break;
>   
> +		case AMDGPU_CHUNK_ID_BO_HANDLES:
> +			size = sizeof(struct drm_amdgpu_bo_list_in);
> +			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
> +				ret = -EINVAL;
> +				goto free_partial_kdata;
> +			}
> +
> +			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata, &cs->in.bo_list_handle);
> +			p->destroy_bo_list = true;
> +			if (ret)
> +				goto free_partial_kdata;
> +
> +			break;
> +
>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
> @@ -747,7 +785,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>    * used by parsing context.
>    **/
>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> -				  bool backoff)
> +				  bool backoff, int id)
>   {
>   	unsigned i;
>   
> @@ -765,9 +803,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>   		mutex_unlock(&parser->ctx->lock);
>   		amdgpu_ctx_put(parser->ctx);
>   	}
> -	if (parser->bo_list)
> +	if (parser->bo_list) {
>   		amdgpu_bo_list_put(parser->bo_list);
>   
> +		if (parser->destroy_bo_list)
> +			amdgpu_bo_list_destroy(parser->filp->driver_priv, id);
> +	}
> +
>   	for (i = 0; i < parser->nchunks; i++)
>   		kvfree(parser->chunks[i].kdata);
>   	kfree(parser->chunks);
> @@ -1277,7 +1319,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	r = amdgpu_cs_submit(&parser, cs);
>   
>   out:
> -	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +	amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 06aede1..529500c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -69,9 +69,10 @@
>    * - 3.24.0 - Add high priority compute support for gfx9
>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>    */
>   #define KMS_DRIVER_MAJOR	3
> -#define KMS_DRIVER_MINOR	26
> +#define KMS_DRIVER_MINOR	27
>   #define KMS_DRIVER_PATCHLEVEL	0
>   
>   int amdgpu_vram_limit = 0;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 09741ba..94444ee 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>   
>   struct drm_amdgpu_cs_chunk {
>   	__u32		chunk_id;



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux