This change is to support MESA performace optimization. Modify CS IOCTL to allow its input as command buffer and an array of buffer handles to create a temporay bo list and then destroy it when IOCTL completes. This saves on calling for BO_LIST create and destry IOCTLs in MESA and by this improves performance. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 62 ++++++++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 52 +++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- include/uapi/drm/amdgpu_drm.h | 1 + 5 files changed, 101 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8eaba0f..e082eba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -732,6 +732,16 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, struct list_head *validated); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); void amdgpu_bo_list_free(struct amdgpu_bo_list *list); +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, + struct drm_amdgpu_bo_list_entry **info_param); + +int amdgpu_bo_list_create(struct amdgpu_device *adev, + struct drm_file *filp, + struct drm_amdgpu_bo_list_entry *info, + unsigned num_entries, + int *id); + +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id); /* * GFX stuff @@ -1048,6 +1058,8 @@ struct amdgpu_cs_parser { unsigned num_post_dep_syncobjs; struct drm_syncobj **post_dep_syncobjs; + + bool destroy_bo_list; }; #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 92be7f6..9acdacc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -55,7 +55,7 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref) kfree_rcu(list, rhead); } -static int amdgpu_bo_list_create(struct amdgpu_device *adev, +int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, unsigned num_entries, @@ -91,7 +91,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev, return 0; } -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) { struct amdgpu_bo_list *list; @@ -263,49 +263,64 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list) kfree(list); } -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, + struct drm_amdgpu_bo_list_entry **info_param) { - const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); - - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; - union drm_amdgpu_bo_list *args = data; - uint32_t handle = args->in.list_handle; - const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); - struct drm_amdgpu_bo_list_entry *info; - struct amdgpu_bo_list *list; - int r; + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); + const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); - info = kvmalloc_array(args->in.bo_number, + info = kvmalloc_array(in->bo_number, sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); if (!info) return -ENOMEM; /* copy the handle array from userspace to a kernel buffer */ r = -EFAULT; - if (likely(info_size == args->in.bo_info_size)) { - unsigned long bytes = args->in.bo_number * - args->in.bo_info_size; + if (likely(info_size == in->bo_info_size)) { + unsigned long bytes = in->bo_number * + in->bo_info_size; if (copy_from_user(info, uptr, bytes)) goto error_free; } else { - unsigned long bytes = min(args->in.bo_info_size, info_size); + unsigned long bytes = min(in->bo_info_size, info_size); unsigned i; - memset(info, 0, args->in.bo_number * info_size); - for (i = 0; i < args->in.bo_number; ++i) { + memset(info, 0, in->bo_number * info_size); + for (i = 0; i < in->bo_number; ++i) { if (copy_from_user(&info[i], uptr, bytes)) goto error_free; - uptr += args->in.bo_info_size; + uptr += in->bo_info_size; } } + *info_param = info; + return 0; + +error_free: + kvfree(info); + return r; +} + +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + union drm_amdgpu_bo_list *args = data; + uint32_t handle = args->in.list_handle; + struct drm_amdgpu_bo_list_entry *info = NULL; + struct amdgpu_bo_list *list; + int r; + + r = amdgpu_bo_create_list_entry_array(&args->in, &info); + if (r) + goto error_free; + switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, return 0; error_free: - kvfree(info); + if (info) + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9881a1e..1b17f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return 0; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data, + int *id) +{ + int r; + struct drm_amdgpu_bo_list_entry *info = NULL; + + r = amdgpu_bo_create_list_entry_array(data, &info); + if (r) + return r; + + r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, id); + if (r) + goto error_free; + + kvfree(info); + return 0; + +error_free: + if (info) + kvfree(info); + + return r; +} + +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; unsigned size, num_ibs = 0; @@ -164,6 +188,20 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) break; + case AMDGPU_CHUNK_ID_BO_HANDLES: + size = sizeof(struct drm_amdgpu_bo_list_in); + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { + ret = -EINVAL; + goto free_partial_kdata; + } + + ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata, &cs->in.bo_list_handle); + p->destroy_bo_list = true; + if (ret) + goto free_partial_kdata; + + break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: @@ -747,7 +785,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * used by parsing context. **/ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, - bool backoff) + bool backoff, int id) { unsigned i; @@ -765,9 +803,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); } - if (parser->bo_list) + if (parser->bo_list) { amdgpu_bo_list_put(parser->bo_list); + if (parser->destroy_bo_list) + amdgpu_bo_list_destroy(parser->filp->driver_priv, id); + } + for (i = 0; i < parser->nchunks; i++) kvfree(parser->chunks[i].kdata); kfree(parser->chunks); @@ -1277,7 +1319,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_submit(&parser, cs); out: - amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 06aede1..529500c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -69,9 +69,10 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 26 +#define KMS_DRIVER_MINOR 27 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 struct drm_amdgpu_cs_chunk { __u32 chunk_id; -- 2.7.4