Am 11.07.2018 um 16:51 schrieb Andrey Grodzovsky: > This change is to support MESA performace optimization. > Modify CS IOCTL to allow its input as command buffer and an array of > buffer handles to create a temporay bo list and then destroy it > when IOCTL completes. > This saves on calling for BO_LIST create and destry IOCTLs in MESA > and by this improves performance. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 62 ++++++++++++++++++----------- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 52 +++++++++++++++++++++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- > include/uapi/drm/amdgpu_drm.h | 1 + > 5 files changed, 101 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 8eaba0f..e082eba 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -732,6 +732,16 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, > struct list_head *validated); > void amdgpu_bo_list_put(struct amdgpu_bo_list *list); > void amdgpu_bo_list_free(struct amdgpu_bo_list *list); > +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, > + struct drm_amdgpu_bo_list_entry **info_param); > + > +int amdgpu_bo_list_create(struct amdgpu_device *adev, > + struct drm_file *filp, > + struct drm_amdgpu_bo_list_entry *info, > + unsigned num_entries, > + int *id); > + > +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id); > > /* > * GFX stuff > @@ -1048,6 +1058,8 @@ struct amdgpu_cs_parser { > > unsigned num_post_dep_syncobjs; > struct drm_syncobj **post_dep_syncobjs; > + > + bool destroy_bo_list; I think you don't need this. IIRC the bo_list structure was reference counted. So all you need to do is to make sure that the temporary bo_list you create has a reference count of 1 and so is destroyed when the CS IOCTL calls amdgpu_bo_list_put() on it. That would simplify the patch quite a bit. Apart from that looks really good to me, especially that you only need a new chunk ID for the UAPI is quite nice. Thanks, Christian. > }; > > #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > index 92be7f6..9acdacc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c > @@ -55,7 +55,7 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref) > kfree_rcu(list, rhead); > } > > -static int amdgpu_bo_list_create(struct amdgpu_device *adev, > +int amdgpu_bo_list_create(struct amdgpu_device *adev, > struct drm_file *filp, > struct drm_amdgpu_bo_list_entry *info, > unsigned num_entries, > @@ -91,7 +91,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev, > return 0; > } > > -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) > +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) > { > struct amdgpu_bo_list *list; > > @@ -263,49 +263,64 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list) > kfree(list); > } > > -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, > - struct drm_file *filp) > +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, > + struct drm_amdgpu_bo_list_entry **info_param) > { > - const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); > - > - struct amdgpu_device *adev = dev->dev_private; > - struct amdgpu_fpriv *fpriv = filp->driver_priv; > - union drm_amdgpu_bo_list *args = data; > - uint32_t handle = args->in.list_handle; > - const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); > - > struct drm_amdgpu_bo_list_entry *info; > - struct amdgpu_bo_list *list; > - > int r; > + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); > + const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); > > - info = kvmalloc_array(args->in.bo_number, > + info = kvmalloc_array(in->bo_number, > sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); > if (!info) > return -ENOMEM; > > /* copy the handle array from userspace to a kernel buffer */ > r = -EFAULT; > - if (likely(info_size == args->in.bo_info_size)) { > - unsigned long bytes = args->in.bo_number * > - args->in.bo_info_size; > + if (likely(info_size == in->bo_info_size)) { > + unsigned long bytes = in->bo_number * > + in->bo_info_size; > > if (copy_from_user(info, uptr, bytes)) > goto error_free; > > } else { > - unsigned long bytes = min(args->in.bo_info_size, info_size); > + unsigned long bytes = min(in->bo_info_size, info_size); > unsigned i; > > - memset(info, 0, args->in.bo_number * info_size); > - for (i = 0; i < args->in.bo_number; ++i) { > + memset(info, 0, in->bo_number * info_size); > + for (i = 0; i < in->bo_number; ++i) { > if (copy_from_user(&info[i], uptr, bytes)) > goto error_free; > > - uptr += args->in.bo_info_size; > + uptr += in->bo_info_size; > } > } > > + *info_param = info; > + return 0; > + > +error_free: > + kvfree(info); > + return r; > +} > + > +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, > + struct drm_file *filp) > +{ > + struct amdgpu_device *adev = dev->dev_private; > + struct amdgpu_fpriv *fpriv = filp->driver_priv; > + union drm_amdgpu_bo_list *args = data; > + uint32_t handle = args->in.list_handle; > + struct drm_amdgpu_bo_list_entry *info = NULL; > + struct amdgpu_bo_list *list; > + int r; > + > + r = amdgpu_bo_create_list_entry_array(&args->in, &info); > + if (r) > + goto error_free; > + > switch (args->in.operation) { > case AMDGPU_BO_LIST_OP_CREATE: > r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, > @@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, > return 0; > > error_free: > - kvfree(info); > + if (info) > + kvfree(info); > return r; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 9881a1e..1b17f6b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, > return 0; > } > > -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, > + struct drm_amdgpu_bo_list_in *data, > + int *id) > +{ > + int r; > + struct drm_amdgpu_bo_list_entry *info = NULL; > + > + r = amdgpu_bo_create_list_entry_array(data, &info); > + if (r) > + return r; > + > + r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, id); > + if (r) > + goto error_free; > + > + kvfree(info); > + return 0; > + > +error_free: > + if (info) > + kvfree(info); > + > + return r; > +} > + > +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) > { > struct amdgpu_fpriv *fpriv = p->filp->driver_priv; > struct amdgpu_vm *vm = &fpriv->vm; > - union drm_amdgpu_cs *cs = data; > uint64_t *chunk_array_user; > uint64_t *chunk_array; > unsigned size, num_ibs = 0; > @@ -164,6 +188,20 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > > break; > > + case AMDGPU_CHUNK_ID_BO_HANDLES: > + size = sizeof(struct drm_amdgpu_bo_list_in); > + if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { > + ret = -EINVAL; > + goto free_partial_kdata; > + } > + > + ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata, &cs->in.bo_list_handle); > + p->destroy_bo_list = true; > + if (ret) > + goto free_partial_kdata; > + > + break; > + > case AMDGPU_CHUNK_ID_DEPENDENCIES: > case AMDGPU_CHUNK_ID_SYNCOBJ_IN: > case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: > @@ -747,7 +785,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) > * used by parsing context. > **/ > static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, > - bool backoff) > + bool backoff, int id) > { > unsigned i; > > @@ -765,9 +803,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, > mutex_unlock(&parser->ctx->lock); > amdgpu_ctx_put(parser->ctx); > } > - if (parser->bo_list) > + if (parser->bo_list) { > amdgpu_bo_list_put(parser->bo_list); > > + if (parser->destroy_bo_list) > + amdgpu_bo_list_destroy(parser->filp->driver_priv, id); > + } > + > for (i = 0; i < parser->nchunks; i++) > kvfree(parser->chunks[i].kdata); > kfree(parser->chunks); > @@ -1277,7 +1319,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > r = amdgpu_cs_submit(&parser, cs); > > out: > - amdgpu_cs_parser_fini(&parser, r, reserved_buffers); > + amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 06aede1..529500c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -69,9 +69,10 @@ > * - 3.24.0 - Add high priority compute support for gfx9 > * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). > * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. > + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. > */ > #define KMS_DRIVER_MAJOR 3 > -#define KMS_DRIVER_MINOR 26 > +#define KMS_DRIVER_MINOR 27 > #define KMS_DRIVER_PATCHLEVEL 0 > > int amdgpu_vram_limit = 0; > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 09741ba..94444ee 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va { > #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 > #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 > #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 > +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 > > struct drm_amdgpu_cs_chunk { > __u32 chunk_id;