With more thinking for you performance reason, Can we go further more not to create temp bo list at all? directly add them into parser->validated list? In fact, if bo array is very long, then overhead of bo list creation in CS is considerable, which will double iterate all BOs compared to original. From UMD perspective, they don't create bo list for every CS, they could use old created bo_list for next several CS, if there is a new bo, they just add it. Thanks, David Zhou On 2018å¹´07æ??12æ?¥ 12:02, zhoucm1 wrote: > > > On 2018å¹´07æ??12æ?¥ 11:09, Zhou, David(ChunMing) wrote: >> Hi Andrey, >> >> Could you add compatibility flag or increase kms driver version? So >> that user space can keep old path when using new one. > Sorry for noise, it's already at end of path. > > Regards, > David Zhou >> >> Regards, >> David Zhou >> >> -----Original Message----- >> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On >> Behalf Of zhoucm1 >> Sent: Thursday, July 12, 2018 10:31 AM >> To: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>; >> amd-gfx at lists.freedesktop.org >> Cc: Olsak, Marek <Marek.Olsak at amd.com>; Koenig, Christian >> <Christian.Koenig at amd.com> >> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS >> ioctl v2 >> >> >> >> On 2018å¹´07æ??12æ?¥ 04:57, Andrey Grodzovsky wrote: >>> This change is to support MESA performace optimization. >>> Modify CS IOCTL to allow its input as command buffer and an array of >>> buffer handles to create a temporay bo list and then destroy it when >>> IOCTL completes. >>> This saves on calling for BO_LIST create and destry IOCTLs in MESA and >>> by this improves performance. >>> >>> v2: Avoid inserting the temp list into idr struct. >>> >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> >>> --- >>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 11 ++++ >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 >>> ++++++++++++++++++----------- >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 51 +++++++++++++++-- >>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 3 +- >>>   include/uapi/drm/amdgpu_drm.h              | 1 + >>>   5 files changed, 114 insertions(+), 38 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> index 8eaba0f..9b472b2 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct >>> amdgpu_bo_list *list, >>>                    struct list_head *validated); >>>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list); >>>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list); >>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in >>> *in, >>> +                     struct drm_amdgpu_bo_list_entry **info_param); >>> + >>> +int amdgpu_bo_list_create(struct amdgpu_device *adev, >>> +                struct drm_file *filp, >>> +                struct drm_amdgpu_bo_list_entry *info, >>> +                unsigned num_entries, >>> +                int *id, >>> +                struct amdgpu_bo_list **list); >>> + >>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id); >>>     /* >>>    * GFX stuff >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>> index 92be7f6..14c7c59 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct >>> kref *ref) >>>       kfree_rcu(list, rhead); >>>   } >>>   -static int amdgpu_bo_list_create(struct amdgpu_device *adev, >>> +int amdgpu_bo_list_create(struct amdgpu_device *adev, >>>                    struct drm_file *filp, >>>                    struct drm_amdgpu_bo_list_entry *info, >>>                    unsigned num_entries, >>> -                int *id) >>> +                int *id, >>> +                struct amdgpu_bo_list **list_out) >>>   { >>>       int r; >>>       struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 >>> +79,25 @@ >>> static int amdgpu_bo_list_create(struct amdgpu_device *adev, >>>           return r; >>>       } >>>   +   if (id) { >>>       /* idr alloc should be called only after initialization of bo >>> list. */ >>> -   mutex_lock(&fpriv->bo_list_lock); >>> -   r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); >>> -   mutex_unlock(&fpriv->bo_list_lock); >>> -   if (r < 0) { >>> -       amdgpu_bo_list_free(list); >>> -       return r; >>> +       mutex_lock(&fpriv->bo_list_lock); >>> +       r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, >>> GFP_KERNEL); >>> +       mutex_unlock(&fpriv->bo_list_lock); >>> +       if (r < 0) { >>> +           amdgpu_bo_list_free(list); >>> +           return r; >>> +       } >>> +       *id = r; >>>       } >>> -   *id = r; >>> + >>> +   if (list_out) >>> +       *list_out = list; >>>         return 0; >>>   } >>>   -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int >>> id) >>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) >>>   { >>>       struct amdgpu_bo_list *list; >>>   @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct >>> amdgpu_bo_list *list) >>>       kfree(list); >>>   } >>>   -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, >>> -               struct drm_file *filp) >>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in >>> *in, >>> +                     struct drm_amdgpu_bo_list_entry **info_param) >>>   { >>> -   const uint32_t info_size = sizeof(struct >>> drm_amdgpu_bo_list_entry); >>> - >>> -   struct amdgpu_device *adev = dev->dev_private; >>> -   struct amdgpu_fpriv *fpriv = filp->driver_priv; >>> -   union drm_amdgpu_bo_list *args = data; >>> -   uint32_t handle = args->in.list_handle; >>> -   const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); >>> - >>>       struct drm_amdgpu_bo_list_entry *info; >>> -   struct amdgpu_bo_list *list; >>> - >>>       int r; >>> +   const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); >>> +   const uint32_t info_size = sizeof(struct >>> drm_amdgpu_bo_list_entry); >>>   -   info = kvmalloc_array(args->in.bo_number, >>> +   info = kvmalloc_array(in->bo_number, >>>                    sizeof(struct drm_amdgpu_bo_list_entry), >>> GFP_KERNEL); >>>       if (!info) >>>           return -ENOMEM; >>>         /* copy the handle array from userspace to a kernel buffer */ >>>       r = -EFAULT; >>> -   if (likely(info_size == args->in.bo_info_size)) { >>> -       unsigned long bytes = args->in.bo_number * >>> -           args->in.bo_info_size; >>> +   if (likely(info_size == in->bo_info_size)) { >>> +       unsigned long bytes = in->bo_number * >>> +           in->bo_info_size; >>>             if (copy_from_user(info, uptr, bytes)) >>>               goto error_free; >>>         } else { >>> -       unsigned long bytes = min(args->in.bo_info_size, info_size); >>> +       unsigned long bytes = min(in->bo_info_size, info_size); >>>           unsigned i; >>>   -       memset(info, 0, args->in.bo_number * info_size); >>> -       for (i = 0; i < args->in.bo_number; ++i) { >>> +       memset(info, 0, in->bo_number * info_size); >>> +       for (i = 0; i < in->bo_number; ++i) { >>>               if (copy_from_user(&info[i], uptr, bytes)) >>>                   goto error_free; >>>   -           uptr += args->in.bo_info_size; >>> +           uptr += in->bo_info_size; >>>           } >>>       } >>>   +   *info_param = info; >>> +   return 0; >>> + >>> +error_free: >>> +   kvfree(info); >>> +   return r; >>> +} >>> + >>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, >>> +               struct drm_file *filp) >>> +{ >>> +   struct amdgpu_device *adev = dev->dev_private; >>> +   struct amdgpu_fpriv *fpriv = filp->driver_priv; >>> +   union drm_amdgpu_bo_list *args = data; >>> +   uint32_t handle = args->in.list_handle; >>> +   struct drm_amdgpu_bo_list_entry *info = NULL; >>> +   struct amdgpu_bo_list *list; >>> +   int r; >>> + >>> +   r = amdgpu_bo_create_list_entry_array(&args->in, &info); >>> +   if (r) >>> +       goto error_free; >>> + >>>       switch (args->in.operation) { >>>       case AMDGPU_BO_LIST_OP_CREATE: >>>           r = amdgpu_bo_list_create(adev, filp, info, >>> args->in.bo_number, >>> -                     &handle); >>> +                     &handle, NULL); >>>           if (r) >>>               goto error_free; >>>           break; >>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, >>> void *data, >>>       return 0; >>>     error_free: >>> -   kvfree(info); >>> +   if (info) >>> +       kvfree(info); >>>       return r; >>>   } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> index 9881a1e..30026b8 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct >>> amdgpu_cs_parser *p, >>>       return 0; >>>   } >>>   -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void >>> *data) >>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, >>> +                     struct drm_amdgpu_bo_list_in *data) { >>> +   int r; >>> +   struct drm_amdgpu_bo_list_entry *info = NULL; >>> + >>> +   r = amdgpu_bo_create_list_entry_array(data, &info); >>> +   if (r) >>> +       return r; >>> + >>> +   r = amdgpu_bo_list_create(p->adev, p->filp, info, >>> data->bo_number, NULL, &p->bo_list); >>> +   if (r) >>> +       goto error_free; >>> + >>> +   kvfree(info); >>> +   return 0; >>> + >>> +error_free: >>> +   if (info) >>> +       kvfree(info); >>> + >>> +   return r; >>> +} >>> + >>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union >>> +drm_amdgpu_cs *cs) >>>   { >>>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv; >>>       struct amdgpu_vm *vm = &fpriv->vm; >>> -   union drm_amdgpu_cs *cs = data; >>>       uint64_t *chunk_array_user; >>>       uint64_t *chunk_array; >>>       unsigned size, num_ibs = 0; >>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct >>> amdgpu_cs_parser *p, void *data) >>>                 break; >>>   +       case AMDGPU_CHUNK_ID_BO_HANDLES: >>> +           size = sizeof(struct drm_amdgpu_bo_list_in); >>> +           if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { >>> +               ret = -EINVAL; >>> +               goto free_partial_kdata; >>> +           } >>> + >>> +           ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); >>> +           if (ret) >>> +               goto free_partial_kdata; >>> + >>> +           break; >>> + >>>           case AMDGPU_CHUNK_ID_DEPENDENCIES: >>>           case AMDGPU_CHUNK_ID_SYNCOBJ_IN: >>>           case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: >>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct >>> amdgpu_cs_parser *p, >>>         INIT_LIST_HEAD(&p->validated); >>>   -   p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); >>> +   /* p->bo_list could already be assigned if >>> AMDGPU_CHUNK_ID_BO_HANDLES is present */ >>> +   if (!p->bo_list) >>> +       p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); >>> +   else >>> +       mutex_lock(&p->bo_list->lock); >>> + >>>       if (p->bo_list) { >>>           amdgpu_bo_list_get_list(p->bo_list, &p->validated); >>>           if (p->bo_list->first_userptr != p->bo_list->num_entries) @@ >>> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct >>> amdgpu_cs_parser *p) >>>    * used by parsing context. >>>    **/ >>>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser >>> *parser, int error, >>> -                 bool backoff) >>> +                 bool backoff, int id) >> Don't need it after you get bo_list without idr. >> >>>   { >>>       unsigned i; >>>   @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, >>> void *data, struct drm_file *filp) >>>       r = amdgpu_cs_submit(&parser, cs); >>>     out: >>> -   amdgpu_cs_parser_fini(&parser, r, reserved_buffers); >>> +   amdgpu_cs_parser_fini(&parser, r, reserved_buffers, >>> +cs->in.bo_list_handle); >> Don't need it after you get bo_list without idr. >> >> Otherwise it looks really good to me, Reviewed-by: Chunming Zhou >> <david1.zhou at amd.com> >> >>>       return r; >>>   } >>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> index 06aede1..529500c 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>> @@ -69,9 +69,10 @@ >>>    * - 3.24.0 - Add high priority compute support for gfx9 >>>    * - 3.25.0 - Add support for sensor query info (stable pstate >>> sclk/mclk). >>>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. >>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST >>> creation. >>>    */ >>>   #define KMS_DRIVER_MAJOR   3 >>> -#define KMS_DRIVER_MINOR   26 >>> +#define KMS_DRIVER_MINOR   27 >>>   #define KMS_DRIVER_PATCHLEVEL   0 >>>     int amdgpu_vram_limit = 0; >>> diff --git a/include/uapi/drm/amdgpu_drm.h >>> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644 >>> --- a/include/uapi/drm/amdgpu_drm.h >>> +++ b/include/uapi/drm/amdgpu_drm.h >>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va { >>>   #define AMDGPU_CHUNK_ID_DEPENDENCIES   0x03 >>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN     0x04 >>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT    0x05 >>> +#define AMDGPU_CHUNK_ID_BO_HANDLES     0x06 >>>     struct drm_amdgpu_cs_chunk { >>>       __u32       chunk_id; >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx