On 2018å¹´07æ??12æ?¥ 15:56, Christian König wrote: > Am 12.07.2018 um 06:21 schrieb zhoucm1: >> With more thinking for you performance reason, Can we go further more >> not to create temp bo list at all? directly add them into >> parser->validated list? > > You still need something which is added to the parser->validated list, > so creating the array of BOs in unavoidable. > >> In fact, if bo array is very long, then overhead of bo list creation >> in CS is considerable, which will double iterate all BOs compared to >> original. >> >> From UMD perspective, they don't create bo list for every CS, they >> could use old created bo_list for next several CS, if there is a new >> bo, they just add it. > > And exactly that is the failed concept of bo_lists, it is complete > nonsense to do this. > > Either you create the list of BOs from scratch for each command > submission like Mesa does it in which is exactly the case we try to > support efficient here. @Kai, do you have comments for what Christian said? > > Or you use per process BOs which are always valid. Something which we > have already implemented as well. Yes, vulkan already use it from 4.15. But pro-ogl still use bo list. > > Regards, > Christian. > >> >> Thanks, >> David Zhou >> On 2018å¹´07æ??12æ?¥ 12:02, zhoucm1 wrote: >>> >>> >>> On 2018å¹´07æ??12æ?¥ 11:09, Zhou, David(ChunMing) wrote: >>>> Hi Andrey, >>>> >>>> Could you add compatibility flag or increase kms driver version? So >>>> that user space can keep old path when using new one. >>> Sorry for noise, it's already at end of path. >>> >>> Regards, >>> David Zhou >>>> >>>> Regards, >>>> David Zhou >>>> >>>> -----Original Message----- >>>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On >>>> Behalf Of zhoucm1 >>>> Sent: Thursday, July 12, 2018 10:31 AM >>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>; >>>> amd-gfx at lists.freedesktop.org >>>> Cc: Olsak, Marek <Marek.Olsak at amd.com>; Koenig, Christian >>>> <Christian.Koenig at amd.com> >>>> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS >>>> ioctl v2 >>>> >>>> >>>> >>>> On 2018å¹´07æ??12æ?¥ 04:57, Andrey Grodzovsky wrote: >>>>> This change is to support MESA performace optimization. >>>>> Modify CS IOCTL to allow its input as command buffer and an array of >>>>> buffer handles to create a temporay bo list and then destroy it when >>>>> IOCTL completes. >>>>> This saves on calling for BO_LIST create and destry IOCTLs in MESA >>>>> and >>>>> by this improves performance. >>>>> >>>>> v2: Avoid inserting the temp list into idr struct. >>>>> >>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> >>>>> --- >>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 11 ++++ >>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 >>>>> ++++++++++++++++++----------- >>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 51 +++++++++++++++-- >>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 3 +- >>>>>   include/uapi/drm/amdgpu_drm.h              | 1 + >>>>>   5 files changed, 114 insertions(+), 38 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> index 8eaba0f..9b472b2 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct >>>>> amdgpu_bo_list *list, >>>>>                    struct list_head *validated); >>>>>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list); >>>>>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list); >>>>> +int amdgpu_bo_create_list_entry_array(struct >>>>> drm_amdgpu_bo_list_in *in, >>>>> +                     struct drm_amdgpu_bo_list_entry **info_param); >>>>> + >>>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev, >>>>> +                struct drm_file *filp, >>>>> +                struct drm_amdgpu_bo_list_entry *info, >>>>> +                unsigned num_entries, >>>>> +                int *id, >>>>> +                struct amdgpu_bo_list **list); >>>>> + >>>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id); >>>>>     /* >>>>>    * GFX stuff >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>>>> index 92be7f6..14c7c59 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c >>>>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct >>>>> kref *ref) >>>>>       kfree_rcu(list, rhead); >>>>>   } >>>>>   -static int amdgpu_bo_list_create(struct amdgpu_device *adev, >>>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev, >>>>>                    struct drm_file *filp, >>>>>                    struct drm_amdgpu_bo_list_entry *info, >>>>>                    unsigned num_entries, >>>>> -                int *id) >>>>> +                int *id, >>>>> +                struct amdgpu_bo_list **list_out) >>>>>   { >>>>>       int r; >>>>>       struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 >>>>> +79,25 @@ >>>>> static int amdgpu_bo_list_create(struct amdgpu_device *adev, >>>>>           return r; >>>>>       } >>>>>   +   if (id) { >>>>>       /* idr alloc should be called only after initialization of >>>>> bo list. */ >>>>> -   mutex_lock(&fpriv->bo_list_lock); >>>>> -   r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); >>>>> -   mutex_unlock(&fpriv->bo_list_lock); >>>>> -   if (r < 0) { >>>>> -       amdgpu_bo_list_free(list); >>>>> -       return r; >>>>> +       mutex_lock(&fpriv->bo_list_lock); >>>>> +       r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, >>>>> GFP_KERNEL); >>>>> +       mutex_unlock(&fpriv->bo_list_lock); >>>>> +       if (r < 0) { >>>>> +           amdgpu_bo_list_free(list); >>>>> +           return r; >>>>> +       } >>>>> +       *id = r; >>>>>       } >>>>> -   *id = r; >>>>> + >>>>> +   if (list_out) >>>>> +       *list_out = list; >>>>>         return 0; >>>>>   } >>>>>   -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, >>>>> int >>>>> id) >>>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) >>>>>   { >>>>>       struct amdgpu_bo_list *list; >>>>>   @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct >>>>> amdgpu_bo_list *list) >>>>>       kfree(list); >>>>>   } >>>>>   -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, >>>>> -               struct drm_file *filp) >>>>> +int amdgpu_bo_create_list_entry_array(struct >>>>> drm_amdgpu_bo_list_in *in, >>>>> +                     struct drm_amdgpu_bo_list_entry **info_param) >>>>>   { >>>>> -   const uint32_t info_size = sizeof(struct >>>>> drm_amdgpu_bo_list_entry); >>>>> - >>>>> -   struct amdgpu_device *adev = dev->dev_private; >>>>> -   struct amdgpu_fpriv *fpriv = filp->driver_priv; >>>>> -   union drm_amdgpu_bo_list *args = data; >>>>> -   uint32_t handle = args->in.list_handle; >>>>> -   const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); >>>>> - >>>>>       struct drm_amdgpu_bo_list_entry *info; >>>>> -   struct amdgpu_bo_list *list; >>>>> - >>>>>       int r; >>>>> +   const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); >>>>> +   const uint32_t info_size = sizeof(struct >>>>> drm_amdgpu_bo_list_entry); >>>>>   -   info = kvmalloc_array(args->in.bo_number, >>>>> +   info = kvmalloc_array(in->bo_number, >>>>>                    sizeof(struct drm_amdgpu_bo_list_entry), >>>>> GFP_KERNEL); >>>>>       if (!info) >>>>>           return -ENOMEM; >>>>>         /* copy the handle array from userspace to a kernel >>>>> buffer */ >>>>>       r = -EFAULT; >>>>> -   if (likely(info_size == args->in.bo_info_size)) { >>>>> -       unsigned long bytes = args->in.bo_number * >>>>> -           args->in.bo_info_size; >>>>> +   if (likely(info_size == in->bo_info_size)) { >>>>> +       unsigned long bytes = in->bo_number * >>>>> +           in->bo_info_size; >>>>>             if (copy_from_user(info, uptr, bytes)) >>>>>               goto error_free; >>>>>         } else { >>>>> -       unsigned long bytes = min(args->in.bo_info_size, info_size); >>>>> +       unsigned long bytes = min(in->bo_info_size, info_size); >>>>>           unsigned i; >>>>>   -       memset(info, 0, args->in.bo_number * info_size); >>>>> -       for (i = 0; i < args->in.bo_number; ++i) { >>>>> +       memset(info, 0, in->bo_number * info_size); >>>>> +       for (i = 0; i < in->bo_number; ++i) { >>>>>               if (copy_from_user(&info[i], uptr, bytes)) >>>>>                   goto error_free; >>>>>   -           uptr += args->in.bo_info_size; >>>>> +           uptr += in->bo_info_size; >>>>>           } >>>>>       } >>>>>   +   *info_param = info; >>>>> +   return 0; >>>>> + >>>>> +error_free: >>>>> +   kvfree(info); >>>>> +   return r; >>>>> +} >>>>> + >>>>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, >>>>> +               struct drm_file *filp) >>>>> +{ >>>>> +   struct amdgpu_device *adev = dev->dev_private; >>>>> +   struct amdgpu_fpriv *fpriv = filp->driver_priv; >>>>> +   union drm_amdgpu_bo_list *args = data; >>>>> +   uint32_t handle = args->in.list_handle; >>>>> +   struct drm_amdgpu_bo_list_entry *info = NULL; >>>>> +   struct amdgpu_bo_list *list; >>>>> +   int r; >>>>> + >>>>> +   r = amdgpu_bo_create_list_entry_array(&args->in, &info); >>>>> +   if (r) >>>>> +       goto error_free; >>>>> + >>>>>       switch (args->in.operation) { >>>>>       case AMDGPU_BO_LIST_OP_CREATE: >>>>>           r = amdgpu_bo_list_create(adev, filp, info, >>>>> args->in.bo_number, >>>>> -                     &handle); >>>>> +                     &handle, NULL); >>>>>           if (r) >>>>>               goto error_free; >>>>>           break; >>>>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device >>>>> *dev, void *data, >>>>>       return 0; >>>>>     error_free: >>>>> -   kvfree(info); >>>>> +   if (info) >>>>> +       kvfree(info); >>>>>       return r; >>>>>   } >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>>> index 9881a1e..30026b8 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct >>>>> amdgpu_cs_parser *p, >>>>>       return 0; >>>>>   } >>>>>   -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void >>>>> *data) >>>>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, >>>>> +                     struct drm_amdgpu_bo_list_in *data) { >>>>> +   int r; >>>>> +   struct drm_amdgpu_bo_list_entry *info = NULL; >>>>> + >>>>> +   r = amdgpu_bo_create_list_entry_array(data, &info); >>>>> +   if (r) >>>>> +       return r; >>>>> + >>>>> +   r = amdgpu_bo_list_create(p->adev, p->filp, info, >>>>> data->bo_number, NULL, &p->bo_list); >>>>> +   if (r) >>>>> +       goto error_free; >>>>> + >>>>> +   kvfree(info); >>>>> +   return 0; >>>>> + >>>>> +error_free: >>>>> +   if (info) >>>>> +       kvfree(info); >>>>> + >>>>> +   return r; >>>>> +} >>>>> + >>>>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union >>>>> +drm_amdgpu_cs *cs) >>>>>   { >>>>>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv; >>>>>       struct amdgpu_vm *vm = &fpriv->vm; >>>>> -   union drm_amdgpu_cs *cs = data; >>>>>       uint64_t *chunk_array_user; >>>>>       uint64_t *chunk_array; >>>>>       unsigned size, num_ibs = 0; >>>>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct >>>>> amdgpu_cs_parser *p, void *data) >>>>>                 break; >>>>>   +       case AMDGPU_CHUNK_ID_BO_HANDLES: >>>>> +           size = sizeof(struct drm_amdgpu_bo_list_in); >>>>> +           if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { >>>>> +               ret = -EINVAL; >>>>> +               goto free_partial_kdata; >>>>> +           } >>>>> + >>>>> +           ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); >>>>> +           if (ret) >>>>> +               goto free_partial_kdata; >>>>> + >>>>> +           break; >>>>> + >>>>>           case AMDGPU_CHUNK_ID_DEPENDENCIES: >>>>>           case AMDGPU_CHUNK_ID_SYNCOBJ_IN: >>>>>           case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: >>>>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct >>>>> amdgpu_cs_parser *p, >>>>>         INIT_LIST_HEAD(&p->validated); >>>>>   -   p->bo_list = amdgpu_bo_list_get(fpriv, >>>>> cs->in.bo_list_handle); >>>>> +   /* p->bo_list could already be assigned if >>>>> AMDGPU_CHUNK_ID_BO_HANDLES is present */ >>>>> +   if (!p->bo_list) >>>>> +       p->bo_list = amdgpu_bo_list_get(fpriv, >>>>> cs->in.bo_list_handle); >>>>> +   else >>>>> +       mutex_lock(&p->bo_list->lock); >>>>> + >>>>>       if (p->bo_list) { >>>>>           amdgpu_bo_list_get_list(p->bo_list, &p->validated); >>>>>           if (p->bo_list->first_userptr != >>>>> p->bo_list->num_entries) @@ >>>>> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct >>>>> amdgpu_cs_parser *p) >>>>>    * used by parsing context. >>>>>    **/ >>>>>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser >>>>> *parser, int error, >>>>> -                 bool backoff) >>>>> +                 bool backoff, int id) >>>> Don't need it after you get bo_list without idr. >>>> >>>>>   { >>>>>       unsigned i; >>>>>   @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device >>>>> *dev, void *data, struct drm_file *filp) >>>>>       r = amdgpu_cs_submit(&parser, cs); >>>>>     out: >>>>> -   amdgpu_cs_parser_fini(&parser, r, reserved_buffers); >>>>> +   amdgpu_cs_parser_fini(&parser, r, reserved_buffers, >>>>> +cs->in.bo_list_handle); >>>> Don't need it after you get bo_list without idr. >>>> >>>> Otherwise it looks really good to me, Reviewed-by: Chunming Zhou >>>> <david1.zhou at amd.com> >>>> >>>>>       return r; >>>>>   } >>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>>>> index 06aede1..529500c 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c >>>>> @@ -69,9 +69,10 @@ >>>>>    * - 3.24.0 - Add high priority compute support for gfx9 >>>>>    * - 3.25.0 - Add support for sensor query info (stable pstate >>>>> sclk/mclk). >>>>>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. >>>>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST >>>>> creation. >>>>>    */ >>>>>   #define KMS_DRIVER_MAJOR   3 >>>>> -#define KMS_DRIVER_MINOR   26 >>>>> +#define KMS_DRIVER_MINOR   27 >>>>>   #define KMS_DRIVER_PATCHLEVEL   0 >>>>>     int amdgpu_vram_limit = 0; >>>>> diff --git a/include/uapi/drm/amdgpu_drm.h >>>>> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644 >>>>> --- a/include/uapi/drm/amdgpu_drm.h >>>>> +++ b/include/uapi/drm/amdgpu_drm.h >>>>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va { >>>>>   #define AMDGPU_CHUNK_ID_DEPENDENCIES   0x03 >>>>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN     0x04 >>>>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT    0x05 >>>>> +#define AMDGPU_CHUNK_ID_BO_HANDLES     0x06 >>>>>     struct drm_amdgpu_cs_chunk { >>>>>       __u32       chunk_id; >>>> _______________________________________________ >>>> amd-gfx mailing list >>>> amd-gfx at lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >>> >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx at lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >