[PATCH 9/9] drm/amdgpu: WIP add IOCTL interface for per VM BOs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 25.08.2017 um 21:19 schrieb Christian König:
> Am 25.08.2017 um 18:22 schrieb Marek Olšák:
>> On Fri, Aug 25, 2017 at 3:00 PM, Christian König
>> <deathsimple at vodafone.de> wrote:
>>> Am 25.08.2017 um 12:32 schrieb zhoucm1:
>>>>
>>>>
>>>> On 2017å¹´08æ??25æ?¥ 17:38, Christian König wrote:
>>>>> From: Christian König <christian.koenig at amd.com>
>>>>>
>>>>> Add the IOCTL interface so that applications can allocate per VM BOs.
>>>>>
>>>>> Still WIP since not all corner cases are tested yet, but this reduces
>>>>> average
>>>>> CS overhead for 10K BOs from 21ms down to 48us.
>>>> Wow, cheers, eventually you get per vm bo to same reservation with 
>>>> PD/pts,
>>>> indeed save a lot of bo list.
>>>
>>> Don't cheer to loud yet, that is a completely constructed test case.
>>>
>>> So far I wasn't able to archive any improvements with any real game 
>>> on this
>>> with Mesa.
>>>
>>> BTW: Marek can you take a look with some CPU bound tests? I can 
>>> provide a
>>> kernel branch if necessary.
>> Do you have a branch that works on Raven? This patch series doesn't,
>> and I didn't investigate why.
>
> I will come up with one on Monday.

Branch vm_improvements on 
git://people.freedesktop.org/~deathsimple/linux together with the 
attached patch should work.

Only testing on Tonga, but that's based on amd-staging-4.12 and so 
should work on Raven as well. If not I still have a bug somewhere which 
needs to be fixed.

Thanks,
Christian.
>
> Have a nice weekend guys,
> Christian.
>
>>
>> Marek
>>
>>> Regards,
>>> Christian.
>>>
>>>
>>>> overall looks good, I will take a detailed check for this tomorrow.
>>>>
>>>> Regards,
>>>> David Zhou
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 59
>>>>> ++++++++++++++++++++++---------
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
>>>>>    include/uapi/drm/amdgpu_drm.h             |  2 ++
>>>>>    5 files changed, 51 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index b1e817c..21cab36 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo {
>>>>>     */
>>>>>    void amdgpu_gem_force_release(struct amdgpu_device *adev);
>>>>>    int amdgpu_gem_object_create(struct amdgpu_device *adev, 
>>>>> unsigned long
>>>>> size,
>>>>> -                int alignment, u32 initial_domain,
>>>>> -                u64 flags, bool kernel,
>>>>> -                struct drm_gem_object **obj);
>>>>> +                 int alignment, u32 initial_domain,
>>>>> +                 u64 flags, bool kernel,
>>>>> +                 struct reservation_object *resv,
>>>>> +                 struct drm_gem_object **obj);
>>>>>      int amdgpu_mode_dumb_create(struct drm_file *file_priv,
>>>>>                    struct drm_device *dev,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> index 0e907ea..7256f83 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
>>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct
>>>>> amdgpu_fbdev *rfbdev,
>>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
>>>>>                           AMDGPU_GEM_CREATE_VRAM_CLEARED,
>>>>> -                       true, &gobj);
>>>>> +                       true, NULL, &gobj);
>>>>>        if (ret) {
>>>>>            pr_err("failed to allocate framebuffer (%d)\n", 
>>>>> aligned_size);
>>>>>            return -ENOMEM;
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> index d028806..b8e8d67 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object
>>>>> *gobj)
>>>>>    }
>>>>>      int amdgpu_gem_object_create(struct amdgpu_device *adev, 
>>>>> unsigned
>>>>> long size,
>>>>> -                int alignment, u32 initial_domain,
>>>>> -                u64 flags, bool kernel,
>>>>> -                struct drm_gem_object **obj)
>>>>> +                 int alignment, u32 initial_domain,
>>>>> +                 u64 flags, bool kernel,
>>>>> +                 struct reservation_object *resv,
>>>>> +                 struct drm_gem_object **obj)
>>>>>    {
>>>>> -    struct amdgpu_bo *robj;
>>>>> +    struct amdgpu_bo *bo;
>>>>>        int r;
>>>>>          *obj = NULL;
>>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>>> *adev, unsigned long size,
>>>>>      retry:
>>>>>        r = amdgpu_bo_create(adev, size, alignment, kernel, 
>>>>> initial_domain,
>>>>> -                 flags, NULL, NULL, 0, &robj);
>>>>> +                 flags, NULL, resv, 0, &bo);
>>>>>        if (r) {
>>>>>            if (r != -ERESTARTSYS) {
>>>>>                if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device
>>>>> *adev, unsigned long size,
>>>>>            }
>>>>>            return r;
>>>>>        }
>>>>> -    *obj = &robj->gem_base;
>>>>> +    *obj = &bo->gem_base;
>>>>>          return 0;
>>>>>    }
>>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct 
>>>>> drm_gem_object
>>>>> *obj,
>>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>>>          struct amdgpu_bo_list_entry vm_pd;
>>>>> -    struct list_head list;
>>>>> +    struct list_head list, duplicates;
>>>>>        struct ttm_validate_buffer tv;
>>>>>        struct ww_acquire_ctx ticket;
>>>>>        struct amdgpu_bo_va *bo_va;
>>>>>        int r;
>>>>>          INIT_LIST_HEAD(&list);
>>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>>          tv.bo = &bo->tbo;
>>>>>        tv.shared = true;
>>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct 
>>>>> drm_gem_object
>>>>> *obj,
>>>>>          amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
>>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
>>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
>>>>>        if (r) {
>>>>>            dev_err(adev->dev, "leaking bo va because "
>>>>>                "we fail to reserve bo (%d)\n", r);
>>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>                    struct drm_file *filp)
>>>>>    {
>>>>>        struct amdgpu_device *adev = dev->dev_private;
>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>> +    struct amdgpu_vm *vm = &fpriv->vm;
>>>>>        union drm_amdgpu_gem_create *args = data;
>>>>>        uint64_t flags = args->in.domain_flags;
>>>>>        uint64_t size = args->in.bo_size;
>>>>> +    struct reservation_object *resv = NULL;
>>>>>        struct drm_gem_object *gobj;
>>>>>        uint32_t handle;
>>>>>        int r;
>>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
>>>>>                  AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
>>>>>                  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
>>>>> -              AMDGPU_GEM_CREATE_VRAM_CLEARED))
>>>>> +              AMDGPU_GEM_CREATE_VRAM_CLEARED |
>>>>> +              AMDGPU_GEM_CREATE_LOCAL))
>>>>>            return -EINVAL;
>>>>>          /* reject invalid gem domains */
>>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        }
>>>>>        size = roundup(size, PAGE_SIZE);
>>>>>    +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>>> +        r = amdgpu_bo_reserve(vm->root.base.bo, false);
>>>>> +        if (r)
>>>>> +            return r;
>>>>> +
>>>>> +        resv = vm->root.base.bo->tbo.resv;
>>>>> +    }
>>>>> +
>>>>>        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
>>>>>                         (u32)(0xffffffff & args->in.domains),
>>>>> -                     flags, false, &gobj);
>>>>> +                     flags, false, resv, &gobj);
>>>>> +    if (flags & AMDGPU_GEM_CREATE_LOCAL) {
>>>>> +        if (!r) {
>>>>> +            struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
>>>>> +
>>>>> +            abo->parent = amdgpu_bo_ref(vm->root.base.bo);
>>>>> +        }
>>>>> +        amdgpu_bo_unreserve(vm->root.base.bo);
>>>>> +    }
>>>>>        if (r)
>>>>>            return r;
>>>>>    @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device
>>>>> *dev, void *data,
>>>>>        }
>>>>>          /* create a gem object to contain this object in */
>>>>> -    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>> -                     AMDGPU_GEM_DOMAIN_CPU, 0,
>>>>> -                     0, &gobj);
>>>>> +    r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>> AMDGPU_GEM_DOMAIN_CPU,
>>>>> +                     0, 0, NULL, &gobj);
>>>>>        if (r)
>>>>>            return r;
>>>>>    @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        struct amdgpu_bo_list_entry vm_pd;
>>>>>        struct ttm_validate_buffer tv;
>>>>>        struct ww_acquire_ctx ticket;
>>>>> -    struct list_head list;
>>>>> +    struct list_head list, duplicates;
>>>>>        uint64_t va_flags;
>>>>>        int r = 0;
>>>>>    @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>        }
>>>>>          INIT_LIST_HEAD(&list);
>>>>> +    INIT_LIST_HEAD(&duplicates);
>>>>>        if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>>>>            !(args->flags & AMDGPU_VM_PAGE_PRT)) {
>>>>>            gobj = drm_gem_object_lookup(filp, args->handle);
>>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev, void
>>>>> *data,
>>>>>          amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
>>>>>    -    r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
>>>>> +    r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
>>>>>        if (r)
>>>>>            goto error_unref;
>>>>>    @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device 
>>>>> *dev,
>>>>> void *data,
>>>>>    int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>>>>                struct drm_file *filp)
>>>>>    {
>>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>>>        struct drm_amdgpu_gem_op *args = data;
>>>>>        struct drm_gem_object *gobj;
>>>>>        struct amdgpu_bo *robj;
>>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device 
>>>>> *dev, void
>>>>> *data,
>>>>>            if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
>>>>>                robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
>>>>>    +        if (robj->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>> +            amdgpu_vm_bo_invalidate(adev, robj, true);
>>>>> +
>>>>>            amdgpu_bo_unreserve(robj);
>>>>>            break;
>>>>>        default:
>>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file
>>>>> *file_priv,
>>>>>        r = amdgpu_gem_object_create(adev, args->size, 0,
>>>>>                         AMDGPU_GEM_DOMAIN_VRAM,
>>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
>>>>> -                     ttm_bo_type_device,
>>>>> -                     &gobj);
>>>>> +                     false, NULL, &gobj);
>>>>>        if (r)
>>>>>            return -ENOMEM;
>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> index 5b3f928..f407499 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct
>>>>> drm_device *dev,
>>>>>    {
>>>>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>    -    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>> +        bo->flags & AMDGPU_GEM_CREATE_LOCAL)
>>>>>            return ERR_PTR(-EPERM);
>>>>>          return drm_gem_prime_export(dev, gobj, flags);
>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>>> b/include/uapi/drm/amdgpu_drm.h
>>>>> index d0ee739..05241a6 100644
>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>> @@ -89,6 +89,8 @@ extern "C" {
>>>>>    #define AMDGPU_GEM_CREATE_SHADOW        (1 << 4)
>>>>>    /* Flag that allocating the BO should use linear VRAM */
>>>>>    #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS    (1 << 5)
>>>>> +/* Flag that BO is local in the VM */
>>>>> +#define AMDGPU_GEM_CREATE_LOCAL            (1 << 6)
>>>>>      struct drm_amdgpu_gem_create_in  {
>>>>>        /** the requested memory size */
>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-radeonsi-set-a-per-buffer-flag-that-disables-inter-p.patch
Type: text/x-patch
Size: 11821 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20170827/336d6ead/attachment.bin>


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux