Am 25.08.2017 um 18:22 schrieb Marek Olšák: > On Fri, Aug 25, 2017 at 3:00 PM, Christian König > <deathsimple at vodafone.de> wrote: >> Am 25.08.2017 um 12:32 schrieb zhoucm1: >>> >>> >>> On 2017å¹´08æ??25æ?¥ 17:38, Christian König wrote: >>>> From: Christian König <christian.koenig at amd.com> >>>> >>>> Add the IOCTL interface so that applications can allocate per VM BOs. >>>> >>>> Still WIP since not all corner cases are tested yet, but this reduces >>>> average >>>> CS overhead for 10K BOs from 21ms down to 48us. >>> Wow, cheers, eventually you get per vm bo to same reservation with PD/pts, >>> indeed save a lot of bo list. >> >> Don't cheer to loud yet, that is a completely constructed test case. >> >> So far I wasn't able to archive any improvements with any real game on this >> with Mesa. >> >> BTW: Marek can you take a look with some CPU bound tests? I can provide a >> kernel branch if necessary. > Do you have a branch that works on Raven? This patch series doesn't, > and I didn't investigate why. I will come up with one on Monday. Have a nice weekend guys, Christian. > > Marek > >> Regards, >> Christian. >> >> >>> overall looks good, I will take a detailed check for this tomorrow. >>> >>> Regards, >>> David Zhou >>>> >>>> Signed-off-by: Christian König <christian.koenig at amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++-- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 59 >>>> ++++++++++++++++++++++--------- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +- >>>> include/uapi/drm/amdgpu_drm.h | 2 ++ >>>> 5 files changed, 51 insertions(+), 22 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> index b1e817c..21cab36 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo { >>>> */ >>>> void amdgpu_gem_force_release(struct amdgpu_device *adev); >>>> int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long >>>> size, >>>> - int alignment, u32 initial_domain, >>>> - u64 flags, bool kernel, >>>> - struct drm_gem_object **obj); >>>> + int alignment, u32 initial_domain, >>>> + u64 flags, bool kernel, >>>> + struct reservation_object *resv, >>>> + struct drm_gem_object **obj); >>>> int amdgpu_mode_dumb_create(struct drm_file *file_priv, >>>> struct drm_device *dev, >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>> index 0e907ea..7256f83 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct >>>> amdgpu_fbdev *rfbdev, >>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >>>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | >>>> AMDGPU_GEM_CREATE_VRAM_CLEARED, >>>> - true, &gobj); >>>> + true, NULL, &gobj); >>>> if (ret) { >>>> pr_err("failed to allocate framebuffer (%d)\n", aligned_size); >>>> return -ENOMEM; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> index d028806..b8e8d67 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object >>>> *gobj) >>>> } >>>> int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned >>>> long size, >>>> - int alignment, u32 initial_domain, >>>> - u64 flags, bool kernel, >>>> - struct drm_gem_object **obj) >>>> + int alignment, u32 initial_domain, >>>> + u64 flags, bool kernel, >>>> + struct reservation_object *resv, >>>> + struct drm_gem_object **obj) >>>> { >>>> - struct amdgpu_bo *robj; >>>> + struct amdgpu_bo *bo; >>>> int r; >>>> *obj = NULL; >>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device >>>> *adev, unsigned long size, >>>> retry: >>>> r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, >>>> - flags, NULL, NULL, 0, &robj); >>>> + flags, NULL, resv, 0, &bo); >>>> if (r) { >>>> if (r != -ERESTARTSYS) { >>>> if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { >>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device >>>> *adev, unsigned long size, >>>> } >>>> return r; >>>> } >>>> - *obj = &robj->gem_base; >>>> + *obj = &bo->gem_base; >>>> return 0; >>>> } >>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct drm_gem_object >>>> *obj, >>>> struct amdgpu_vm *vm = &fpriv->vm; >>>> struct amdgpu_bo_list_entry vm_pd; >>>> - struct list_head list; >>>> + struct list_head list, duplicates; >>>> struct ttm_validate_buffer tv; >>>> struct ww_acquire_ctx ticket; >>>> struct amdgpu_bo_va *bo_va; >>>> int r; >>>> INIT_LIST_HEAD(&list); >>>> + INIT_LIST_HEAD(&duplicates); >>>> tv.bo = &bo->tbo; >>>> tv.shared = true; >>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object >>>> *obj, >>>> amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); >>>> - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); >>>> + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); >>>> if (r) { >>>> dev_err(adev->dev, "leaking bo va because " >>>> "we fail to reserve bo (%d)\n", r); >>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, >>>> void *data, >>>> struct drm_file *filp) >>>> { >>>> struct amdgpu_device *adev = dev->dev_private; >>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv; >>>> + struct amdgpu_vm *vm = &fpriv->vm; >>>> union drm_amdgpu_gem_create *args = data; >>>> uint64_t flags = args->in.domain_flags; >>>> uint64_t size = args->in.bo_size; >>>> + struct reservation_object *resv = NULL; >>>> struct drm_gem_object *gobj; >>>> uint32_t handle; >>>> int r; >>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, >>>> void *data, >>>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >>>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS | >>>> AMDGPU_GEM_CREATE_CPU_GTT_USWC | >>>> - AMDGPU_GEM_CREATE_VRAM_CLEARED)) >>>> + AMDGPU_GEM_CREATE_VRAM_CLEARED | >>>> + AMDGPU_GEM_CREATE_LOCAL)) >>>> return -EINVAL; >>>> /* reject invalid gem domains */ >>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, >>>> void *data, >>>> } >>>> size = roundup(size, PAGE_SIZE); >>>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) { >>>> + r = amdgpu_bo_reserve(vm->root.base.bo, false); >>>> + if (r) >>>> + return r; >>>> + >>>> + resv = vm->root.base.bo->tbo.resv; >>>> + } >>>> + >>>> r = amdgpu_gem_object_create(adev, size, args->in.alignment, >>>> (u32)(0xffffffff & args->in.domains), >>>> - flags, false, &gobj); >>>> + flags, false, resv, &gobj); >>>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) { >>>> + if (!r) { >>>> + struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); >>>> + >>>> + abo->parent = amdgpu_bo_ref(vm->root.base.bo); >>>> + } >>>> + amdgpu_bo_unreserve(vm->root.base.bo); >>>> + } >>>> if (r) >>>> return r; >>>> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device >>>> *dev, void *data, >>>> } >>>> /* create a gem object to contain this object in */ >>>> - r = amdgpu_gem_object_create(adev, args->size, 0, >>>> - AMDGPU_GEM_DOMAIN_CPU, 0, >>>> - 0, &gobj); >>>> + r = amdgpu_gem_object_create(adev, args->size, 0, >>>> AMDGPU_GEM_DOMAIN_CPU, >>>> + 0, 0, NULL, &gobj); >>>> if (r) >>>> return r; >>>> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, >>>> void *data, >>>> struct amdgpu_bo_list_entry vm_pd; >>>> struct ttm_validate_buffer tv; >>>> struct ww_acquire_ctx ticket; >>>> - struct list_head list; >>>> + struct list_head list, duplicates; >>>> uint64_t va_flags; >>>> int r = 0; >>>> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, >>>> void *data, >>>> } >>>> INIT_LIST_HEAD(&list); >>>> + INIT_LIST_HEAD(&duplicates); >>>> if ((args->operation != AMDGPU_VA_OP_CLEAR) && >>>> !(args->flags & AMDGPU_VM_PAGE_PRT)) { >>>> gobj = drm_gem_object_lookup(filp, args->handle); >>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void >>>> *data, >>>> amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); >>>> - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); >>>> + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); >>>> if (r) >>>> goto error_unref; >>>> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, >>>> void *data, >>>> int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, >>>> struct drm_file *filp) >>>> { >>>> + struct amdgpu_device *adev = dev->dev_private; >>>> struct drm_amdgpu_gem_op *args = data; >>>> struct drm_gem_object *gobj; >>>> struct amdgpu_bo *robj; >>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void >>>> *data, >>>> if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) >>>> robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; >>>> + if (robj->flags & AMDGPU_GEM_CREATE_LOCAL) >>>> + amdgpu_vm_bo_invalidate(adev, robj, true); >>>> + >>>> amdgpu_bo_unreserve(robj); >>>> break; >>>> default: >>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file >>>> *file_priv, >>>> r = amdgpu_gem_object_create(adev, args->size, 0, >>>> AMDGPU_GEM_DOMAIN_VRAM, >>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, >>>> - ttm_bo_type_device, >>>> - &gobj); >>>> + false, NULL, &gobj); >>>> if (r) >>>> return -ENOMEM; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>> index 5b3f928..f407499 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct >>>> drm_device *dev, >>>> { >>>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); >>>> - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) >>>> + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || >>>> + bo->flags & AMDGPU_GEM_CREATE_LOCAL) >>>> return ERR_PTR(-EPERM); >>>> return drm_gem_prime_export(dev, gobj, flags); >>>> diff --git a/include/uapi/drm/amdgpu_drm.h >>>> b/include/uapi/drm/amdgpu_drm.h >>>> index d0ee739..05241a6 100644 >>>> --- a/include/uapi/drm/amdgpu_drm.h >>>> +++ b/include/uapi/drm/amdgpu_drm.h >>>> @@ -89,6 +89,8 @@ extern "C" { >>>> #define AMDGPU_GEM_CREATE_SHADOW (1 << 4) >>>> /* Flag that allocating the BO should use linear VRAM */ >>>> #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) >>>> +/* Flag that BO is local in the VM */ >>>> +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6) >>>> struct drm_amdgpu_gem_create_in { >>>> /** the requested memory size */ >>> >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx at lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >> >> >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx