Am 25.08.2017 um 21:19 schrieb Christian König: > Am 25.08.2017 um 18:22 schrieb Marek Olšák: >> On Fri, Aug 25, 2017 at 3:00 PM, Christian König >> <deathsimple at vodafone.de> wrote: >>> Am 25.08.2017 um 12:32 schrieb zhoucm1: >>>> >>>> >>>> On 2017å¹´08æ??25æ?¥ 17:38, Christian König wrote: >>>>> From: Christian König <christian.koenig at amd.com> >>>>> >>>>> Add the IOCTL interface so that applications can allocate per VM BOs. >>>>> >>>>> Still WIP since not all corner cases are tested yet, but this reduces >>>>> average >>>>> CS overhead for 10K BOs from 21ms down to 48us. >>>> Wow, cheers, eventually you get per vm bo to same reservation with >>>> PD/pts, >>>> indeed save a lot of bo list. >>> >>> Don't cheer to loud yet, that is a completely constructed test case. >>> >>> So far I wasn't able to archive any improvements with any real game >>> on this >>> with Mesa. >>> >>> BTW: Marek can you take a look with some CPU bound tests? I can >>> provide a >>> kernel branch if necessary. >> Do you have a branch that works on Raven? This patch series doesn't, >> and I didn't investigate why. > > I will come up with one on Monday. Branch vm_improvements on git://people.freedesktop.org/~deathsimple/linux together with the attached patch should work. Only testing on Tonga, but that's based on amd-staging-4.12 and so should work on Raven as well. If not I still have a bug somewhere which needs to be fixed. Thanks, Christian. > > Have a nice weekend guys, > Christian. > >> >> Marek >> >>> Regards, >>> Christian. >>> >>> >>>> overall looks good, I will take a detailed check for this tomorrow. >>>> >>>> Regards, >>>> David Zhou >>>>> >>>>> Signed-off-by: Christian König <christian.koenig at amd.com> >>>>> --- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++-- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 59 >>>>> ++++++++++++++++++++++--------- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +- >>>>> include/uapi/drm/amdgpu_drm.h | 2 ++ >>>>> 5 files changed, 51 insertions(+), 22 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> index b1e817c..21cab36 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >>>>> @@ -457,9 +457,10 @@ struct amdgpu_sa_bo { >>>>> */ >>>>> void amdgpu_gem_force_release(struct amdgpu_device *adev); >>>>> int amdgpu_gem_object_create(struct amdgpu_device *adev, >>>>> unsigned long >>>>> size, >>>>> - int alignment, u32 initial_domain, >>>>> - u64 flags, bool kernel, >>>>> - struct drm_gem_object **obj); >>>>> + int alignment, u32 initial_domain, >>>>> + u64 flags, bool kernel, >>>>> + struct reservation_object *resv, >>>>> + struct drm_gem_object **obj); >>>>> int amdgpu_mode_dumb_create(struct drm_file *file_priv, >>>>> struct drm_device *dev, >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>>> index 0e907ea..7256f83 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c >>>>> @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct >>>>> amdgpu_fbdev *rfbdev, >>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >>>>> AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | >>>>> AMDGPU_GEM_CREATE_VRAM_CLEARED, >>>>> - true, &gobj); >>>>> + true, NULL, &gobj); >>>>> if (ret) { >>>>> pr_err("failed to allocate framebuffer (%d)\n", >>>>> aligned_size); >>>>> return -ENOMEM; >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>>> index d028806..b8e8d67 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>>> @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object >>>>> *gobj) >>>>> } >>>>> int amdgpu_gem_object_create(struct amdgpu_device *adev, >>>>> unsigned >>>>> long size, >>>>> - int alignment, u32 initial_domain, >>>>> - u64 flags, bool kernel, >>>>> - struct drm_gem_object **obj) >>>>> + int alignment, u32 initial_domain, >>>>> + u64 flags, bool kernel, >>>>> + struct reservation_object *resv, >>>>> + struct drm_gem_object **obj) >>>>> { >>>>> - struct amdgpu_bo *robj; >>>>> + struct amdgpu_bo *bo; >>>>> int r; >>>>> *obj = NULL; >>>>> @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device >>>>> *adev, unsigned long size, >>>>> retry: >>>>> r = amdgpu_bo_create(adev, size, alignment, kernel, >>>>> initial_domain, >>>>> - flags, NULL, NULL, 0, &robj); >>>>> + flags, NULL, resv, 0, &bo); >>>>> if (r) { >>>>> if (r != -ERESTARTSYS) { >>>>> if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { >>>>> @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device >>>>> *adev, unsigned long size, >>>>> } >>>>> return r; >>>>> } >>>>> - *obj = &robj->gem_base; >>>>> + *obj = &bo->gem_base; >>>>> return 0; >>>>> } >>>>> @@ -136,13 +137,14 @@ void amdgpu_gem_object_close(struct >>>>> drm_gem_object >>>>> *obj, >>>>> struct amdgpu_vm *vm = &fpriv->vm; >>>>> struct amdgpu_bo_list_entry vm_pd; >>>>> - struct list_head list; >>>>> + struct list_head list, duplicates; >>>>> struct ttm_validate_buffer tv; >>>>> struct ww_acquire_ctx ticket; >>>>> struct amdgpu_bo_va *bo_va; >>>>> int r; >>>>> INIT_LIST_HEAD(&list); >>>>> + INIT_LIST_HEAD(&duplicates); >>>>> tv.bo = &bo->tbo; >>>>> tv.shared = true; >>>>> @@ -150,7 +152,7 @@ void amdgpu_gem_object_close(struct >>>>> drm_gem_object >>>>> *obj, >>>>> amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); >>>>> - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); >>>>> + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); >>>>> if (r) { >>>>> dev_err(adev->dev, "leaking bo va because " >>>>> "we fail to reserve bo (%d)\n", r); >>>>> @@ -185,9 +187,12 @@ int amdgpu_gem_create_ioctl(struct drm_device >>>>> *dev, >>>>> void *data, >>>>> struct drm_file *filp) >>>>> { >>>>> struct amdgpu_device *adev = dev->dev_private; >>>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv; >>>>> + struct amdgpu_vm *vm = &fpriv->vm; >>>>> union drm_amdgpu_gem_create *args = data; >>>>> uint64_t flags = args->in.domain_flags; >>>>> uint64_t size = args->in.bo_size; >>>>> + struct reservation_object *resv = NULL; >>>>> struct drm_gem_object *gobj; >>>>> uint32_t handle; >>>>> int r; >>>>> @@ -196,7 +201,8 @@ int amdgpu_gem_create_ioctl(struct drm_device >>>>> *dev, >>>>> void *data, >>>>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >>>>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS | >>>>> AMDGPU_GEM_CREATE_CPU_GTT_USWC | >>>>> - AMDGPU_GEM_CREATE_VRAM_CLEARED)) >>>>> + AMDGPU_GEM_CREATE_VRAM_CLEARED | >>>>> + AMDGPU_GEM_CREATE_LOCAL)) >>>>> return -EINVAL; >>>>> /* reject invalid gem domains */ >>>>> @@ -223,9 +229,25 @@ int amdgpu_gem_create_ioctl(struct drm_device >>>>> *dev, >>>>> void *data, >>>>> } >>>>> size = roundup(size, PAGE_SIZE); >>>>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) { >>>>> + r = amdgpu_bo_reserve(vm->root.base.bo, false); >>>>> + if (r) >>>>> + return r; >>>>> + >>>>> + resv = vm->root.base.bo->tbo.resv; >>>>> + } >>>>> + >>>>> r = amdgpu_gem_object_create(adev, size, args->in.alignment, >>>>> (u32)(0xffffffff & args->in.domains), >>>>> - flags, false, &gobj); >>>>> + flags, false, resv, &gobj); >>>>> + if (flags & AMDGPU_GEM_CREATE_LOCAL) { >>>>> + if (!r) { >>>>> + struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); >>>>> + >>>>> + abo->parent = amdgpu_bo_ref(vm->root.base.bo); >>>>> + } >>>>> + amdgpu_bo_unreserve(vm->root.base.bo); >>>>> + } >>>>> if (r) >>>>> return r; >>>>> @@ -267,9 +289,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device >>>>> *dev, void *data, >>>>> } >>>>> /* create a gem object to contain this object in */ >>>>> - r = amdgpu_gem_object_create(adev, args->size, 0, >>>>> - AMDGPU_GEM_DOMAIN_CPU, 0, >>>>> - 0, &gobj); >>>>> + r = amdgpu_gem_object_create(adev, args->size, 0, >>>>> AMDGPU_GEM_DOMAIN_CPU, >>>>> + 0, 0, NULL, &gobj); >>>>> if (r) >>>>> return r; >>>>> @@ -521,7 +542,7 @@ int amdgpu_gem_va_ioctl(struct drm_device >>>>> *dev, >>>>> void *data, >>>>> struct amdgpu_bo_list_entry vm_pd; >>>>> struct ttm_validate_buffer tv; >>>>> struct ww_acquire_ctx ticket; >>>>> - struct list_head list; >>>>> + struct list_head list, duplicates; >>>>> uint64_t va_flags; >>>>> int r = 0; >>>>> @@ -557,6 +578,7 @@ int amdgpu_gem_va_ioctl(struct drm_device >>>>> *dev, >>>>> void *data, >>>>> } >>>>> INIT_LIST_HEAD(&list); >>>>> + INIT_LIST_HEAD(&duplicates); >>>>> if ((args->operation != AMDGPU_VA_OP_CLEAR) && >>>>> !(args->flags & AMDGPU_VM_PAGE_PRT)) { >>>>> gobj = drm_gem_object_lookup(filp, args->handle); >>>>> @@ -573,7 +595,7 @@ int amdgpu_gem_va_ioctl(struct drm_device >>>>> *dev, void >>>>> *data, >>>>> amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); >>>>> - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); >>>>> + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); >>>>> if (r) >>>>> goto error_unref; >>>>> @@ -639,6 +661,7 @@ int amdgpu_gem_va_ioctl(struct drm_device >>>>> *dev, >>>>> void *data, >>>>> int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, >>>>> struct drm_file *filp) >>>>> { >>>>> + struct amdgpu_device *adev = dev->dev_private; >>>>> struct drm_amdgpu_gem_op *args = data; >>>>> struct drm_gem_object *gobj; >>>>> struct amdgpu_bo *robj; >>>>> @@ -686,6 +709,9 @@ int amdgpu_gem_op_ioctl(struct drm_device >>>>> *dev, void >>>>> *data, >>>>> if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) >>>>> robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; >>>>> + if (robj->flags & AMDGPU_GEM_CREATE_LOCAL) >>>>> + amdgpu_vm_bo_invalidate(adev, robj, true); >>>>> + >>>>> amdgpu_bo_unreserve(robj); >>>>> break; >>>>> default: >>>>> @@ -715,8 +741,7 @@ int amdgpu_mode_dumb_create(struct drm_file >>>>> *file_priv, >>>>> r = amdgpu_gem_object_create(adev, args->size, 0, >>>>> AMDGPU_GEM_DOMAIN_VRAM, >>>>> AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, >>>>> - ttm_bo_type_device, >>>>> - &gobj); >>>>> + false, NULL, &gobj); >>>>> if (r) >>>>> return -ENOMEM; >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>>> index 5b3f928..f407499 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c >>>>> @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct >>>>> drm_device *dev, >>>>> { >>>>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); >>>>> - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) >>>>> + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || >>>>> + bo->flags & AMDGPU_GEM_CREATE_LOCAL) >>>>> return ERR_PTR(-EPERM); >>>>> return drm_gem_prime_export(dev, gobj, flags); >>>>> diff --git a/include/uapi/drm/amdgpu_drm.h >>>>> b/include/uapi/drm/amdgpu_drm.h >>>>> index d0ee739..05241a6 100644 >>>>> --- a/include/uapi/drm/amdgpu_drm.h >>>>> +++ b/include/uapi/drm/amdgpu_drm.h >>>>> @@ -89,6 +89,8 @@ extern "C" { >>>>> #define AMDGPU_GEM_CREATE_SHADOW (1 << 4) >>>>> /* Flag that allocating the BO should use linear VRAM */ >>>>> #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) >>>>> +/* Flag that BO is local in the VM */ >>>>> +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6) >>>>> struct drm_amdgpu_gem_create_in { >>>>> /** the requested memory size */ >>>> >>>> _______________________________________________ >>>> amd-gfx mailing list >>>> amd-gfx at lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx >>> >>> >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx at lists.freedesktop.org >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-radeonsi-set-a-per-buffer-flag-that-disables-inter-p.patch Type: text/x-patch Size: 11821 bytes Desc: not available URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20170827/336d6ead/attachment.bin>