> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf > Of Christian König > Sent: Tuesday, August 29, 2017 1:08 PM > To: amd-gfx at lists.freedesktop.org > Subject: [PATCH 3/4] drm/amdgpu: add IOCTL interface for per VM BOs v2 > > From: Christian König <christian.koenig at amd.com> > > Add the IOCTL interface so that applications can allocate per VM BOs. > > Still WIP since not all corner cases are tested yet, but this reduces average > CS overhead for 10K BOs from 21ms down to 48us. > > v2: add some extra checks, remove the WIP tag > > Signed-off-by: Christian König <christian.koenig at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 63 > ++++++++++++++++++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 3 +- > include/uapi/drm/amdgpu_drm.h | 2 + > 5 files changed, 55 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index b1e817c..21cab36 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -457,9 +457,10 @@ struct amdgpu_sa_bo { > */ > void amdgpu_gem_force_release(struct amdgpu_device *adev); > int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned > long size, > - int alignment, u32 initial_domain, > - u64 flags, bool kernel, > - struct drm_gem_object **obj); > + int alignment, u32 initial_domain, > + u64 flags, bool kernel, > + struct reservation_object *resv, > + struct drm_gem_object **obj); > > int amdgpu_mode_dumb_create(struct drm_file *file_priv, > struct drm_device *dev, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c > index 0e907ea..7256f83 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c > @@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct > amdgpu_fbdev *rfbdev, > > AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | > > AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | > > AMDGPU_GEM_CREATE_VRAM_CLEARED, > - true, &gobj); > + true, NULL, &gobj); > if (ret) { > pr_err("failed to allocate framebuffer (%d)\n", aligned_size); > return -ENOMEM; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index e32a2b5..a835304 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct > drm_gem_object *gobj) > } > > int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned > long size, > - int alignment, u32 initial_domain, > - u64 flags, bool kernel, > - struct drm_gem_object **obj) > + int alignment, u32 initial_domain, > + u64 flags, bool kernel, > + struct reservation_object *resv, > + struct drm_gem_object **obj) > { > - struct amdgpu_bo *robj; > + struct amdgpu_bo *bo; > int r; > > *obj = NULL; > @@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device > *adev, unsigned long size, > > retry: > r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, > - flags, NULL, NULL, 0, &robj); > + flags, NULL, resv, 0, &bo); > if (r) { > if (r != -ERESTARTSYS) { > if (initial_domain == > AMDGPU_GEM_DOMAIN_VRAM) { > @@ -71,7 +72,7 @@ int amdgpu_gem_object_create(struct amdgpu_device > *adev, unsigned long size, > } > return r; > } > - *obj = &robj->gem_base; > + *obj = &bo->gem_base; > > return 0; > } > @@ -119,6 +120,10 @@ int amdgpu_gem_object_open(struct > drm_gem_object *obj, > if (mm && mm != current->mm) > return -EPERM; > > + if (abo->flags & AMDGPU_GEM_CREATE_LOCAL && > + abo->tbo.resv != vm->root.base.bo->tbo.resv) > + return -EPERM; > + > r = amdgpu_bo_reserve(abo, false); > if (r) > return r; > @@ -142,13 +147,14 @@ void amdgpu_gem_object_close(struct > drm_gem_object *obj, > struct amdgpu_vm *vm = &fpriv->vm; > > struct amdgpu_bo_list_entry vm_pd; > - struct list_head list; > + struct list_head list, duplicates; > struct ttm_validate_buffer tv; > struct ww_acquire_ctx ticket; > struct amdgpu_bo_va *bo_va; > int r; > > INIT_LIST_HEAD(&list); > + INIT_LIST_HEAD(&duplicates); > > tv.bo = &bo->tbo; > tv.shared = true; > @@ -156,7 +162,7 @@ void amdgpu_gem_object_close(struct > drm_gem_object *obj, > > amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); > > - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); > + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); > if (r) { > dev_err(adev->dev, "leaking bo va because " > "we fail to reserve bo (%d)\n", r); > @@ -191,9 +197,12 @@ int amdgpu_gem_create_ioctl(struct drm_device > *dev, void *data, > struct drm_file *filp) > { > struct amdgpu_device *adev = dev->dev_private; > + struct amdgpu_fpriv *fpriv = filp->driver_priv; > + struct amdgpu_vm *vm = &fpriv->vm; > union drm_amdgpu_gem_create *args = data; > uint64_t flags = args->in.domain_flags; > uint64_t size = args->in.bo_size; > + struct reservation_object *resv = NULL; > struct drm_gem_object *gobj; > uint32_t handle; > int r; > @@ -202,7 +211,8 @@ int amdgpu_gem_create_ioctl(struct drm_device > *dev, void *data, > if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | > AMDGPU_GEM_CREATE_NO_CPU_ACCESS | > AMDGPU_GEM_CREATE_CPU_GTT_USWC | > - AMDGPU_GEM_CREATE_VRAM_CLEARED)) > + AMDGPU_GEM_CREATE_VRAM_CLEARED | > + AMDGPU_GEM_CREATE_LOCAL)) > return -EINVAL; > > /* reject invalid gem domains */ > @@ -229,9 +239,25 @@ int amdgpu_gem_create_ioctl(struct drm_device > *dev, void *data, > } > size = roundup(size, PAGE_SIZE); > > + if (flags & AMDGPU_GEM_CREATE_LOCAL) { > + r = amdgpu_bo_reserve(vm->root.base.bo, false); > + if (r) > + return r; > + > + resv = vm->root.base.bo->tbo.resv; > + } > + > r = amdgpu_gem_object_create(adev, size, args->in.alignment, > (u32)(0xffffffff & args->in.domains), > - flags, false, &gobj); > + flags, false, resv, &gobj); > + if (flags & AMDGPU_GEM_CREATE_LOCAL) { > + if (!r) { > + struct amdgpu_bo *abo = > gem_to_amdgpu_bo(gobj); > + > + abo->parent = amdgpu_bo_ref(vm->root.base.bo); > + } > + amdgpu_bo_unreserve(vm->root.base.bo); > + } > if (r) > return r; > > @@ -273,9 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device > *dev, void *data, > } > > /* create a gem object to contain this object in */ > - r = amdgpu_gem_object_create(adev, args->size, 0, > - AMDGPU_GEM_DOMAIN_CPU, 0, > - 0, &gobj); > + r = amdgpu_gem_object_create(adev, args->size, 0, > AMDGPU_GEM_DOMAIN_CPU, > + 0, 0, NULL, &gobj); > if (r) > return r; > > @@ -527,7 +552,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, > void *data, > struct amdgpu_bo_list_entry vm_pd; > struct ttm_validate_buffer tv; > struct ww_acquire_ctx ticket; > - struct list_head list; > + struct list_head list, duplicates; > uint64_t va_flags; > int r = 0; > > @@ -563,6 +588,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, > void *data, > } > > INIT_LIST_HEAD(&list); > + INIT_LIST_HEAD(&duplicates); > if ((args->operation != AMDGPU_VA_OP_CLEAR) && > !(args->flags & AMDGPU_VM_PAGE_PRT)) { > gobj = drm_gem_object_lookup(filp, args->handle); > @@ -579,7 +605,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, > void *data, > > amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); > > - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); > + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); > if (r) > goto error_unref; > > @@ -645,6 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, > void *data, > int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, > struct drm_file *filp) > { > + struct amdgpu_device *adev = dev->dev_private; > struct drm_amdgpu_gem_op *args = data; > struct drm_gem_object *gobj; > struct amdgpu_bo *robj; > @@ -692,6 +719,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, > void *data, > if (robj->allowed_domains == > AMDGPU_GEM_DOMAIN_VRAM) > robj->allowed_domains |= > AMDGPU_GEM_DOMAIN_GTT; > > + if (robj->flags & AMDGPU_GEM_CREATE_LOCAL) > + amdgpu_vm_bo_invalidate(adev, robj, true); > + > amdgpu_bo_unreserve(robj); > break; > default: > @@ -721,8 +751,7 @@ int amdgpu_mode_dumb_create(struct drm_file > *file_priv, > r = amdgpu_gem_object_create(adev, args->size, 0, > AMDGPU_GEM_DOMAIN_VRAM, > > AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, > - ttm_bo_type_device, > - &gobj); > + false, NULL, &gobj); > if (r) > return -ENOMEM; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > index 5b3f928..f407499 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c > @@ -136,7 +136,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct > drm_device *dev, > { > struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); > > - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) > + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || > + bo->flags & AMDGPU_GEM_CREATE_LOCAL) > return ERR_PTR(-EPERM); > > return drm_gem_prime_export(dev, gobj, flags); > diff --git a/include/uapi/drm/amdgpu_drm.h > b/include/uapi/drm/amdgpu_drm.h > index d0ee739..05241a6 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -89,6 +89,8 @@ extern "C" { > #define AMDGPU_GEM_CREATE_SHADOW (1 << 4) > /* Flag that allocating the BO should use linear VRAM */ > #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) > +/* Flag that BO is local in the VM */ > +#define AMDGPU_GEM_CREATE_LOCAL (1 << 6) I'm not crazy about the name LOCAL. Maybe something like ALWAYS_VALID? Alex > > struct drm_amdgpu_gem_create_in { > /** the requested memory size */ > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx