To improve synchronization between command submissions and page table
updates, RADV wants to manually wait for the updates to complete without
affecting parallel submissions.

Implement this by allowing userspace to specify a drm_syncobj handle and a
timeline point for the GEM_VA IOCTL.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 79 ++++++++++++++++++++-----
 include/uapi/drm/amdgpu_drm.h           |  5 +-
 2 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9cdfee67efeb..bf0092f629f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -33,6 +33,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_syncobj.h>
 #include <drm/drm_gem_ttm_helper.h>
 
 #include "amdgpu.h"
@@ -598,6 +599,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
  * @vm: vm to update
  * @bo_va: bo_va to update
  * @operation: map, unmap or clear
+ * @last_update: optional pointer to a dma_fence for the last VM update
  *
  * Update the bo_va directly after setting its address. Errors are not
  * vital here, so they are not reported back to userspace.
@@ -605,20 +607,21 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
                                     struct amdgpu_vm *vm,
                                     struct amdgpu_bo_va *bo_va,
-                                    uint32_t operation)
+                                    uint32_t operation,
+                                    struct dma_fence **last_update)
 {
         int r;
 
         if (!amdgpu_vm_ready(vm))
                 return;
 
-        r = amdgpu_vm_clear_freed(adev, vm, NULL);
+        r = amdgpu_vm_clear_freed(adev, vm, last_update);
         if (r)
                 goto error;
 
         if (operation == AMDGPU_VA_OP_MAP ||
             operation == AMDGPU_VA_OP_REPLACE) {
-                r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+                r = amdgpu_vm_bo_update(adev, bo_va, false, last_update);
                 if (r)
                         goto error;
         }
@@ -671,6 +674,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
         struct drm_gem_object *gobj;
         struct amdgpu_device *adev = drm_to_adev(dev);
         struct amdgpu_fpriv *fpriv = filp->driver_priv;
+        struct dma_fence *fence = dma_fence_get_stub();
+        struct dma_fence_chain *chain = NULL;
+        struct drm_syncobj *syncobj = NULL;
         struct amdgpu_bo *abo;
         struct amdgpu_bo_va *bo_va;
         struct amdgpu_bo_list_entry vm_pd;
@@ -714,17 +720,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                 return -EINVAL;
         }
 
-        switch (args->operation) {
-        case AMDGPU_VA_OP_MAP:
-        case AMDGPU_VA_OP_UNMAP:
-        case AMDGPU_VA_OP_CLEAR:
-        case AMDGPU_VA_OP_REPLACE:
-                break;
-        default:
-                dev_dbg(dev->dev, "unsupported operation %d\n",
-                        args->operation);
-                return -EINVAL;
-        }
+        /* For debugging delay all VM updates till CS time */
+        if (amdgpu_vm_debug)
+                args->flags |= AMDGPU_VM_DELAY_UPDATE;
 
         INIT_LIST_HEAD(&list);
         INIT_LIST_HEAD(&duplicates);
@@ -763,6 +761,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                 bo_va = NULL;
         }
 
+        if (args->syncobj) {
+                syncobj = drm_syncobj_find(filp, args->syncobj);
+                if (!syncobj) {
+                        r = -EINVAL;
+                        goto error_backoff;
+                }
+
+                if (args->timeline_point) {
+                        chain = dma_fence_chain_alloc();
+                        if (!chain) {
+                                r = -ENOMEM;
+                                goto error_put_syncobj;
+                        }
+                }
+
+                /*
+                 * Update the VM once before to make sure there are no other
+                 * pending updates.
+                 */
+                if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
+                        amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+                                                args->operation, NULL);
+        }
+
         switch (args->operation) {
         case AMDGPU_VA_OP_MAP:
                 va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -786,17 +808,42 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                                            va_flags);
                 break;
         default:
+                dev_dbg(dev->dev, "unsupported operation %d\n",
+                        args->operation);
+                r = -EINVAL;
                 break;
         }
 
-        if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
+        if (r)
+                goto error_free_chain;
+
+        if (!(args->flags & AMDGPU_VM_DELAY_UPDATE))
                 amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-                                        args->operation);
+                                        args->operation, syncobj ?
+                                        &fence : NULL);
+
+        if (syncobj) {
+                if (chain) {
+                        drm_syncobj_add_point(syncobj, chain, fence,
+                                              args->timeline_point);
+                        chain = NULL;
+                } else {
+                        drm_syncobj_replace_fence(syncobj, fence);
+                }
+        }
+
+error_free_chain:
+        dma_fence_chain_free(chain);
+
+error_put_syncobj:
+        if (syncobj)
+                drm_syncobj_put(syncobj);
 
 error_backoff:
         ttm_eu_backoff_reservation(&ticket, &list);
 
 error_unref:
         drm_gem_object_put(gobj);
+        dma_fence_put(fence);
         return r;
 }
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1d65c1fbc4ec..f84b5f2c817c 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -533,7 +533,8 @@ struct drm_amdgpu_gem_op {
 struct drm_amdgpu_gem_va {
         /** GEM object handle */
         __u32 handle;
-        __u32 _pad;
+        /** Optional DRM Syncobj to signal when operation completes */
+        __u32 syncobj;
         /** AMDGPU_VA_OP_* */
         __u32 operation;
         /** AMDGPU_VM_PAGE_* */
@@ -544,6 +545,8 @@ struct drm_amdgpu_gem_va {
         __u64 offset_in_bo;
         /** Specify mapping size. Must be correctly aligned. */
         __u64 map_size;
+        /** Optional Syncobj timeline point to signal */
+        __u64 timeline_point;
 };
 
 #define AMDGPU_HW_IP_GFX 0
-- 
2.25.1
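
For reference, a rough sketch of how userspace could consume the new uapi
fields. This is not part of the patch and not how RADV implements it; it is
a hypothetical example assuming libdrm's drmSyncobj*() wrappers, with the
helper name and the timeline point value made up for illustration:

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>     /* drmIoctl(), drmSyncobjCreate(), ... */
    #include <amdgpu_drm.h>  /* DRM_IOCTL_AMDGPU_GEM_VA (libdrm include path) */

    /* Hypothetical helper: map a BO and wait for the page table update. */
    static int map_bo_and_wait(int fd, uint32_t bo_handle, uint64_t va,
                               uint64_t size)
    {
            struct drm_amdgpu_gem_va req;
            uint64_t point = 1;  /* arbitrary timeline point for this example */
            uint32_t syncobj;
            int r;

            /* Create a syncobj for the kernel to signal. */
            r = drmSyncobjCreate(fd, 0, &syncobj);
            if (r)
                    return r;

            memset(&req, 0, sizeof(req));
            req.handle = bo_handle;
            req.operation = AMDGPU_VA_OP_MAP;
            req.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
            req.va_address = va;
            req.map_size = size;
            req.syncobj = syncobj;       /* new field, was _pad */
            req.timeline_point = point;  /* new field */

            r = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &req);
            if (r)
                    goto out;

            /* Wait for this VM update only; other submissions keep running. */
            r = drmSyncobjTimelineWait(fd, &syncobj, &point, 1, INT64_MAX,
                                       0, NULL);
    out:
            drmSyncobjDestroy(fd, syncobj);
            return r;
    }

With args->timeline_point == 0 the kernel path above falls back to
drm_syncobj_replace_fence(), so a binary syncobj plus a plain
drmSyncobjWait() would work the same way.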