This patch updates the VM_IOCTL to allow userspace to synchronize the
mapping/unmapping of a BO in the page table.

The major changes are:
- It adds a drm_timeline object (a timeline syncobj handle plus a
  timeline point) as an input parameter to the VM IOCTL.
- This object is used by the kernel to sync the update of the BO in the
  page table during the mapping of the object.
- The kernel also synchronizes the TLB flush of the page table entry of
  this object during the unmapping (added in this series:
  https://patchwork.freedesktop.org/series/131276/ and
  https://patchwork.freedesktop.org/patch/584182/).
- Userspace can wait on this timeline, after which the BO is ready to be
  consumed by the GPU.

V2:
- remove the eviction fence coupling

V3:
- added the drm timeline support instead of input/output fence (Christian)

V4:
- made timeline 64-bit (Christian)
- bug fix (Arvind)

V5:
- GLCTS bug fix (Arvind)

V6:
- Rename syncobj_handle -> timeline_syncobj_out
- Rename point -> timeline_point_in (Marek)

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Felix Kuehling <felix.kuehling@xxxxxxx>
Signed-off-by: Arvind Yadav <arvind.yadav@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
Change-Id: I0942942641e095408a95d4ab6e2e9d813f0f78db
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       | 14 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 89 ++++++++++++++++++-
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |  3 +
 include/uapi/drm/amdgpu_drm.h                 |  4 +
 4 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index ebb3f87ef4f6..f4529f2fad97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -647,7 +647,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (!amdgpu_vm_ready(vm)) return; - r = amdgpu_vm_clear_freed(adev, vm, NULL); + r = amdgpu_vm_clear_freed(adev, vm, &vm->last_update); if (r) goto error; @@ -825,10 +825,20 @@ int
amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); + if (args->timeline_syncobj_out && args->timeline_point_in) { + r = amdgpu_userqueue_update_bo_mapping(filp, bo_va, args->operation, + args->timeline_syncobj_out, + args->timeline_point_in); + if (r) { + DRM_ERROR("Failed to update userqueue mapping (%u)\n", r); + } + } + } + error: drm_exec_fini(&exec); drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 5173718c3848..c9cc935caabd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -21,7 +21,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - +#include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_userqueue.h" @@ -154,6 +154,87 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, return r; } +static int +amdgpu_userqueue_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + DRM_ERROR("Fail to validate\n"); + + return ret; +} + +int amdgpu_userqueue_update_bo_mapping(struct drm_file *filp, struct amdgpu_bo_va *bo_va, + uint32_t operation, uint32_t syncobj_handle, + uint64_t point) +{ + struct amdgpu_bo *bo = bo_va ? 
bo_va->base.bo : NULL; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + struct dma_fence *last_update; + + /* Find the sync object */ + syncobj = drm_syncobj_find(filp, syncobj_handle); + if (!syncobj) + return -ENOENT; + + /* Allocate the chain node */ + chain = dma_fence_chain_alloc(); + if (!chain) { + drm_syncobj_put(syncobj); + return -ENOMEM; + } + + /* Determine the last update fence */ + if ((bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) || + (operation == AMDGPU_VA_OP_UNMAP) || + (operation == AMDGPU_VA_OP_CLEAR)) + last_update = vm->last_update; + else + last_update = bo_va->last_pt_update; + + /* Add given point to timeline */ + drm_syncobj_add_point(syncobj, chain, last_update, point); + return 0; +} + +static int +amdgpu_userqueue_update_vm(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_vm *vm) +{ + int ret; + + ret = amdgpu_bo_reserve(vm->root.bo, true); + if (ret) { + DRM_ERROR("Reserve failed\n"); + return ret; + } + + /* Validate page directory of the vm */ + ret = amdgpu_userqueue_validate_vm_bo(NULL, vm->root.bo); + if (ret) { + DRM_ERROR("Failed to validate PT BOs\n"); + goto unresv; + } + + ret = amdgpu_bo_sync_wait(vm->root.bo, AMDGPU_FENCE_OWNER_VM, false); + if (ret) { + DRM_ERROR("Sync failed\n"); + goto unresv; + } + +unresv: + amdgpu_bo_unreserve(vm->root.bo); + return ret; +} + static int amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) { @@ -222,6 +303,12 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) queue->flags = args->in.flags; queue->vm = &fpriv->vm; + r = amdgpu_userqueue_update_vm(uq_mgr, queue->vm); + if (r) { + DRM_ERROR("Failed to update vm\n"); + goto unlock; + } + /* Convert relative doorbell offset into absolute doorbell index */ index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, filp, args->in.doorbell_offset); if (index == (uint64_t)-EINVAL) { diff --git 
a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index a653e31350c5..d31e43404640 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -76,4 +76,7 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); +int amdgpu_userqueue_update_bo_mapping(struct drm_file *filp, struct amdgpu_bo_va *bo_va, + uint32_t operation, uint32_t syncobj_handle, + uint64_t point); #endif diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6eac46e0f3fd..7367e72a38e9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -721,6 +721,10 @@ struct drm_amdgpu_gem_va { __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ __u64 map_size; + /** Sync object handle to wait for userqueue sync */ + __u32 timeline_syncobj_out; + /** Timeline point */ + __u64 timeline_point_in; }; #define AMDGPU_HW_IP_GFX 0 -- 2.45.1