Let's simply convert all the current callers to direct xe_pm_runtime
access, removing this extra layer of indirection.

v2: Convert all the current callers instead of a big refactor at once.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c |  7 ++---
 drivers/gpu/drm/xe/xe_bo.c             |  8 +++---
 drivers/gpu/drm/xe/xe_device.c         | 36 --------------------------
 drivers/gpu/drm/xe/xe_device.h         |  3 ---
 drivers/gpu/drm/xe/xe_device_types.h   |  3 ---
 drivers/gpu/drm/xe/xe_exec_queue.c     |  6 ++---
 drivers/gpu/drm/xe/xe_ggtt.c           |  9 ++++---
 drivers/gpu/drm/xe/xe_sched_job.c      |  5 ++--
 drivers/gpu/drm/xe/xe_vm.c             |  6 ++---
 9 files changed, 22 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 722c84a56607..403ed2d42f6b 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -10,6 +10,7 @@
 #include "intel_fb_pin.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
+#include "xe_pm.h"
 
 #include <drm/ttm/ttm_bo.h>
 
@@ -190,7 +191,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
 	/* TODO: Consider sharing framebuffer mapping?
 	 * embed i915_vma inside intel_framebuffer
 	 */
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
 	ret = mutex_lock_interruptible(&ggtt->lock);
 	if (ret)
 		goto out;
@@ -242,7 +243,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
 out_unlock:
 	mutex_unlock(&ggtt->lock);
 out:
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
 	return ret;
 }
 
@@ -381,4 +382,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
 void intel_dpt_destroy(struct i915_address_space *vm)
 {
 	return;
-}
\ No newline at end of file
+}
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7ee4406da127..4c9a8ab4e840 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -716,7 +716,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	xe_assert(xe, migrate);
 
 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get_noresume(xe);
 
 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
 		/*
@@ -740,7 +740,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 			if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
 				ret = -EINVAL;
-				xe_device_mem_access_put(xe);
+				xe_pm_runtime_put(xe);
 				goto out;
 			}
 
@@ -758,7 +758,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 					new_mem, handle_system_ccs);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
-			xe_device_mem_access_put(xe);
+			xe_pm_runtime_put(xe);
 			goto out;
 		}
 		if (!move_lacks_source) {
@@ -783,7 +783,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		dma_fence_put(fence);
 	}
 
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 out:
 	return ret;
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 6ab402153242..31182ebe0b89 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -655,42 +655,6 @@ void xe_device_assert_mem_access(struct xe_device *xe)
 	xe_assert(xe, !xe_pm_runtime_suspended(xe));
 }
 
-void xe_device_mem_access_get(struct xe_device *xe)
-{
-	int ref;
-
-	/*
-	 * This looks racy, but should be fine since the pm_callback_task only
-	 * transitions from NULL -> current (and back to NULL again), during the
-	 * runtime_resume() or runtime_suspend() callbacks, for which there can
-	 * only be a single one running for our device. We only need to prevent
-	 * recursively calling the runtime_get or runtime_put from those
-	 * callbacks, as well as preventing triggering any access_ongoing
-	 * asserts.
-	 */
-	if (xe_pm_read_callback_task(xe) == current)
-		return;
-
-	xe_pm_runtime_get_noresume(xe);
-	ref = atomic_inc_return(&xe->mem_access.ref);
-
-	xe_assert(xe, ref != S32_MAX);
-
-}
-
-void xe_device_mem_access_put(struct xe_device *xe)
-{
-	int ref;
-
-	if (xe_pm_read_callback_task(xe) == current)
-		return;
-
-	ref = atomic_dec_return(&xe->mem_access.ref);
-	xe_pm_runtime_put(xe);
-
-	xe_assert(xe, ref >= 0);
-}
-
 void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
 {
 	struct xe_gt *gt;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index b45592b0bf19..355bddfc8274 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -133,9 +133,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
 	return &gt->mmio.fw;
 }
 
-void xe_device_mem_access_get(struct xe_device *xe);
-void xe_device_mem_access_put(struct xe_device *xe);
-
 void xe_device_assert_mem_access(struct xe_device *xe);
 
 static inline bool xe_device_in_fault_mode(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9785eef2e5a4..4bec8b90a37c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -377,9 +377,6 @@ struct xe_device {
 	 * triggering additional actions when they occur.
 	 */
 	struct {
-		/** @mem_access.ref: ref count of memory accesses */
-		atomic_t ref;
-
 		/**
 		 * @mem_access.vram_userfault: Encapsulate vram_userfault
 		 * related stuff
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index f69a9c99329c..42ddf857e9e8 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -217,7 +217,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 	for (i = 0; i < q->width; ++i)
 		xe_lrc_finish(q->lrc + i);
 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
-		xe_device_mem_access_put(gt_to_xe(q->gt));
+		xe_pm_runtime_put(gt_to_xe(q->gt));
 	__xe_exec_queue_free(q);
 }
 
@@ -589,7 +589,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 
 		/* The migration vm doesn't hold rpm ref */
-		xe_device_mem_access_get(xe);
+		xe_pm_runtime_get_noresume(xe);
 
 		flags = EXEC_QUEUE_FLAG_VM | (id ?
 				EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
@@ -598,7 +598,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 					   args->width, hwe, flags,
 					   args->extensions);
-		xe_device_mem_access_put(xe); /* now held by engine */
+		xe_pm_runtime_put(xe); /* now held by engine */
 
 		xe_vm_put(migrate_vm);
 		if (IS_ERR(new)) {
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 0f96b7db6dcc..d73056aa2362 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -20,6 +20,7 @@
 #include "xe_gt_printk.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
+#include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_wopcm.h"
 
@@ -408,14 +409,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 	if (err)
 		return err;
 
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
 					  alignment, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
 
 	return err;
 }
@@ -433,7 +434,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 {
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 
 	xe_ggtt_clear(ggtt, node->start, node->size);
@@ -443,7 +444,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 	xe_ggtt_invalidate(ggtt);
 
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
 }
 
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 8151ddafb940..8ba35b1698ed 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -15,6 +15,7 @@
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
+#include "xe_pm.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
 
@@ -157,7 +158,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 
 	/* All other jobs require a VM to be open which has a ref */
 	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
-		xe_device_mem_access_get(job_to_xe(job));
+		xe_pm_runtime_get_noresume(job_to_xe(job));
 	xe_device_assert_mem_access(job_to_xe(job));
 
 	trace_xe_sched_job_create(job);
@@ -190,7 +191,7 @@ void xe_sched_job_destroy(struct kref *ref)
 		container_of(ref, struct xe_sched_job, refcount);
 
 	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
-		xe_device_mem_access_put(job_to_xe(job));
+		xe_pm_runtime_put(job_to_xe(job));
 	xe_exec_queue_put(job->q);
 	dma_fence_put(job->fence);
 	drm_sched_job_cleanup(&job->drm);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index cbb9b8935c90..8263ae98c268 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1341,7 +1341,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	vm->pt_ops = &xelp_pt_ops;
 
 	if (!(flags & XE_VM_FLAG_MIGRATION))
-		xe_device_mem_access_get(xe);
+		xe_pm_runtime_get_noresume(xe);
 
 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
 	if (!vm_resv_obj) {
@@ -1452,7 +1452,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		xe_range_fence_tree_fini(&vm->rftree[id]);
 	kfree(vm);
 	if (!(flags & XE_VM_FLAG_MIGRATION))
-		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
 	return ERR_PTR(err);
 }
 
@@ -1575,7 +1575,7 @@ static void vm_destroy_work_func(struct work_struct *w)
 	mutex_destroy(&vm->snap_mutex);
 
 	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
-		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
 
 		if (xe->info.has_asid && vm->usm.asid) {
 			mutex_lock(&xe->usm.lock);
-- 
2.44.0
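
For readers skimming the conversion: every call site changes the same way,
with the wrapper pair replaced one-for-one by the direct runtime PM pair.
Below is a minimal before/after sketch of a caller, using only the xe
functions touched by this patch; the surrounding helper and its body are
hypothetical and not part of the patch.

/* Before: through the indirection layer this patch removes. */
static void ggtt_touch_old(struct xe_ggtt *ggtt)
{
	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);
	/* ... modify GGTT entries ... */
	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
}

/*
 * After: take the runtime PM reference directly. The _noresume variant
 * matches what the removed wrapper already did internally: it takes a
 * reference without waking the device, on the assumption that these
 * paths only run while the device is already awake.
 */
static void ggtt_touch_new(struct xe_ggtt *ggtt)
{
	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);
	/* ... modify GGTT entries ... */
	mutex_unlock(&ggtt->lock);
	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
}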