By applying the same logic as for wait-ioctl, we can query whether a request has completed without holding struct_mutex. The biggest impact system-wide is removing the flush_active and the contention that it causes. Testcase: igt/gem_busy Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Akash Goel <akash.goel@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem.c | 115 +++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4af64d864587..a4f949038d50 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3463,49 +3463,104 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); } +static __always_inline unsigned +__busy_read_flag(const struct drm_i915_gem_request *request) +{ + return 0x10000 << request->engine->exec_id; +} + +static __always_inline unsigned +__busy_write_flag(const struct drm_i915_gem_request *request) +{ + return request->engine->exec_id; +} + +static __always_inline unsigned +__busy_flag(const struct i915_gem_active *active, + unsigned (*flag)(const struct drm_i915_gem_request *)) +{ + do { + struct drm_i915_gem_request *request; + unsigned busy; + + request = rcu_dereference(active->__request); + if (!request || i915_gem_request_completed(request)) + return 0; + + busy = flag(request); + if (request == rcu_dereference(active->__request)) + return busy; + } while (1); +} + +static inline unsigned +busy_read_flag(const struct i915_gem_active *active) +{ + return __busy_flag(active, __busy_read_flag); +} + +static inline unsigned +busy_write_flag(const struct i915_gem_active *active) +{ + return __busy_flag(active, __busy_write_flag); +} + int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - int ret; - - ret = 
i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + unsigned long active; obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; - /* Count all active objects as busy, even if they are currently not used - * by the gpu. Users of this interface expect objects to eventually - * become non-busy without any further actions. - */ args->busy = 0; - if (i915_gem_object_is_active(obj)) { - struct drm_i915_gem_request *req; - int i; + active = __I915_BO_ACTIVE(obj); + if (active) { + int idx; - for (i = 0; i < I915_NUM_ENGINES; i++) { - req = i915_gem_active_peek(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req) - args->busy |= 1 << (16 + req->engine->exec_id); - } - req = i915_gem_active_peek(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req) - args->busy |= req->engine->exec_id; + /* Yes, the lookups are intentionally racy. + * + * Even though we guard the pointer lookup by RCU, that only + * guarantees that the pointer and its contents remain + * dereferenceable and does *not* mean that the request we + * have is the same as the one being tracked by the object. + * + * Consider that we look up the request just as it is being + * retired and freed. We take a local copy of the pointer, + * but before we add its engine into the busy set, the other + * thread reallocates it and assigns it to a task on another + * engine with a fresh and incomplete seqno. + * + * So after we look up the engine's id, we double check that + * the active request is the same and only then do we add it + * into the busy set. + */ + rcu_read_lock(); + + for_each_active(active, idx) + args->busy |= busy_read_flag(&obj->last_read[idx]); + + /* For ABI sanity, we only care that the write engine is in + * the set of read engines. This is ensured by the ordering + * of setting last_read/last_write in i915_vma_move_to_active, + * and then in reverse in retire. 
+ * + * We don't care that the set of active read/write engines + * may change during construction of the result, as it is + * equally liable to change before userspace can inspect + * the result. + */ + args->busy |= busy_write_flag(&obj->last_write); + + rcu_read_unlock(); } - i915_gem_object_put(obj); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return 0; } int -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx