We always try to do an unlocked wait before resorting to a blocking
wait under the mutex, so we very rarely have to sleep under the
struct_mutex. However, when we do, we want that wait to be as short as
possible as the struct_mutex is our BKL that will stall the driver and
all clients. There should be no impact on typical workloads.

v2: Move down a layer to apply to all waits.
v3: Make the priority boost explicit. This makes the paths where we
want boosting under the mutex clear and prevents uselessly boosting
the priority when we are waiting for idle.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem.c         | 10 +++++++++-
 drivers/gpu/drm/i915/i915_gem_request.c | 10 ++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h | 10 +++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.c |  4 +++-
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d1fd3ef7fad4..0c1694fef903 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,6 +330,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
@@ -755,7 +756,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY,
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
 	if (ret)
@@ -806,6 +808,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
@@ -3074,6 +3077,8 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret;
 
+	GEM_BUG_ON(flags & I915_WAIT_PRIORITY);
+
 	if (flags & I915_WAIT_LOCKED) {
 		struct i915_gem_timeline *tl;
@@ -3163,6 +3168,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
@@ -3285,6 +3291,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
@@ -3566,6 +3573,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
+				   I915_WAIT_PRIORITY |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT,
 				   NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bacb875a6ef3..9910b565d1a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1046,6 +1046,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		  !!lockdep_is_held(&req->i915->drm.struct_mutex) !=
 		  !!(flags & I915_WAIT_LOCKED));
 #endif
+	GEM_BUG_ON((flags & I915_WAIT_PRIORITY) && !(flags & I915_WAIT_LOCKED));
 	GEM_BUG_ON(timeout < 0);
 
 	if (i915_gem_request_completed(req))
@@ -1056,6 +1057,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req);
 
+	/* Very rarely do we wait whilst holding the mutex. We try to always
+	 * do an unlocked wait before using a locked wait. However, when we
+	 * have to resort to a locked wait, we want that wait to be as short
+	 * as possible as the struct_mutex is our BKL that will stall the
+	 * driver and all clients.
+	 */
+	if (flags & I915_WAIT_PRIORITY && req->engine->schedule)
+		req->engine->schedule(req, I915_PRIORITY_LOCKED);
+
 	add_wait_queue(&req->execute, &exec);
 	if (flags & I915_WAIT_LOCKED) {
 		add_wait_queue(errq, &reset);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index da71ae280b2a..ba83c507613b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -74,7 +74,8 @@ struct i915_priotree {
 };
 
 enum {
-	I915_PRIORITY_DISPLAY = I915_PRIORITY_MAX
+	I915_PRIORITY_LOCKED = I915_PRIORITY_MAX,
+	I915_PRIORITY_DISPLAY
 };
 
 struct i915_gem_capture_list {
@@ -301,7 +302,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_PRIORITY	BIT(2) /* struct_mutex held, boost priority */
+#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
 
 static inline u32
 intel_engine_get_seqno(struct intel_engine_cs *engine);
@@ -726,7 +728,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 		return 0;
 
 	ret = i915_wait_request(request,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				I915_WAIT_INTERRUPTIBLE |
+				I915_WAIT_LOCKED |
+				I915_WAIT_PRIORITY,
 				MAX_SCHEDULE_TIMEOUT);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cf4ec937d923..2f7651ef45d5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2158,7 +2158,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		return -ENOSPC;
 
 	timeout = i915_wait_request(target,
-				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    I915_WAIT_INTERRUPTIBLE |
+				    I915_WAIT_LOCKED |
+				    I915_WAIT_PRIORITY,
 				    MAX_SCHEDULE_TIMEOUT);
 	if (timeout < 0)
 		return timeout;
-- 
2.11.0
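
As a usage sketch for reviewers (illustrative only, not part of the patch):
the helper below is hypothetical and simply shows the v3 calling convention.
I915_WAIT_PRIORITY is only legal together with I915_WAIT_LOCKED (enforced by
the new GEM_BUG_ON in i915_wait_request()), and wait-for-idle must not pass
it at all (enforced by the GEM_BUG_ON added to i915_gem_wait_for_idle()):

/* Hypothetical example, not part of the patch: a flush-for-write path
 * that opts into the explicit priority boost. The flag may only
 * accompany I915_WAIT_LOCKED, so the boost is confined to short waits
 * under the struct_mutex; unlocked waiters and wait-for-idle never
 * pass it and so are never boosted.
 */
static int example_flush_for_write(struct drm_i915_gem_object *obj)
{
	lockdep_assert_held(&obj->base.dev->struct_mutex);

	return i915_gem_object_wait(obj,
				    I915_WAIT_INTERRUPTIBLE |
				    I915_WAIT_LOCKED |
				    I915_WAIT_PRIORITY |
				    I915_WAIT_ALL,
				    MAX_SCHEDULE_TIMEOUT,
				    NULL);
}

This mirrors the i915_gem_obj_prepare_shmem_write() hunk above: since only
locked waiters set the flag, a request is bumped to I915_PRIORITY_LOCKED
only when a client is actually stalled under the mutex.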