Start acquiring the logical intel_context and using that as our
primary means for request allocation. This is the initial step to
allow us to avoid requiring struct_mutex for request allocation on
the perma-pinned kernel context, but it also provides a foundation
for breaking up the complex request allocation to handle the
different scenarios inside execbuf.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
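[Reviewer note, not part of the commit proper: a minimal sketch of the
calling convention this patch moves to, assuming the caller already owns
a GEM pm reference and a pinned context. submit_kernel_nop() is a
made-up name for illustration; the functions it calls are the ones
introduced or converted below. Diffstat and diff follow.]

	/*
	 * Illustration only: submit a nop along an engine's perma-pinned
	 * kernel context with the new interface. The wakeref is now the
	 * caller's responsibility; i915_request_create() merely asserts
	 * the GT is already awake.
	 */
	static int submit_kernel_nop(struct drm_i915_private *i915,
				     struct intel_engine_cs *engine)
	{
		struct i915_request *rq;
		int err = 0;

		i915_gem_unpark(i915); /* keep the GT awake across allocation */

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq))
			err = PTR_ERR(rq);
		else
			i915_request_add(rq); /* rq now holds its own references */

		i915_gem_park(i915);
		return err;
	}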
 drivers/gpu/drm/i915/i915_gem_context.c       | 20 +--
 drivers/gpu/drm/i915/i915_perf.c              | 22 ++--
 drivers/gpu/drm/i915/i915_request.c           | 118 ++++++++++--------
 drivers/gpu/drm/i915/i915_request.h           |  3 +
 drivers/gpu/drm/i915/i915_reset.c             |  8 +-
 drivers/gpu/drm/i915/intel_overlay.c          | 28 +++--
 drivers/gpu/drm/i915/selftests/i915_active.c  |  2 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |  2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  9 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  9 +-
 .../gpu/drm/i915/selftests/i915_timeline.c    |  4 +-
 11 files changed, 135 insertions(+), 90 deletions(-)
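[Likewise illustration only, not from the diff below: why the
compatibility path in i915_request_alloc() may drop its temporary
references as soon as i915_request_create() returns — on success the
request has taken its own pin on the context (and its own count on
gt.active_requests), so the caller's pin and wakeref only need to cover
the allocation itself. legacy_alloc() is a hypothetical name mirroring
the patched function.]

	static struct i915_request *
	legacy_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
	{
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_pin(ctx, engine); /* may itself emit requests */
		if (IS_ERR(ce))
			return ERR_CAST(ce);

		i915_gem_unpark(engine->i915);
		rq = i915_request_create(ce); /* pins ce again on success */
		i915_gem_park(engine->i915);

		intel_context_unpin(ce); /* drop our temporary pin */
		return rq; /* ERR_PTR() or a live request */
	}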
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 25f267a03d3d..6a452345ffdb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -88,6 +88,7 @@
 #include <linux/log2.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_pm.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
@@ -863,7 +864,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct intel_context *ce, *next;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -876,7 +876,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_init(i915, &cb->base, cb_retire);
 	i915_active_acquire(&cb->base);
 
-	wakeref = intel_runtime_pm_get(i915);
+	i915_gem_unpark(i915);
 	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
 		struct intel_engine_cs *engine = ce->engine;
 		struct i915_request *rq;
@@ -890,7 +890,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 			break;
 		}
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = i915_request_create(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
 		}
@@ -906,7 +906,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (err)
 			break;
 	}
-	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_park(i915);
 
 	cb->task = err ? NULL : task; /* caller needs to unwind instead */
 	cb->data = data;
@@ -930,11 +930,12 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 	if (i915_terminally_wedged(i915))
 		return 0;
 
+	i915_gem_unpark(i915);
 	for_each_engine_masked(engine, i915, mask, mask) {
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
@@ -960,6 +961,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 
 		i915_request_add(rq);
 	}
+	i915_gem_park(i915);
 
 	return 0;
 }
@@ -1158,7 +1160,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 {
 	struct drm_i915_private *i915 = ce->engine->i915;
 	struct i915_request *rq, *prev;
-	intel_wakeref_t wakeref;
 	int ret;
 
 	lockdep_assert_held(&ce->pin_mutex);
@@ -1173,9 +1174,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 		return 0;
 
 	/* Submitting requests etc needs the hw awake. */
-	wakeref = intel_runtime_pm_get(i915);
+	i915_gem_unpark(i915);
 
-	rq = i915_request_alloc(ce->engine, i915->kernel_context);
+	rq = i915_request_create(ce->engine->kernel_context);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto out_put;
@@ -1213,8 +1214,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 out_add:
 	i915_request_add(rq);
 out_put:
-	intel_runtime_pm_put(i915, wakeref);
-
+	i915_gem_park(i915);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 85c5cb779297..fe7267da52e5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,6 +196,7 @@
 #include <linux/uuid.h>
 
 #include "i915_drv.h"
+#include "i915_gem_pm.h"
 #include "i915_oa_hsw.h"
 #include "i915_oa_bdw.h"
 #include "i915_oa_chv.h"
@@ -1716,6 +1717,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	i915_gem_unpark(dev_priv);
 
 	/*
	 * The OA register config is setup through the context image. This image
@@ -1734,7 +1736,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 			  I915_WAIT_LOCKED,
 			  MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto out_pm;
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
@@ -1746,8 +1748,10 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 			continue;
 
 		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
-		if (IS_ERR(regs))
-			return PTR_ERR(regs);
+		if (IS_ERR(regs)) {
+			ret = PTR_ERR(regs);
+			goto out_pm;
+		}
 
 		ce->state->obj->mm.dirty = true;
 		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
@@ -1761,13 +1765,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
	 * Apply the configuration by doing one context restore of the edited
	 * context image.
	 */
-	rq = i915_request_alloc(engine, dev_priv->kernel_context);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_pm;
+	}
 
 	i915_request_add(rq);
 
-	return 0;
+out_pm:
+	i915_gem_park(dev_priv);
+	return ret;
 }
 
 static int gen8_enable_metric_set(struct i915_perf_stream *stream)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8d396f3c747d..fd24f576ca61 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -576,51 +576,19 @@ static int add_timeline_barrier(struct i915_request *rq)
 	return i915_request_await_active_request(rq, &rq->timeline->barrier);
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+struct i915_request *i915_request_create(struct intel_context *ce)
 {
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_timeline *tl;
+	struct drm_i915_private *i915 = ce->engine->i915;
+	struct i915_timeline *tl = ce->ring->timeline;
 	struct i915_request *rq;
 	u32 seqno;
 	int ret;
 
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
+	GEM_BUG_ON(!i915->gt.awake);
 
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = intel_context_pin(ctx, engine);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	i915_gem_unpark(i915);
-	mutex_lock(&ce->ring->timeline->mutex);
+	/* Check that the caller provided an already pinned context */
+	__intel_context_pin(ce);
+	mutex_lock(&tl->mutex);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
@@ -670,18 +638,17 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
 
-	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
 		goto err_free;
 
 	rq->i915 = i915;
-	rq->engine = engine;
-	rq->gem_context = ctx;
 	rq->hw_context = ce;
+	rq->gem_context = ce->gem_context;
+	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
+	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -713,7 +680,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
	 * around inside i915_request_add() there is sufficient space at
	 * the beginning of the ring as well.
	 */
-	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
+	rq->reserved_space =
+		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
 
 	/*
	 * Record the position of the start of the request so that
@@ -727,17 +695,19 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	if (ret)
 		goto err_unwind;
 
-	ret = engine->request_alloc(rq);
+	ret = rq->engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
 
+	rq->infix = rq->ring->emit; /* end of header; start of user payload */
+
 	/* Keep a second pin for the dual retirement along engine and ring */
 	__intel_context_pin(ce);
-
-	rq->infix = rq->ring->emit; /* end of header; start of user payload */
+	atomic_inc(&i915->gt.active_requests);
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
+	lockdep_assert_held(&tl->mutex);
 	return rq;
 
 err_unwind:
@@ -751,12 +721,62 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
-	mutex_unlock(&ce->ring->timeline->mutex);
-	i915_gem_park(i915);
+	mutex_unlock(&tl->mutex);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
+/**
+ * i915_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct i915_request *
+i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
+	struct i915_request *rq;
+	int ret;
+
+	/*
+	 * Preempt contexts are reserved for exclusive use to inject a
+	 * preemption context switch. They are never to be used for any trivial
+	 * request!
+	 */
+	GEM_BUG_ON(ctx == i915->preempt_context);
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	i915_gem_unpark(i915);
+
+	rq = i915_request_create(ce);
+
+	i915_gem_park(i915);
+	intel_context_unpin(ce);
+
+	return rq;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index cd6c130964cd..37f84ad067da 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -228,6 +228,9 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
 	return fence->ops == &i915_fence_ops;
 }
 
+struct i915_request * __must_check
+i915_request_create(struct intel_context *ce);
+
 struct i915_request * __must_check
 i915_request_alloc(struct intel_engine_cs *engine,
 		   struct i915_gem_context *ctx);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 0aea19cefe4a..e1f2cf64639a 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -8,6 +8,7 @@
 #include <linux/stop_machine.h>
 
 #include "i915_drv.h"
+#include "i915_gem_pm.h"
 #include "i915_gpu_error.h"
 #include "i915_reset.h"
 
@@ -727,9 +728,8 @@ static void restart_work(struct work_struct *work)
 	struct drm_i915_private *i915 = arg->i915;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
+	i915_gem_unpark(i915);
 	mutex_lock(&i915->drm.struct_mutex);
 	WRITE_ONCE(i915->gpu_error.restart, NULL);
 
@@ -744,13 +744,13 @@ static void restart_work(struct work_struct *work)
 		if (!intel_engine_is_idle(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (!IS_ERR(rq))
 			i915_request_add(rq);
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_park(i915);
 
 	kfree(arg);
 }
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a882b8d42bd9..e23a91eb9149 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -29,6 +29,7 @@
 #include <drm/drm_fourcc.h>
 
 #include "i915_drv.h"
+#include "i915_gem_pm.h"
 #include "i915_reg.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -235,10 +236,9 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 
 static struct i915_request *alloc_request(struct intel_overlay *overlay)
 {
-	struct drm_i915_private *dev_priv = overlay->i915;
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
+	struct intel_engine_cs *engine = overlay->i915->engine[RCS0];
 
-	return i915_request_alloc(engine, dev_priv->kernel_context);
+	return i915_request_create(engine->kernel_context);
 }
 
 /* overlay needs to be disable in OCMD reg */
@@ -247,17 +247,21 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	struct drm_i915_private *dev_priv = overlay->i915;
 	struct i915_request *rq;
 	u32 *cs;
+	int err;
 
 	WARN_ON(overlay->active);
 
+	i915_gem_unpark(dev_priv);
 	rq = alloc_request(overlay);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_pm;
+	}
 
 	cs = intel_ring_begin(rq, 4);
 	if (IS_ERR(cs)) {
-		i915_request_add(rq);
-		return PTR_ERR(cs);
+		err = PTR_ERR(cs);
+		goto err_rq;
 	}
 
 	overlay->active = true;
@@ -272,6 +276,12 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	intel_ring_advance(rq, cs);
 
 	return intel_overlay_do_wait_request(overlay, rq, NULL);
+
+err_rq:
+	i915_request_add(rq);
+err_pm:
+	i915_gem_park(dev_priv);
+	return err;
 }
 
 static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
@@ -376,6 +386,8 @@ static void intel_overlay_off_tail(struct i915_active_request *active,
 
 	if (IS_I830(dev_priv))
 		i830_overlay_clock_gating(dev_priv, true);
+
+	i915_gem_park(dev_priv);
 }
 
 /* overlay needs to be disabled in OCMD reg */
@@ -485,6 +497,8 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
 	overlay->old_yscale = 0;
 	overlay->crtc = NULL;
 	overlay->active = false;
+
+	i915_gem_park(dev_priv);
 }
 
 static int packed_depth_bytes(u32 format)
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 42bcceba175c..b10308c20e7d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -47,7 +47,7 @@ static int __live_active_setup(struct drm_i915_private *i915,
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 497929238f02..b926f40bb01a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -202,7 +202,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index cd6590e01dec..e5166f9b04c1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -308,7 +308,6 @@ static int igt_partial_tiling(void *arg)
 	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	int tiling;
 	int err;
 
@@ -333,8 +332,8 @@ static int igt_partial_tiling(void *arg)
 		goto out;
 	}
 
+	i915_gem_unpark(i915);
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
 
 	if (1) {
 		IGT_TIMEOUT(end);
@@ -445,8 +444,8 @@ next_tiling: ;
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_park(i915);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
@@ -468,7 +467,9 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 	if (err)
 		return err;
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	i915_gem_unpark(i915);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+	i915_gem_park(i915);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 665cafa82390..03bea3caaafe 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -550,8 +550,7 @@ static int live_nop_request(void *arg)
 		times[1] = ktime_get_raw();
 
 		for (n = 0; n < prime; n++) {
-			request = i915_request_alloc(engine,
-						     i915->kernel_context);
+			request = i915_request_create(engine->kernel_context);
 			if (IS_ERR(request)) {
 				err = PTR_ERR(request);
 				goto out_unlock;
@@ -648,7 +647,7 @@ empty_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 	int err;
 
-	request = i915_request_alloc(engine, engine->i915->kernel_context);
+	request = i915_request_create(engine->kernel_context);
 	if (IS_ERR(request))
 		return request;
@@ -850,7 +849,7 @@ static int live_all_engines(void *arg)
 	}
 
 	for_each_engine(engine, i915, id) {
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed with err=%d\n",
@@ -958,7 +957,7 @@ static int live_sequential_engines(void *arg)
 			goto out_unlock;
 		}
 
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed for %s with err=%d\n",
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index b04969ea74d3..77f6c8c66568 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -455,7 +455,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
 		goto out;
 	}
 
-	rq = i915_request_alloc(engine, engine->i915->kernel_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq))
 		goto out_unpin;
@@ -676,7 +676,7 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
--
2.20.1