On 18/05/2017 10:46, Chris Wilson wrote:
Keep the recently freed context objects for reuse. This allows us to use the current GGTT bindings and dma bound pages, avoiding any clflushes as required. We mark the objects as purgeable under memory pressure, and reap the list of freed objects as soon as the device is idle.
One additional thought - how about making the batch pool more generic, or more precisely extracting a layer from it to be called object pool, and then having batch pool and context pool on top of that?
There would be some details to flesh out, like do we want to strictly split internal from shmemfs backed objects, or can allow batch pool to get either, the API and similar.
But if it could be done with not a lot of code it may be preferential to have one implementation of the similar basic idea.
Regards, Tvrtko
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 59 ++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem_context.h | 5 ++ drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/selftests/mock_context.c | 1 + drivers/gpu/drm/i915/selftests/mock_gem_device.c | 1 + 8 files changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d55bbde68df..1fa1e7d48f02 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2316,6 +2316,8 @@ struct drm_i915_private { struct llist_head free_list; struct work_struct free_work; + struct list_head freed_objects; + /* The hw wants to have a stable context identifier for the * lifetime of the context (for OA, PASID, faults, etc). * This is limited in execlists to 21 bits. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c044b5dbdb66..f482c320d810 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3212,6 +3212,7 @@ i915_gem_idle_work_handler(struct work_struct *work) DRM_ERROR("Timeout waiting for engines to idle\n"); intel_engines_mark_idle(dev_priv); + i915_gem_contexts_mark_idle(dev_priv); i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 198b0064a3d3..1e51fd36f355 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,6 +96,48 @@ /* Initial size (as log2) to preallocate the handle->object hashtable */ #define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */ +struct drm_i915_gem_object * +i915_gem_context_create_object(struct drm_i915_private *i915, + unsigned long size) +{ + struct drm_i915_gem_object *obj, *on; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry_safe(obj, on, + &i915->contexts.freed_objects, + batch_pool_link) { + if (obj->mm.madv != I915_MADV_DONTNEED) { + /* Purge the heretic! */ + list_del(&obj->batch_pool_link); + i915_gem_object_put(obj); + continue; + } + + if (obj->base.size == size) { + list_del(&obj->batch_pool_link); + obj->mm.madv = I915_MADV_WILLNEED; + return obj; + } + } + + return i915_gem_object_create(i915, size); +} + +void i915_gem_contexts_mark_idle(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj, *on; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry_safe(obj, on, + &i915->contexts.freed_objects, + batch_pool_link) { + list_del(&obj->batch_pool_link); + i915_gem_object_put(obj); + } +} + static void resize_vma_ht(struct work_struct *work) { struct i915_gem_context_vma_lut *lut = @@ -160,9 +202,10 @@ static void vma_lut_free(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { + struct drm_i915_private *i915 = ctx->i915; int i; - lockdep_assert_held(&ctx->i915->drm.struct_mutex); + lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); vma_lut_free(ctx); @@ -178,7 +221,11 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) if (ce->ring) intel_ring_free(ce->ring); - __i915_gem_object_release_unless_active(ce->state->obj); + /* Keep the objects around for quick reuse */ + GEM_BUG_ON(ce->state->obj->mm.madv != I915_MADV_WILLNEED); + ce->state->obj->mm.madv = I915_MADV_DONTNEED; + list_add(&ce->state->obj->batch_pool_link, + &i915->contexts.freed_objects); } kfree(ctx->name); @@ -186,7 +233,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) list_del(&ctx->link); - ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id); + ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); kfree_rcu(ctx, rcu); } @@ -457,6 +504,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) return 0; INIT_LIST_HEAD(&dev_priv->contexts.list); + INIT_LIST_HEAD(&dev_priv->contexts.freed_objects); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); init_llist_head(&dev_priv->contexts.free_list); @@ -542,12 +590,17 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(work_pending(&i915->contexts.free_work)); + /* Keep the context so that we can free it immediately ourselves */ ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context)); GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); context_close(ctx); i915_gem_context_free(ctx); + i915_gem_contexts_mark_idle(i915); + GEM_BUG_ON(!list_empty(&i915->contexts.freed_objects)); + /* Must free all deferred contexts (via flush_workqueue) first */ ida_destroy(&i915->contexts.hw_ida); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 04320f80f9f4..4654caab9b6c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -294,6 +294,11 @@ void i915_gem_context_release(struct kref *ctx_ref); struct i915_gem_context * i915_gem_context_create_gvt(struct drm_device *dev); +struct drm_i915_gem_object * +i915_gem_context_create_object(struct drm_i915_private *i915, + unsigned long size); +void i915_gem_contexts_mark_idle(struct drm_i915_private *i915); + int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 42ab91acccc5..21a1519db936 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2011,7 +2011,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; - ctx_obj = i915_gem_object_create(ctx->i915, context_size); + ctx_obj = i915_gem_context_create_object(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); return PTR_ERR(ctx_obj); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index acd1da9b62a3..253d32fef133 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1454,7 +1454,7 @@ alloc_context_vma(struct intel_engine_cs *engine) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create(i915, engine->context_size); + obj = i915_gem_context_create_object(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 9c7c68181f82..f4972674c96e 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -92,6 +92,7 @@ void mock_init_contexts(struct drm_i915_private *i915) INIT_LIST_HEAD(&i915->contexts.list); ida_init(&i915->contexts.hw_ida); + INIT_LIST_HEAD(&i915->contexts.freed_objects); INIT_WORK(&i915->contexts.free_work, contexts_free_worker); init_llist_head(&i915->contexts.free_list); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 47613d20bba8..5eb931ecf0a4 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -53,6 +53,7 @@ static void mock_device_release(struct drm_device *dev) mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); + i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); cancel_delayed_work_sync(&i915->gt.retire_work);
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx