This patch doesn't seem to be the panacea that I had hoped, although I've yet to thoroughly test if it actually improves some of the other tests which caused trouble. What occurs is the following (assume just 2 procs) 1. proc A gets to execbuf while out of memory, gets struct_mutex. 2. OOM killer comes in and chooses proc B 3. proc B closes it's fds, which requires struct mutex, blocks 4, OOM killer waits for B to die before killing another process (this part is speculative) In order to let the OOM complete, we defer processing the context destruction parts which are those that require struct_mutex. This patch has not really been cleaned up, I am only posting it for posterity. (Several of the hunks are only relevant to full PPGTT patches which are not merged). Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++- drivers/gpu/drm/i915/i915_drv.h | 9 +++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 68 ++++++++++++++++++++++++--------- 4 files changed, 61 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 835a43e..e6b5f3e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1717,11 +1717,13 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_hw_ppgtt *pvt_ppgtt; - pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx); + pvt_ppgtt = + ctx_to_ppgtt(file_priv->ctx_info->private_default_ctx); seq_printf(m, "proc: %s\n", get_pid_task(file->pid, PIDTYPE_PID)->comm); seq_puts(m, " default context:\n"); - idr_for_each(&file_priv->context_idr, per_file_ctx, m); + idr_for_each(&file_priv->ctx_info->context_idr, per_file_ctx, + m); } seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK)); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6fdab40..931fc42 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1735,15 +1735,18 @@ struct drm_i915_gem_request { struct drm_i915_file_private { struct drm_i915_private *dev_priv; - struct { spinlock_t lock; struct list_head request_list; struct delayed_work idle_work; } mm; - struct idr context_idr; + struct i915_gem_ctx_info { + struct drm_i915_private *dev_priv; + struct idr context_idr; + struct work_struct close_work; + struct i915_hw_context *private_default_ctx; + } *ctx_info; - struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; }; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6312d61..a32f6df 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2326,7 +2326,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) hs = &request->ctx->hang_stats; else if (request->file_priv) - hs = &request->file_priv->private_default_ctx->hang_stats; + hs = &request->file_priv->ctx_info->private_default_ctx->hang_stats; if (hs) { if (guilty) { diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f55f0a9..770b394 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -209,8 +209,8 @@ __create_hw_context(struct drm_device *dev, if (file_priv == NULL) return ctx; - ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID, 0, - GFP_KERNEL); + ret = idr_alloc(&file_priv->ctx_info->context_idr, + ctx, DEFAULT_CONTEXT_ID, 0, GFP_KERNEL); if (ret < 0) goto err_out; @@ -481,29 +481,54 @@ static int context_idr_cleanup(int id, void *p, void *data) return 0; } +static void i915_gem_context_close_work_handler(struct work_struct *work) +{ + struct i915_gem_ctx_info *ctx_info = container_of(work, + struct i915_gem_ctx_info, + close_work); + struct drm_i915_private *dev_priv = ctx_info->dev_priv; + + mutex_lock(&dev_priv->dev->struct_mutex); + idr_for_each(&ctx_info->context_idr, context_idr_cleanup, NULL); + i915_gem_context_unreference(ctx_info->private_default_ctx); + idr_destroy(&ctx_info->context_idr); + mutex_unlock(&dev_priv->dev->struct_mutex); + + kfree(ctx_info); +} + int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_private *dev_priv = dev->dev_private; + file_priv->ctx_info = kzalloc(sizeof(*file_priv->ctx_info), GFP_KERNEL); + if (!file_priv->ctx_info) + return -ENOMEM; + + file_priv->ctx_info->dev_priv = file_priv->dev_priv; + INIT_WORK(&file_priv->ctx_info->close_work, + i915_gem_context_close_work_handler); + if (!HAS_HW_CONTEXTS(dev)) { /* Cheat for hang stats */ - file_priv->private_default_ctx = + file_priv->ctx_info->private_default_ctx = kzalloc(sizeof(struct i915_hw_context), GFP_KERNEL); - file_priv->private_default_ctx->vm = &dev_priv->gtt.base; + file_priv->ctx_info->private_default_ctx->vm = + &dev_priv->gtt.base; return 0; } - idr_init(&file_priv->context_idr); + idr_init(&file_priv->ctx_info->context_idr); mutex_lock(&dev->struct_mutex); - file_priv->private_default_ctx = + file_priv->ctx_info->private_default_ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); mutex_unlock(&dev->struct_mutex); - if (IS_ERR(file_priv->private_default_ctx)) { - idr_destroy(&file_priv->context_idr); - return PTR_ERR(file_priv->private_default_ctx); + if (IS_ERR(file_priv->ctx_info->private_default_ctx)) { + idr_destroy(&file_priv->ctx_info->context_idr); + return PTR_ERR(file_priv->ctx_info->private_default_ctx); } return 0; @@ -511,27 +536,34 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_file_private *file_priv = file->driver_priv; if (!HAS_HW_CONTEXTS(dev)) { - kfree(file_priv->private_default_ctx); + kfree(file_priv->ctx_info->private_default_ctx); + kfree(file_priv->ctx_info); return; } - mutex_lock(&dev->struct_mutex); - idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); - i915_gem_context_unreference(file_priv->private_default_ctx); - idr_destroy(&file_priv->context_idr); - mutex_unlock(&dev->struct_mutex); + if (mutex_trylock(&dev->struct_mutex)) { + idr_for_each(&file_priv->ctx_info->context_idr, + context_idr_cleanup, NULL); + i915_gem_context_unreference(file_priv->ctx_info->private_default_ctx); + idr_destroy(&file_priv->ctx_info->context_idr); + mutex_unlock(&dev->struct_mutex); + kfree(file_priv->ctx_info); + } else { + queue_work(dev_priv->wq, &file_priv->ctx_info->close_work); + } } struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) { if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev)) - return file_priv->private_default_ctx; + return file_priv->ctx_info->private_default_ctx; - return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); + return (struct i915_hw_context *)idr_find(&file_priv->ctx_info->context_idr, id); } static inline int @@ -773,7 +805,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } - idr_remove(&ctx->file_priv->context_idr, ctx->id); + idr_remove(&ctx->file_priv->ctx_info->context_idr, ctx->id); i915_gem_context_unreference(ctx); mutex_unlock(&dev->struct_mutex); -- 1.8.4.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx