Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > On Tue, Nov 15, 2016 at 04:36:35PM +0200, Mika Kuoppala wrote: >> If we have a bad client submitting unfavourably across different >> contexts, creating new ones, the per context scoring of badness >> doesn't remove the root cause, the offending client. >> To counter, keep track of per client context bans. Deny access if >> client is responsible for more than 3 context bans in >> it's lifetime. >> >> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 4 ++++ >> drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++++++------- >> drivers/gpu/drm/i915/i915_gem_context.c | 16 ++++++++++++++++ >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++-- >> drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++---- >> 5 files changed, 53 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 5af1c38..92c5284 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -411,6 +411,9 @@ struct drm_i915_file_private { >> } rps; >> >> unsigned int bsd_engine; >> + >> +#define I915_MAX_CLIENT_CONTEXT_BANS 3 >> + int context_bans; > > 3 feels a little too small. But possibly ok since this is at the context > level. Still webgl... > 3 bans is a significant amount on hangs. 12 consecutive hangs without single succesful request in between to reach this. >> }; >> >> /* Used by dp and fdi links */ >> @@ -854,6 +857,7 @@ struct drm_i915_error_state { >> >> pid_t pid; >> char comm[TASK_COMM_LEN]; >> + int context_bans; >> } engine[I915_NUM_ENGINES]; >> >> struct drm_i915_error_buffer { >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index 40a9e10..f56230f 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -2636,19 +2636,33 @@ static bool i915_context_is_banned(const struct i915_gem_context *ctx) >> return false; >> } >> >> -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) >> +static void i915_gem_request_mark_guilty(struct drm_i915_gem_request *request) >> { >> - struct i915_ctx_hang_stats *hs = &ctx->hang_stats; >> + struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats; >> >> hs->ban_score += 10; >> >> - hs->banned = i915_context_is_banned(ctx); >> + hs->banned = i915_context_is_banned(request->ctx); >> hs->batch_active++; >> + >> + DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", >> + request->ctx->name, hs->ban_score, yesno(hs->banned)); >> + >> + if (!request->file_priv) >> + return; >> + >> + if (hs->banned) { >> + request->file_priv->context_bans++; >> + >> + DRM_DEBUG_DRIVER("client %s has has %d context banned\n", >> + request->ctx->name, >> + request->file_priv->context_bans); >> + } >> } >> >> -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) >> +static void i915_gem_request_mark_innocent(struct drm_i915_gem_request *request) >> { >> - struct i915_ctx_hang_stats *hs = &ctx->hang_stats; >> + struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats; >> >> hs->batch_pending++; >> } >> @@ -2719,9 +2733,9 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) >> } >> >> if (ring_hung) >> - i915_gem_context_mark_guilty(request->ctx); >> + i915_gem_request_mark_guilty(request); >> else >> - i915_gem_context_mark_innocent(request->ctx); >> + i915_gem_request_mark_innocent(request); > > Why? Just use the file_priv on the ctx. > >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c >> index 9abaae4..4ddd044 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >> @@ -347,6 +347,14 @@ __create_hw_context(struct drm_device *dev, >> return ERR_PTR(ret); >> } >> >> +static bool client_is_banned(struct drm_i915_file_private *file_priv) >> +{ >> + if (file_priv->context_bans <= I915_MAX_CLIENT_CONTEXT_BANS) >> + return false; > > return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; >> + >> + return true; >> +} >> + >> /** >> * The default context needs to exist per ring that uses contexts. It stores the >> * context state of the GPU for applications that don't utilize HW contexts, as >> @@ -360,6 +368,14 @@ i915_gem_create_context(struct drm_device *dev, >> >> lockdep_assert_held(&dev->struct_mutex); >> >> + if (file_priv && client_is_banned(file_priv)) { > > So this belongs to context_craate_ioctl > Ah yes. >> + DRM_DEBUG("client %s[%d] banned from creating ctx\n", >> + current->comm, >> + pid_nr(get_task_pid(current, PIDTYPE_PID))); >> + >> + return ERR_PTR(-EIO); >> + } >> + >> ctx = __create_hw_context(dev, file_priv); >> if (IS_ERR(ctx)) >> return ctx; >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index e804cb2..21beaf0 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -1231,16 +1231,20 @@ static struct i915_gem_context * >> i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, >> struct intel_engine_cs *engine, const u32 ctx_id) >> { >> + struct drm_i915_file_private *file_priv = file->driver_priv; >> struct i915_gem_context *ctx; >> struct i915_ctx_hang_stats *hs; >> >> - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); >> + ctx = i915_gem_context_lookup(file_priv, ctx_id); >> if (IS_ERR(ctx)) >> return ctx; >> >> hs = &ctx->hang_stats; >> if (hs->banned) { >> - DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); >> + DRM_DEBUG("Client %s banned from submitting (%d:%d)\n", >> + ctx->name, >> + file_priv->context_bans, >> + hs->ban_score); > > Context bans prevents creating new contexts. What is its relevance here? > No relevanse, just upgraded the ctx->name. Should not be in this patch. -Mika > (We should start using pr_debug for these user aides, at least something > more suitable than DRM_DEBUG). > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx