[PATCH 5/6] drm/i915: Add per client max context ban limit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



If we have a bad client submitting unfavourably across different
contexts, creating new ones, the per context scoring of badness
doesn't remove the root cause, the offending client.
To counter, keep track of per client context bans. Deny access if
client is responsible for more than 3 context bans in
it's lifetime.

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h            |  4 ++++
 drivers/gpu/drm/i915/i915_gem.c            | 28 +++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_context.c    | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 ++++++--
 drivers/gpu/drm/i915/i915_gpu_error.c      | 10 ++++++----
 5 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5af1c38..92c5284 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -411,6 +411,9 @@ struct drm_i915_file_private {
 	} rps;
 
 	unsigned int bsd_engine;
+
+#define I915_MAX_CLIENT_CONTEXT_BANS 3
+	int context_bans;
 };
 
 /* Used by dp and fdi links */
@@ -854,6 +857,7 @@ struct drm_i915_error_state {
 
 		pid_t pid;
 		char comm[TASK_COMM_LEN];
+		int context_bans;
 	} engine[I915_NUM_ENGINES];
 
 	struct drm_i915_error_buffer {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 40a9e10..f56230f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2636,19 +2636,33 @@ static bool i915_context_is_banned(const struct i915_gem_context *ctx)
 	return false;
 }
 
-static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
+static void i915_gem_request_mark_guilty(struct drm_i915_gem_request *request)
 {
-	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
+	struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
 
 	hs->ban_score += 10;
 
-	hs->banned = i915_context_is_banned(ctx);
+	hs->banned = i915_context_is_banned(request->ctx);
 	hs->batch_active++;
+
+	DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
+			 request->ctx->name, hs->ban_score, yesno(hs->banned));
+
+	if (!request->file_priv)
+		return;
+
+	if (hs->banned) {
+		request->file_priv->context_bans++;
+
+		DRM_DEBUG_DRIVER("client %s has has %d context banned\n",
+				 request->ctx->name,
+				 request->file_priv->context_bans);
+	}
 }
 
-static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
+static void i915_gem_request_mark_innocent(struct drm_i915_gem_request *request)
 {
-	struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
+	struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
 
 	hs->batch_pending++;
 }
@@ -2719,9 +2733,9 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	}
 
 	if (ring_hung)
-		i915_gem_context_mark_guilty(request->ctx);
+		i915_gem_request_mark_guilty(request);
 	else
-		i915_gem_context_mark_innocent(request->ctx);
+		i915_gem_request_mark_innocent(request);
 
 	if (!ring_hung)
 		return;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9abaae4..4ddd044 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -347,6 +347,14 @@ __create_hw_context(struct drm_device *dev,
 	return ERR_PTR(ret);
 }
 
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
+{
+	if (file_priv->context_bans <= I915_MAX_CLIENT_CONTEXT_BANS)
+		return false;
+
+	return true;
+}
+
 /**
  * The default context needs to exist per ring that uses contexts. It stores the
  * context state of the GPU for applications that don't utilize HW contexts, as
@@ -360,6 +368,14 @@ i915_gem_create_context(struct drm_device *dev,
 
 	lockdep_assert_held(&dev->struct_mutex);
 
+	if (file_priv && client_is_banned(file_priv)) {
+		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
+			  current->comm,
+			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
+
+		return ERR_PTR(-EIO);
+	}
+
 	ctx = __create_hw_context(dev, file_priv);
 	if (IS_ERR(ctx))
 		return ctx;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e804cb2..21beaf0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1231,16 +1231,20 @@ static struct i915_gem_context *
 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 			  struct intel_engine_cs *engine, const u32 ctx_id)
 {
+	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 	struct i915_ctx_hang_stats *hs;
 
-	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
+	ctx = i915_gem_context_lookup(file_priv, ctx_id);
 	if (IS_ERR(ctx))
 		return ctx;
 
 	hs = &ctx->hang_stats;
 	if (hs->banned) {
-		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
+		DRM_DEBUG("Client %s banned from submitting (%d:%d)\n",
+			  ctx->name,
+			  file_priv->context_bans,
+			  hs->ban_score);
 		return ERR_PTR(-EIO);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5d2e233..b8dfa0c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -536,10 +536,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_guilty &&
 		    error->engine[i].pid != -1) {
-			err_printf(m, "Active process (on ring %s): %s [%d]\n",
+			err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n",
 				   engine_str(i),
 				   error->engine[i].comm,
-				   error->engine[i].pid);
+				   error->engine[i].pid,
+				   error->engine[i].context_bans);
 		}
 	}
 	err_printf(m, "Reset count: %u\n", error->reset_count);
@@ -630,9 +631,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		if (obj) {
 			err_puts(m, dev_priv->engine[i]->name);
 			if (ee->pid != -1)
-				err_printf(m, " (submitted by %s [%d])",
+				err_printf(m, " (submitted by %s [%d], bans %d)",
 					   ee->comm,
-					   ee->pid);
+					   ee->pid,
+					   ee->context_bans);
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux