[RFC PATCH v2 5/5] drm/i915: Expose RPCS (SSEU) configuration to userspace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

We want to allow userspace to reconfigure the subslice configuration for
its own use case. To do so, we expose a context parameter to allow
adjustment of the RPCS register stored within the context image (and
currently not accessible via LRI). If the context is adjusted before
first use, the adjustment is for "free"; otherwise if the context is
active we flush the context off the GPU (stalling all users) and forcing
the GPU to save the context to memory where we can modify it and so
ensure that the register is reloaded on next execution.

The overhead of managing additional EU subslices can be significant,
especially in multi-context workloads. Non-GPGPU contexts should
preferably disable the subslices it is not using, and others should
fine-tune the number to match their workload.

We expose complete control over the RPCS register, allowing
configuration of slice/subslice, via masks packed into a u64 for
simplicity. For example,

	struct drm_i915_gem_context_param arg;
	struct drm_i915_gem_context_param_sseu sseu = { .flags = I915_EXEC_RENDER };

	memset(&arg, 0, sizeof(arg));
	arg.ctx_id = ctx;
	arg.param = I915_CONTEXT_PARAM_SSEU;
	arg.value = (uintptr_t) &sseu;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
		sseu.packed.subslice_mask = 0;

		drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}

could be used to disable all subslices where supported.

v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)

v3: Add ability to program this per engine (Chris)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
c: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx>
CC: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
CC: Zhipeng Gong <zhipeng.gong@xxxxxxxxx>
CC: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 49 ++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 82 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h        |  5 ++
 include/uapi/drm/i915_drm.h             | 28 +++++++++++
 4 files changed, 164 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b386574259a1..088b5035c3a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1042,6 +1042,30 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_BANNABLE:
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_SSEU: {
+		struct drm_i915_gem_context_param_sseu param_sseu;
+		struct intel_engine_cs *engine;
+
+		if (copy_from_user(&param_sseu, u64_to_user_ptr(args->value),
+				   sizeof(param_sseu))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		engine = i915_gem_engine_from_flags(to_i915(dev), file,
+						    param_sseu.flags);
+		if (!engine) {
+			ret = -EINVAL;
+			break;
+		}
+
+		param_sseu.value = intel_lr_context_get_sseu(ctx, engine);
+
+		if (copy_to_user(u64_to_user_ptr(args->value), &param_sseu,
+				 sizeof(param_sseu)))
+			ret = -EFAULT;
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		break;
@@ -1097,6 +1121,31 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			i915_gem_context_clear_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		if (args->size)
+			ret = -EINVAL;
+		else if (!i915_modparams.enable_execlists)
+			ret = -ENODEV;
+		else {
+			struct drm_i915_gem_context_param_sseu param_sseu;
+			struct intel_engine_cs *engine;
+
+			if (copy_from_user(&param_sseu, u64_to_user_ptr(args->value),
+					   sizeof(param_sseu))) {
+				ret = -EFAULT;
+				break;
+			}
+
+			engine = i915_gem_engine_from_flags(to_i915(dev), file,
+							    param_sseu.flags);
+			if (!engine) {
+				ret = -EINVAL;
+				break;
+			}
+
+			ret = intel_lr_context_set_sseu(ctx, engine, param_sseu.value);
+		}
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f5e9caf4913c..bffdc1126838 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2164,3 +2164,85 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 		}
 	}
 }
+
+int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *engine,
+			      u64 value)
+{
+	struct drm_i915_gem_context_param_sseu user = { .value = value };
+	struct drm_i915_private *i915 = ctx->i915;
+	struct sseu_dev_info sseu = ctx->engine[engine->id].sseu;
+	struct intel_context *ce;
+	enum intel_engine_id id;
+	int ret;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	sseu.slice_mask = user.packed.slice_mask == 0 ?
+		INTEL_INFO(i915)->sseu.slice_mask :
+		(user.packed.slice_mask & INTEL_INFO(i915)->sseu.slice_mask);
+	sseu.subslice_mask = user.packed.subslice_mask == 0 ?
+		INTEL_INFO(i915)->sseu.subslice_mask :
+		(user.packed.subslice_mask & INTEL_INFO(i915)->sseu.subslice_mask);
+	sseu.min_eu_per_subslice =
+		max(user.packed.min_eu_per_subslice,
+		    INTEL_INFO(i915)->sseu.min_eu_per_subslice);
+	sseu.max_eu_per_subslice =
+		min(user.packed.max_eu_per_subslice,
+		    INTEL_INFO(i915)->sseu.max_eu_per_subslice);
+
+	if (memcmp(&sseu, &ctx->engine[engine->id].sseu, sizeof(sseu)) == 0)
+		return 0;
+
+	/*
+	 * We can only program this on render ring.
+	 */
+	ce = &ctx->engine[RCS];
+
+	if (ce->pin_count) { /* Assume that the context is active! */
+		ret = i915_gem_switch_to_kernel_context(i915);
+		if (ret)
+			return ret;
+
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
+		if (ret)
+			return ret;
+	}
+
+	if (ce->state) {
+		u32 *regs;
+
+		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB) +
+			LRC_STATE_PN * PAGE_SIZE;
+		if (IS_ERR(regs))
+			return PTR_ERR(regs);
+
+		regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(&sseu);
+		i915_gem_object_unpin_map(ce->state->obj);
+	}
+
+	/*
+	 * Apply the configuration to all engine. Our hardware doesn't
+	 * currently support different configurations for each engine.
+	 */
+	for_each_engine(engine, i915, id)
+		ctx->engine[id].sseu = sseu;
+
+	return 0;
+}
+
+u64 intel_lr_context_get_sseu(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_context_param_sseu user;
+	const struct sseu_dev_info *sseu = &ctx->engine[engine->id].sseu;
+
+	user.packed.slice_mask = sseu->slice_mask;
+	user.packed.subslice_mask = sseu->subslice_mask;
+	user.packed.min_eu_per_subslice = sseu->min_eu_per_subslice;
+	user.packed.max_eu_per_subslice = sseu->max_eu_per_subslice;
+
+	return user.value;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 314adee7127a..a51e67d9fec5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -106,6 +106,11 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
+int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *engine,
+			      u64 value);
+u64 intel_lr_context_get_sseu(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *engine);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index fe25a01c81f2..ed1cbced8e6e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1360,9 +1360,37 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE	0x4
 #define I915_CONTEXT_PARAM_BANNABLE	0x5
+/*
+ * When using the following param, value should be a pointer to
+ * drm_i915_gem_context_param_sseu.
+ */
+#define I915_CONTEXT_PARAM_SSEU		0x6
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine to be configured or queried. Same value you would use with
+	 * drm_i915_gem_execbuffer2.
+	 */
+	__u64 flags;
+
+	/*
+	 * Setting slice_mask or subslice_mask to 0 will make the context use
+	 * masks reported respectively by I915_PARAM_SLICE_MASK or
+	 * I915_PARAM_SUBSLICE_MASK.
+	 */
+	union {
+		struct {
+			__u8 slice_mask;
+			__u8 subslice_mask;
+			__u8 min_eu_per_subslice;
+			__u8 max_eu_per_subslice;
+		} packed;
+		__u64 value;
+	};
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux