[RFC PATCH 2/3] drm/i915: IOMMU based SVM implementation v16

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>

Use David's new IOMMU layer functions for supporting SVM in i915.

TODO:
  error record collection for failing SVM contexts
  callback handling for fatal faults
  scheduling

v2: integrate David's core IOMMU support
    make sure we don't clobber the PASID in the context reg state
v3: fixup for intel-svm.h changes (David)
v4: use fault & halt for now (Jesse)
    fix ring free in error path on context alloc (Julia)
v5: update with new callback struct (Jesse)
v6: fix init svm check per new IOMMU code (Jesse)
v7: drop debug code and obsolete i915_svm.c file (Jesse)
v8: fix !CONFIG_INTEL_IOMMU_SVM init stub (Jesse)
v9: update to new execlist and reg handling bits (Jesse)
    context teardown fix (lrc deferred alloc vs teardown race?) (Jesse)
    check for SVM availability at context create (Jesse)
v10: intel_context_svm_init/fini & rebase
v11: move context specific stuff to i915_gem_context
v12: move addressing to context descriptor
v13: strip out workqueue and mm notifiers
v14: remove fault cb
v15: streamline execlist context pdps update, add asserts
v16: introduce HAS_SVM

Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: David Woodhouse <dwmw2@xxxxxxxxxxxxx>
Signed-off-by: David Woodhouse <David.Woodhouse@xxxxxxxxx> (v3)
Signed-off-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> (v9)
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h          |  3 ++
 drivers/gpu/drm/i915/i915_gem_context.c  | 78 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h  | 30 ++++++++++++
 drivers/gpu/drm/i915/i915_reg.h          |  1 +
 drivers/gpu/drm/i915/intel_device_info.c |  6 +++
 drivers/gpu/drm/i915/intel_lrc.c         | 11 ++++-
 include/uapi/drm/i915_drm.h              |  1 +
 7 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 52d01be..ca1eaaf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -39,6 +39,7 @@
 #include <linux/backlight.h>
 #include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
+#include <linux/intel-svm.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
@@ -787,6 +788,7 @@ struct intel_csr {
 	func(has_fpga_dbg); \
 	func(has_full_ppgtt); \
 	func(has_full_48bit_ppgtt); \
+	func(has_svm); \
 	func(has_gmbus_irq); \
 	func(has_gmch_display); \
 	func(has_guc); \
@@ -2784,6 +2786,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
 #define USES_FULL_PPGTT(dev_priv)	(i915.enable_ppgtt >= 2)
 #define USES_FULL_48BIT_PPGTT(dev_priv)	(i915.enable_ppgtt == 3)
+#define HAS_SVM(dev_priv)		((dev_priv)->info.has_svm)
 
 #define HAS_OVERLAY(dev_priv)		 ((dev_priv)->info.has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index dadc845..c0f6c9e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,6 +134,16 @@ static int get_context_size(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
+static void i915_gem_context_svm_fini(struct i915_gem_context *ctx)
+{
+	struct device *dev = &ctx->i915->drm.pdev->dev;
+
+	GEM_BUG_ON(!ctx->task);
+
+	intel_svm_unbind_mm(dev, ctx->pasid);
+	put_task_struct(ctx->task);
+}
+
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -143,6 +153,9 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	trace_i915_context_free(ctx);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
+	if (i915_gem_context_is_svm(ctx))
+		i915_gem_context_svm_fini(ctx);
+
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -272,6 +285,61 @@ static u32 create_default_ctx_desc(const struct drm_i915_private *dev_priv)
 	return desc;
 }
 
+static u32 create_svm_ctx_desc(void)
+{
+	/*
+	 * Switch to stream once we have a scheduler and can
+	 * re-submit contexts.
+	 */
+	return GEN8_CTX_VALID |
+		INTEL_ADVANCED_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT |
+		FAULT_AND_HALT << GEN8_CTX_FAULT_SHIFT;
+}
+
+static int i915_gem_context_svm_init(struct i915_gem_context *ctx)
+{
+	struct device *dev = &ctx->i915->drm.pdev->dev;
+	int ret;
+
+	GEM_BUG_ON(ctx->task);
+
+	if (!HAS_SVM(ctx->i915))
+		return -ENODEV;
+
+	get_task_struct(current);
+
+	ret = intel_svm_bind_mm(dev, &ctx->pasid, 0, NULL);
+	if (ret) {
+		DRM_DEBUG_DRIVER("pasid alloc fail: %d\n", ret);
+		put_task_struct(current);
+		return ret;
+	}
+
+	ctx->task = current;
+	i915_gem_context_set_svm(ctx);
+
+	return 0;
+}
+
+static int i915_gem_context_enable_svm(struct i915_gem_context *ctx)
+{
+	int ret;
+
+	if (!HAS_SVM(ctx->i915))
+		return -ENODEV;
+
+	if (i915_gem_context_is_svm(ctx))
+		return -EINVAL;
+
+	ret = i915_gem_context_svm_init(ctx);
+	if (ret)
+		return ret;
+
+	ctx->desc_template = create_svm_ctx_desc();
+
+	return 0;
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
@@ -1071,6 +1139,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_BANNABLE:
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_SVM:
+		args->value = i915_gem_context_is_svm(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1128,6 +1199,13 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			i915_gem_context_clear_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_SVM:
+		/* There is no coming back once svm is enabled */
+		if (args->value && !args->size)
+			ret = i915_gem_context_enable_svm(ctx);
+		else
+			ret = -EINVAL;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 0ac750b..2514abb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -108,6 +108,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNABLE		3
 #define CONTEXT_BANNED			4
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	5
+#define CONTEXT_SVM			6
 
 	/**
 	 * @hw_id: - unique identifier for the context
@@ -140,6 +141,25 @@ struct i915_gem_context {
 	 */
 	int priority;
 
+	/**
+	 * @pasid: process address space identifier
+	 *
+	 * Unique identifier for the shared address space with cpu.
+	 * Used by the gpu to associate context's (ppgtt) address
+	 * space with the corresponding process's address space for
+	 * Shared Virtual Memory (SVM). 20 bits.
+	 */
+	u32 pasid;
+
+	/**
+	 * @task: user space task struct for this context
+	 *
+	 * If this is svm context, @task is the corresponding
+	 * user space process with which we share the vm. See
+	 * @pasid.
+	 */
+	struct task_struct *task;
+
 	/** ggtt_alignment: alignment restriction for context objects */
 	u32 ggtt_alignment;
 	/** ggtt_offset_bias: placement restriction for context objects */
@@ -241,6 +261,16 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
+static inline bool i915_gem_context_is_svm(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_SVM, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_svm(struct i915_gem_context *ctx)
+{
+	__set_bit(CONTEXT_SVM, &ctx->flags);
+}
+
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 36f4146..eff0eef 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3375,6 +3375,7 @@ enum {
 				INTEL_LEGACY_64B_CONTEXT : \
 				INTEL_LEGACY_32B_CONTEXT)
 
+#define GEN8_CTX_FAULT_SHIFT 6
 #define GEN8_CTX_ID_SHIFT 32
 #define GEN8_CTX_ID_WIDTH 21
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index f642f6d..2878bd3 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -414,6 +414,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 		info->has_snoop = false;
 
+	info->has_svm = INTEL_GEN(dev_priv) >= 9 &&
+		info->has_full_48bit_ppgtt &&
+		intel_svm_available(&dev_priv->drm.pdev->dev);
+
 	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
 	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
 	DRM_DEBUG_DRIVER("subslice total: %u\n",
@@ -429,4 +433,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 			 info->sseu.has_subslice_pg ? "y" : "n");
 	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
 			 info->sseu.has_eu_pg ? "y" : "n");
+	DRM_DEBUG_DRIVER("has svm: %s\n",
+			 info->has_svm ? "y" : "n");
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06322a5..13c9c56 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -209,6 +209,13 @@
 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x26
 
+#define ASSIGN_CTX_SVM(reg_state, ctx, engine) do { \
+		ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, \
+			       GEN8_RING_PDP_UDW((engine), 0), 0); \
+		ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, \
+			       GEN8_RING_PDP_LDW((engine), 0), (ctx)->pasid); \
+} while (0)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 
@@ -2084,7 +2091,9 @@ static void execlists_init_reg_state(u32 *reg_state,
 	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0),
 		       0);
 
-	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+	if (i915_gem_context_is_svm(ctx)) {
+		ASSIGN_CTX_SVM(reg_state, ctx, engine);
+	} else if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		/* 64b PPGTT (48bit canonical)
 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
 		 * other PDP Descriptors are ignored.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index da32c2f..5b06ab1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1227,6 +1227,7 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE	0x4
 #define I915_CONTEXT_PARAM_BANNABLE	0x5
+#define I915_CONTEXT_PARAM_SVM		0x6
 	__u64 value;
 };
 
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux