From: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>

Use David's new IOMMU layer functions for supporting SVM in i915.

TODO:
  error record collection for failing SVM contexts
  callback handling for fatal faults
  scheduling

v2: integrate David's core IOMMU support
    make sure we don't clobber the PASID in the context reg state
v3: fixup for intel-svm.h changes (David)
v4: use fault & halt for now (Jesse)
    fix ring free in error path on context alloc (Julia)
v5: update with new callback struct (Jesse)
v6: fix init svm check per new IOMMU code (Jesse)
v7: drop debug code and obsolete i915_svm.c file (Jesse)
v8: fix !CONFIG_INTEL_IOMMU_SVM init stub (Jesse)
v9: update to new execlist and reg handling bits (Jesse)
    context teardown fix (lrc deferred alloc vs teardown race?) (Jesse)
    check for SVM availability at context create (Jesse)
v10: intel_context_svm_init/fini & rebase
v11: move context specific stuff to i915_gem_context
v12: move addressing to context descriptor
v13: strip out workqueue and mm notifiers

Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: David Woodhouse <dwmw2@xxxxxxxxxxxxx>
Signed-off-by: David Woodhouse <David.Woodhouse@xxxxxxxxx> (v3)
Signed-off-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> (v9)
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
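As a note for reviewers, here is a minimal userspace sketch of how an SVM
context would be requested through the CONTEXT_CREATE2 path. The uapi pieces
come from earlier patches in this series, so the struct layout, flag value
and ioctl number below are illustrative assumptions, not the final interface:

/*
 * Hypothetical usage sketch, not part of this patch. The uapi names
 * (struct layout, flag value, ioctl number) are assumed for illustration.
 */
#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>

struct drm_i915_gem_context_create2 {	/* assumed layout */
	uint32_t ctx_id;		/* out: new context handle */
	uint32_t flags;			/* in: creation flags */
};

#define I915_GEM_CONTEXT_ENABLE_SVM	(1 << 0)	/* assumed value */
#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE2 \
	_IOWR('d', 0x75, struct drm_i915_gem_context_create2)	/* assumed nr */

int main(void)
{
	struct drm_i915_gem_context_create2 create = {
		.flags = I915_GEM_CONTEXT_ENABLE_SVM,
	};
	int fd = open("/dev/dri/renderD128", O_RDWR);

	if (fd < 0)
		return 1;

	/*
	 * The ioctl fails with ENODEV when the driver could not enable
	 * SVM (no 48-bit PPGTT or no IOMMU SVM support on this system).
	 */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE2, &create) == 0)
		printf("SVM context handle: %u\n", create.ctx_id);

	return 0;
}

Once created, the context's register state carries the PASID (via
ASSIGN_CTX_SVM) with advanced-context addressing, so GPU accesses walk the
process page tables instead of a PPGTT.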
 drivers/gpu/drm/i915/i915_drv.h         |  32 ++++
 drivers/gpu/drm/i915/i915_gem.c         |   7 +++
 drivers/gpu/drm/i915/i915_gem_context.c | 104 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_reg.h         |  18 ++++++
 drivers/gpu/drm/i915/intel_lrc.c        |  39 +++++-------
 5 files changed, 167 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 598e078418e3..64f3f0f18509 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -39,6 +39,7 @@
 #include <linux/backlight.h>
 #include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
+#include <linux/intel-svm.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
 #include <linux/shmem_fs.h>
@@ -866,6 +867,8 @@ struct i915_ctx_hang_stats {
  * @remap_slice: l3 row remapping information.
  * @flags: context specific flags:
  *         CONTEXT_NO_ZEROMAP: do not allow mapping things to page 0.
+ *         CONTEXT_NO_ERROR_CAPTURE: do not capture gpu state on hang.
+ *         CONTEXT_SVM: context with 1:1 gpu vs cpu mapping of vm.
  * @file_priv: filp associated with this context (NULL for global default
  *             context).
  * @hang_stats: information about the role of this context in possible GPU
@@ -891,6 +894,8 @@ struct i915_gem_context {
 	unsigned long flags;
 #define CONTEXT_NO_ZEROMAP	BIT(0)
 #define CONTEXT_NO_ERROR_CAPTURE BIT(1)
+#define CONTEXT_SVM		BIT(2)
+
 	unsigned hw_id;
 	u32 user_handle;
@@ -909,6 +914,9 @@ struct i915_gem_context {
 	struct atomic_notifier_head status_notifier;
 	bool execlists_force_single_submission;

+	u32 pasid; /* svm, 20 bits */
+	struct task_struct *task;
+
 	struct list_head link;

 	u8 remap_slice;
@@ -2001,6 +2009,8 @@ struct drm_i915_private {

 	struct i915_runtime_pm pm;

+	bool svm_available;
+
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
@@ -3628,6 +3638,28 @@ extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);

+/* svm */
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static inline bool intel_init_svm(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	dev_priv->svm_available = USES_FULL_48BIT_PPGTT(dev_priv) &&
+		intel_svm_available(&dev->pdev->dev);
+
+	return dev_priv->svm_available;
+}
+#else
+static inline bool intel_init_svm(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	dev_priv->svm_available = false;
+
+	return dev_priv->svm_available;
+}
+#endif
+
 /* overlay */
 extern struct intel_overlay_error_state *
 intel_overlay_capture_error_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e08c774a1aa..45d67b54c018 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4304,6 +4304,13 @@ i915_gem_init_hw(struct drm_device *dev)
 		}
 	}

+	if (INTEL_GEN(dev) >= 8) {
+		if (intel_init_svm(dev))
+			DRM_DEBUG_DRIVER("Initialized Intel SVM support\n");
+		else
+			DRM_ERROR("Failed to enable Intel SVM support\n");
+	}
+
 	i915_gem_init_swizzling(dev);

 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 189a6c018b72..9ab6332f296b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,6 +134,47 @@ static int get_context_size(struct drm_i915_private *dev_priv)
 	return ret;
 }

+static void i915_svm_fault_cb(struct device *dev, int pasid, u64 addr,
+			      u32 private, int rwxp, int response)
+{
+}
+
+static struct svm_dev_ops i915_svm_ops = {
+	.fault_cb = i915_svm_fault_cb,
+};
+
+static int i915_gem_context_svm_init(struct i915_gem_context *ctx)
+{
+	struct device *dev = &ctx->i915->drm.pdev->dev;
+	int ret;
+
+	if (WARN_ON_ONCE(!ctx->i915->svm_available))
+		return -ENODEV;
+
+	get_task_struct(current);
+
+	ret = intel_svm_bind_mm(dev, &ctx->pasid, 0, &i915_svm_ops);
+	if (ret) {
+		DRM_DEBUG_DRIVER("pasid alloc fail: %d\n", ret);
+		put_task_struct(current);
+		return ret;
+	}
+
+	ctx->task = current;
+
+	return 0;
+}
+
+static void i915_gem_context_svm_fini(struct i915_gem_context *ctx)
+{
+	struct device *dev = &ctx->i915->drm.pdev->dev;
+
+	if (ctx->task) {
+		intel_svm_unbind_mm(dev, ctx->pasid);
+		put_task_struct(ctx->task);
+	}
+}
+
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -143,6 +184,9 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	trace_i915_context_free(ctx);
 	GEM_BUG_ON(!ctx->closed);

+	if (ctx->flags & CONTEXT_SVM)
+		i915_gem_context_svm_fini(ctx);
+
 	i915_ppgtt_put(ctx->ppgtt);

 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -257,9 +301,34 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
 	return 0;
 }

+static u32 __create_ctx_desc(struct drm_i915_private *dev_priv, u32 flags)
+{
+	u32 desc = GEN8_CTX_VALID;
+
+	if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+		desc |= INTEL_ADVANCED_CONTEXT <<
+			GEN8_CTX_ADDRESSING_MODE_SHIFT;
+		/*
+		 * Switch to stream once we have a scheduler and can
+		 * re-submit contexts.
+		 */
+		desc |= FAULT_AND_HALT << GEN8_CTX_FAULT_SHIFT;
+	} else {
+		if (IS_GEN8(dev_priv))
+			desc |= GEN8_CTX_L3LLC_COHERENT;
+
+		desc |= GEN8_CTX_PRIVILEGE;
+
+		desc |= GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
+			GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	}
+
+	return desc;
+}
+
 static struct i915_gem_context *
 __create_hw_context(struct drm_device *dev,
-		    struct drm_i915_file_private *file_priv)
+		    struct drm_i915_file_private *file_priv, u32 flags)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_gem_context *ctx;
@@ -323,8 +392,8 @@ __create_hw_context(struct drm_device *dev,
 	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
 	ctx->ring_size = 4 * PAGE_SIZE;
-	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
-			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	ctx->desc_template = __create_ctx_desc(dev_priv, flags);
+
 	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);

 	return ctx;
@@ -345,13 +414,14 @@ i915_gem_create_context(struct drm_device *dev,
 {
 	struct i915_gem_context *ctx;
 	bool create_vm = false;
+	int ret;

 	lockdep_assert_held(&dev->struct_mutex);

 	if (flags & (I915_GEM_CONTEXT_FULL_PPGTT |
 		     I915_GEM_CONTEXT_ENABLE_SVM))
 		create_vm = true;

-	ctx = __create_hw_context(dev, file_priv);
+	ctx = __create_hw_context(dev, file_priv, flags);
 	if (IS_ERR(ctx))
 		return ctx;

@@ -360,19 +430,31 @@ i915_gem_create_context(struct drm_device *dev,
 			i915_ppgtt_create(to_i915(dev), file_priv);

 		if (IS_ERR(ppgtt)) {
-			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
-					 PTR_ERR(ppgtt));
-			idr_remove(&file_priv->context_idr, ctx->user_handle);
-			context_close(ctx);
-			return ERR_CAST(ppgtt);
+			ret = PTR_ERR(ppgtt);
+			DRM_DEBUG_DRIVER("PPGTT setup failed (%d)\n", ret);
+			goto free_ctx;
 		}

 		ctx->ppgtt = ppgtt;
 	}

+	if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+		ret = i915_gem_context_svm_init(ctx);
+		if (ret)
+			goto free_ctx;
+
+		ctx->flags |= CONTEXT_SVM;
+	}
+
 	trace_i915_context_create(ctx);

 	return ctx;
+
+free_ctx:
+	idr_remove(&file_priv->context_idr, ctx->user_handle);
+	context_close(ctx);
+
+	return ERR_PTR(ret);
 }

 /**
@@ -987,6 +1069,7 @@ static bool contexts_enabled(struct drm_device *dev)
 int i915_gem_context_create2_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_context_create2 *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
@@ -1007,7 +1090,8 @@ int i915_gem_context_create2_ioctl(struct drm_device *dev, void *data,
 	if (USES_FULL_PPGTT(dev))
 		flags |= I915_GEM_CONTEXT_FULL_PPGTT;

-	if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+	if ((flags & I915_GEM_CONTEXT_ENABLE_SVM) &&
+	    !dev_priv->svm_available) {
 		ret = -ENODEV;
 		goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d4adf2806c50..8eebb038622b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3062,6 +3062,24 @@ enum {
 	INTEL_LEGACY_64B_CONTEXT
 };

+enum {
+	FAULT_AND_HANG = 0,
+	FAULT_AND_HALT,		/* Debug only */
+	FAULT_AND_STREAM,
+	FAULT_AND_CONTINUE	/* Unsupported */
+};
+
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+
+#define GEN8_CTX_FAULT_SHIFT 6
+#define GEN8_CTX_ID_SHIFT 32
+#define GEN8_CTX_ID_WIDTH 21
+#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
+#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
 #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
 #define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\
 				INTEL_LEGACY_64B_CONTEXT : \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b49df4316f4..6e27cc83aa43 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -185,12 +185,6 @@
 #define CTX_R_PWR_CLK_STATE		0x42
 #define CTX_GPGPU_CSR_BASE_ADDRESS	0x44

-#define GEN8_CTX_VALID (1<<0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
-#define GEN8_CTX_FORCE_RESTORE (1<<2)
-#define GEN8_CTX_L3LLC_COHERENT (1<<5)
-#define GEN8_CTX_PRIVILEGE (1<<8)
-
 #define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
 	(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
 	(reg_state)[(pos)+1] = (val); \
@@ -207,16 +201,13 @@
 	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
 } while (0)

-enum {
-	FAULT_AND_HANG = 0,
-	FAULT_AND_HALT,		/* Debug only */
-	FAULT_AND_STREAM,
-	FAULT_AND_CONTINUE	/* Unsupported */
-};
-#define GEN8_CTX_ID_SHIFT 32
-#define GEN8_CTX_ID_WIDTH 21
-#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
-#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
+#define ASSIGN_CTX_SVM(reg_state, ctx, engine) do { \
+	ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, \
+		       GEN8_RING_PDP_UDW((engine), 0), 0); \
+	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, \
+		       GEN8_RING_PDP_LDW((engine), 0), (ctx)->pasid); \
+} while (0)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
@@ -270,10 +261,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
 					IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
 				(engine->id == VCS || engine->id == VCS2);

-	engine->ctx_desc_template = GEN8_CTX_VALID;
-	if (IS_GEN8(dev_priv))
-		engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
-	engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE;

 	/* TODO: WaDisableLiteRestore when we start using semaphore
 	 * signalling between Command Streamers */
@@ -380,7 +367,9 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 	 * PML4 is allocated during ppgtt init, so this is not needed
 	 * in 48-bit mode.
 	 */
-	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+	if (!(rq->ctx->flags & CONTEXT_SVM) &&
+	    ppgtt &&
+	    !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 		execlists_update_context_pdps(ppgtt, reg_state);
 }

@@ -1399,7 +1388,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
 	 * it is unsafe in case of lite-restore (because the ctx is
 	 * not idle). PML4 is allocated during ppgtt init so this is
 	 * not needed in 48-bit.*/
-	if (req->ctx->ppgtt &&
+
+	if (!(req->ctx->flags & CONTEXT_SVM) &&
+	    req->ctx->ppgtt &&
 	    (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {
 		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
 		    !intel_vgpu_active(req->i915)) {
@@ -2057,7 +2048,9 @@ populate_lr_context(struct i915_gem_context *ctx,
 	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0),
 		       0);

-	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+	if (ctx->flags & CONTEXT_SVM) {
+		ASSIGN_CTX_SVM(reg_state, ctx, engine);
+	} else if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		/* 64b PPGTT (48bit canonical)
 		 * PDP0_DESCRIPTOR contains the base address to PML4 and
 		 * other PDP Descriptors are ignored.
-- 
2.7.4