+#define MI_LRM_USE_GLOBAL_GTT (1<<22)
+#define MI_LRM_ASYNC_MODE_ENABLE (1<<21)
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
@@ -1520,6 +1544,8 @@ enum skl_disp_power_wells {
#define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12)
#define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10)
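+/* Preemption status; read back by the WaRsRestoreWithPerCtxtBb sequence */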
+#define GEN8_RS_PREEMPT_STATUS 0x215C
+
/* Fuse readout registers for GT */
#define CHV_FUSE_GT (VLV_DISPLAY_BASE + 0x2168)
#define CHV_FGT_EU_DIS_SS0_R0_SHIFT 16
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 47473e5..b34ef48 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -202,6 +202,7 @@ enum {
FAULT_AND_CONTINUE /* Unsupported */
};
#define GEN8_CTX_ID_SHIFT 32
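+/* Gen8 default for INDIRECT_CTX_OFFSET; written as (value << 6) into the reg state */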
+#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
static int intel_lr_context_pin(struct intel_engine_cs *ring,
struct intel_context *ctx);
@@ -1107,6 +1108,208 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
return 0;
}
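+/*
+ * WA batch buffers are page-sized GEM objects, not real ringbuffers;
+ * struct intel_ringbuffer is reused here only for its alloc/pin/map
+ * helpers and the intel_logical_ring_emit() accessors.
+ */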
+static struct intel_ringbuffer *
+create_wa_bb(struct intel_engine_cs *ring, uint32_t bb_size)
+{
+ struct drm_device *dev = ring->dev;
+ struct intel_ringbuffer *ringbuf;
+ int ret;
+
+ ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
+ if (!ringbuf)
+ return NULL;
+
+ ringbuf->ring = ring;
+
+ ringbuf->size = roundup(bb_size, PAGE_SIZE);
+ ringbuf->effective_size = ringbuf->size;
+ ringbuf->head = 0;
+ ringbuf->tail = 0;
+ ringbuf->space = ringbuf->size;
+ ringbuf->last_retired_head = -1;
+
+ ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj for %s: %d\n",
+ ring->name, ret);
+ kfree(ringbuf);
+ return NULL;
+ }
+
+ ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+ if (ret) {
+ DRM_ERROR("Failed to pin and map %s w/a batch: %d\n",
+ ring->name, ret);
+ intel_destroy_ringbuffer_obj(ringbuf);
+ kfree(ringbuf);
+ return NULL;
+ }
+
+ return ringbuf;
+}
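+
+/* Counterpart of create_wa_bb(), shared by the free and error paths */
+static void destroy_wa_bb(struct intel_ringbuffer *ringbuf)
+{
+ intel_unpin_ringbuffer_obj(ringbuf);
+ intel_destroy_ringbuffer_obj(ringbuf);
+ kfree(ringbuf);
+}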
+
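+/*
+ * The indirect ctx BB executes while the context is being restored, at
+ * the point given by CTX_RCS_INDIRECT_CTX_OFFSET; the per-ctx BB executes
+ * once the restore has completed.
+ */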
+static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
+ struct intel_context *ctx)
+{
+ unsigned long flags = 0;
+ u32 scratch_addr;
+ struct intel_ringbuffer *ringbuf = NULL;
+
+ if (ring->scratch.obj == NULL) {
+ DRM_ERROR("scratch page not allocated for %s\n", ring->name);
+ return -EINVAL;
+ }
+
+ ringbuf = create_wa_bb(ring, PAGE_SIZE);
+ if (!ringbuf)
+ return -ENOMEM;
+
+ ctx->indirect_ctx_wa_bb = ringbuf;
+
+ /* WaDisableCtxRestoreArbitration:bdw,chv - re-enabled in the per-ctx BB */
+ intel_logical_ring_emit(ringbuf, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+
+ /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw,chv */
+ intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+ intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+
+ /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+ flags = PIPE_CONTROL_FLUSH_RO_CACHES |
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE;
+
+ /* Actual scratch location is 128 bytes (2 cachelines) into the scratch page */
+ scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
+ scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
+
+ intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+ intel_logical_ring_emit(ringbuf, flags);
+ intel_logical_ring_emit(ringbuf, scratch_addr);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+
+ /* Pad to 16 dwords (one cacheline); 13 dwords have been emitted so far */
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+
+ /*
+ * No MI_BATCH_BUFFER_END is required in the indirect ctx BB because
+ * the HW executes exactly the size programmed in CTX_RCS_INDIRECT_CTX.
+ */
+
+ return 0;
+}
+
+static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
+ struct intel_context *ctx)
+{
+ unsigned long flags = 0;
+ u32 scratch_addr;
+ struct intel_ringbuffer *ringbuf = NULL;
+
+ if (ring->scratch.obj == NULL) {
+ DRM_ERROR("scratch page not allocated for %s\n", ring->name);
+ return -EINVAL;
+ }
+
+ ringbuf = create_wa_bb(ring, PAGE_SIZE);
+ if (!ringbuf)
+ return -ENOMEM;
+
+ ctx->per_ctx_wa_bb = ringbuf;
+
+ /* Actual scratch location is 128 bytes (2 cachelines) into the scratch page */
+ scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
+ scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
+
+ /* WaDisableCtxRestoreArbitration:bdw,chv */
+ intel_logical_ring_emit(ringbuf, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+
+ /*
+ * As per Bspec, to work around a known HW issue, SW must perform the
+ * programming sequence below prior to programming MI_BATCH_BUFFER_END.
+ *
+ * This is only applicable to Gen8.
+ */
+
+ /* WaRsRestoreWithPerCtxtBb:bdw,chv */
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+ intel_logical_ring_emit(ringbuf, INSTPM);
+ intel_logical_ring_emit(ringbuf,
+ _MASKED_BIT_DISABLE(INSTPM_FORCE_ORDERING));
+
+ flags = MI_ATOMIC_MEMORY_TYPE_GGTT |
+ MI_ATOMIC_INLINE_DATA |
+ MI_ATOMIC_CS_STALL |
+ MI_ATOMIC_RETURN_DATA_CTL |
+ MI_ATOMIC_MOVE;
+
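+ /*
+ * The atomic MOVE with inline data stores the INSTPM masked-enable
+ * value into the scratch slot; the MI_LOAD_REGISTER_MEM below then
+ * restores INSTPM from that same location.
+ */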
+ intel_logical_ring_emit(ringbuf, MI_ATOMIC(5) | flags);
+ intel_logical_ring_emit(ringbuf, scratch_addr);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf,
+ _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+ intel_logical_ring_emit(ringbuf,
+ _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+
+ /*
+ * Bspec says MI_LOAD_REGISTER_MEM, MI_LOAD_REGISTER_REG and
+ * MI_BATCH_BUFFER_END need to be in the same cacheline.
+ */
+ while (((unsigned long) ringbuf->tail % CACHELINE_BYTES) != 0)
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+
+ intel_logical_ring_emit(ringbuf,
+ MI_LOAD_REGISTER_MEM |
+ MI_LRM_USE_GLOBAL_GTT |
+ MI_LRM_ASYNC_MODE_ENABLE);
+ intel_logical_ring_emit(ringbuf, INSTPM);
+ /* Gen8 LRM dword order: address low, then address high */
+ intel_logical_ring_emit(ringbuf, scratch_addr);
+ intel_logical_ring_emit(ringbuf, 0);
+
+ /*
+ * Bspec says there should not be any commands programmed
+ * between MI_LOAD_REGISTER_REG and MI_BATCH_BUFFER_END, so
+ * do not add any new commands here.
+ */
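+ /* Src == dst; presumably only the LRR read of GEN8_RS_PREEMPT_STATUS matters */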
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_REG);
+ intel_logical_ring_emit(ringbuf, GEN8_RS_PREEMPT_STATUS);
+ intel_logical_ring_emit(ringbuf, GEN8_RS_PREEMPT_STATUS);
+
+ intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_END);
+
+ return 0;
+}
+
+static int intel_init_workaround_bb(struct intel_engine_cs *ring,
+ struct intel_context *ctx)
+{
+ int ret;
+ struct drm_device *dev = ring->dev;
+
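+ /* WA batch buffers only apply to the render ring */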
+ WARN_ON(ring->id != RCS);
+
+ if (IS_GEN8(dev)) {
+ ret = gen8_init_indirectctx_bb(ring, ctx);
+ if (ret)
+ return ret;
+
+ ret = gen8_init_perctx_bb(ring, ctx);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int gen8_init_common_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
@@ -1337,38 +1540,6 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
return 0;
}
-static struct intel_ringbuffer *
-create_wa_bb(struct intel_engine_cs *ring, uint32_t bb_size)
-{
- struct drm_device *dev = ring->dev;
- struct intel_ringbuffer *ringbuf;
- int ret;
-
- ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
- if (!ringbuf)
- return NULL;
-
- ringbuf->ring = ring;
-
- ringbuf->size = roundup(bb_size, PAGE_SIZE);
- ringbuf->effective_size = ringbuf->size;
- ringbuf->head = 0;
- ringbuf->tail = 0;
- ringbuf->space = ringbuf->size;
- ringbuf->last_retired_head = -1;
-
- ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
- if (ret) {
- DRM_DEBUG_DRIVER(
- "Failed to allocate ringbuf obj for wa_bb%s: %d\n",
- ring->name, ret);
- kfree(ringbuf);
- return NULL;
- }
-
- return ringbuf;
-}
-
static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
struct intel_context *ctx)
{
@@ -1490,6 +1661,7 @@ static int logical_render_ring_init(struct drm_device *dev)
else
ring->init_hw = gen8_init_render_ring;
ring->init_context = gen8_init_rcs_context;
+ ring->init_context_bb = intel_init_workaround_bb;
ring->cleanup = intel_fini_pipe_control;
ring->get_seqno = gen8_get_seqno;
ring->set_seqno = gen8_set_seqno;
@@ -1500,11 +1672,16 @@ static int logical_render_ring_init(struct drm_device *dev)
ring->emit_bb_start = gen8_emit_bb_start;
ring->dev = dev;
+
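+ /*
+ * The scratch page must be allocated before logical_ring_init()
+ * creates the default context, as the WA batch buffer setup in
+ * init_context_bb depends on it.
+ */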
+ ret = intel_init_pipe_control(ring);
+ if (ret)
+ return ret;
+
ret = logical_ring_init(dev, ring);
if (ret)
return ret;
- return intel_init_pipe_control(ring);
+ return 0;
}
static int logical_bsd_ring_init(struct drm_device *dev)
@@ -1784,15 +1961,29 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
reg_state[CTX_SECOND_BB_STATE+1] = 0;
if (ring->id == RCS) {
- /* TODO: according to BSpec, the register state context
- * for CHV does not have these. OTOH, these registers do
- * exist in CHV. I'm waiting for a clarification */
reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
- reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
+
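+ /* Bit 0 of BB_PER_CTX_PTR marks the address as valid */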
+ if (ctx->per_ctx_wa_bb)
+ reg_state[CTX_BB_PER_CTX_PTR + 1] =
+ i915_gem_obj_ggtt_offset(
+ ctx->per_ctx_wa_bb->obj) | 0x01;
+ else
+ reg_state[CTX_BB_PER_CTX_PTR + 1] = 0;
+
reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
- reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
- reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+
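+ /*
+ * The low 6 bits of INDIRECT_CTX hold the BB size in cachelines;
+ * the indirect ctx BB built above is exactly one cacheline.
+ */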
+ if (ctx->indirect_ctx_wa_bb) {
+ reg_state[CTX_RCS_INDIRECT_CTX + 1] =
+ i915_gem_obj_ggtt_offset(
+ ctx->indirect_ctx_wa_bb->obj) | 0x01;
+
+ reg_state[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
+ CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+ } else {
+ reg_state[CTX_RCS_INDIRECT_CTX + 1] = 0;
+ reg_state[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = 0;
+ }
}
reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
@@ -1859,6 +2050,18 @@ void intel_lr_context_free(struct intel_context *ctx)
drm_gem_object_unreference(&ctx_obj->base);
}
}
+
+ if (ctx->indirect_ctx_wa_bb)
+ destroy_wa_bb(ctx->indirect_ctx_wa_bb);
+
+ if (ctx->per_ctx_wa_bb)
+ destroy_wa_bb(ctx->per_ctx_wa_bb);
}
static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
@@ -1985,6 +2188,16 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
}
+ if (ring->id == RCS && !ctx->rcs_initialized &&
+     ring->init_context_bb) {
+ ret = ring->init_context_bb(ring, ctx);
+ if (ret) {
+ DRM_ERROR("Failed to init WA batch buffers: %d\n", ret);
+ goto error;
+ }
+ }
+
ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
if (ret) {
DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
@@ -2013,6 +2226,17 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
return 0;
error:
+ if (ctx->indirect_ctx_wa_bb)
+ destroy_wa_bb(ctx->indirect_ctx_wa_bb);
+ if (ctx->per_ctx_wa_bb)
+ destroy_wa_bb(ctx->per_ctx_wa_bb);
+
if (is_global_default_ctx)
intel_unpin_ringbuffer_obj(ringbuf);
error_destroy_rbuf:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 39183fc..839d698 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -146,6 +146,9 @@ struct intel_engine_cs {
int (*init_context)(struct intel_engine_cs *ring,
struct intel_context *ctx);
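+ /* Optional hook to set up per-context WA batch buffers (RCS only) */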
+ int (*init_context_bb)(struct intel_engine_cs *ring,
+ struct intel_context *ctx);
+
void (*write_tail)(struct intel_engine_cs *ring,
u32 value);
int __must_check (*flush)(struct intel_engine_cs *ring,