Split the definition, construction and updating of the Logical Ring Context from the execlist submission interface. The LRC is used by the HW, irrespective of our different submission backends. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_context_sseu.c | 3 +- .../drm/i915/gt/intel_execlists_submission.c | 1683 +-------------- .../drm/i915/gt/intel_execlists_submission.h | 17 - drivers/gpu/drm/i915/gt/intel_lrc.c | 1561 ++++++++++++++ drivers/gpu/drm/i915/gt/intel_lrc.h | 82 + drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 2 + drivers/gpu/drm/i915/gt/selftest_execlists.c | 1776 +--------------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 1861 +++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 26 +- drivers/gpu/drm/i915/gvt/scheduler.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 2 +- 14 files changed, 3597 insertions(+), 3422 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_lrc.c create mode 100644 drivers/gpu/drm/i915/gt/intel_lrc.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_lrc.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f9ef5199b124..849c7b3fc941 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -104,6 +104,7 @@ gt-y += \ gt/intel_gt_requests.o \ gt/intel_gtt.o \ gt/intel_llc.o \ + gt/intel_lrc.o \ gt/intel_mocs.o \ gt/intel_ppgtt.o \ gt/intel_rc6.o \ diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 9c6f0ebfa3cf..1972dd5dca00 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -5,7 +5,7 @@ #include "gen8_engine_cs.h" #include "i915_drv.h" -#include "intel_execlists_submission.h" /* XXX */ +#include "intel_lrc.h" #include 
"intel_gpu_commands.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 5f94b44022dc..8dfd8f656aaa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -8,8 +8,7 @@ #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gpu_commands.h" -#include "intel_execlists_submission.h" -#include "intel_lrc_reg.h" +#include "intel_lrc.h" #include "intel_ring.h" #include "intel_sseu.h" diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index dcecc2887891..358fd2455f6e 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -109,7 +109,6 @@ #include <linux/interrupt.h> #include "i915_drv.h" -#include "i915_perf.h" #include "i915_trace.h" #include "i915_vgpu.h" #include "gen8_engine_cs.h" @@ -120,6 +119,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_lrc.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -144,8 +144,6 @@ #define GEN8_CTX_STATUS_COMPLETED_MASK \ (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) -#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) - #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ #define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ #define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) @@ -205,136 +203,6 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } -static int __execlists_context_alloc(struct intel_context *ce, - struct intel_engine_cs *engine); - -static void execlists_init_reg_state(u32 *reg_state, - const struct intel_context *ce, - const struct intel_engine_cs *engine, - const struct intel_ring *ring, - bool close); -static 
void -__execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine, - u32 head); - -static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x54; - else if (engine->class == RENDER_CLASS) - return 0x58; - else - return -1; -} - -static int lrc_ring_gpr0(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x74; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x68; - else if (engine->class == RENDER_CLASS) - return 0xd8; - else - return -1; -} - -static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x12; - else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) - return 0x18; - else - return -1; -} - -static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) -{ - int x; - - x = lrc_ring_wa_bb_per_ctx(engine); - if (x < 0) - return x; - - return x + 2; -} - -static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) -{ - int x; - - x = lrc_ring_indirect_ptr(engine); - if (x < 0) - return x; - - return x + 2; -} - -static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) -{ - if (engine->class != RENDER_CLASS) - return -1; - - if (INTEL_GEN(engine->i915) >= 12) - return 0xb6; - else if (INTEL_GEN(engine->i915) >= 11) - return 0xaa; - else - return -1; -} - -static u32 -lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) -{ - switch (INTEL_GEN(engine->i915)) { - default: - MISSING_CASE(INTEL_GEN(engine->i915)); - fallthrough; - case 12: - return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 11: - return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 10: - return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 9: - return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 8: - return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - } -} - 
-static void -lrc_ring_setup_indirect_ctx(u32 *regs, - const struct intel_engine_cs *engine, - u32 ctx_bb_ggtt_addr, - u32 size) -{ - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); - GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); - regs[lrc_ring_indirect_ptr(engine) + 1] = - ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); - - GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); - regs[lrc_ring_indirect_offset(engine) + 1] = - lrc_ring_indirect_offset_default(engine) << 6; -} - -static u32 intel_context_get_runtime(const struct intel_context *ce) -{ - /* - * We can use either ppHWSP[16] which is recorded before the context - * switch (and so excludes the cost of context switches) or use the - * value from the context image itself, which is saved/restored earlier - * and so includes the cost of the save. - */ - return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); -} - static void mark_eio(struct i915_request *rq) { if (i915_request_completed(rq)) @@ -513,568 +381,6 @@ assert_priority_queue(const struct i915_request *prev, return rq_prio(prev) >= rq_prio(next); } -/* - * The context descriptor encodes various attributes of a context, - * including its GTT address and some flags. Because it's fairly - * expensive to calculate, we'll just do it once and cache the result, - * which remains valid until the context is unpinned. 
- * - * This is what a descriptor looks like, from LSB to MSB:: - * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 - * - * Starting from Gen11, the upper dword of the descriptor has a new format: - * - * bits 32-36: reserved - * bits 37-47: SW context ID - * bits 48:53: engine instance - * bit 54: mbz, reserved for use by hardware - * bits 55-60: SW counter - * bits 61-63: engine class - * - * engine info, SW context ID and SW counter need to form a unique number - * (Context ID) per lrc. - */ -static u32 -lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) -{ - u32 desc; - - desc = INTEL_LEGACY_32B_CONTEXT; - if (i915_vm_is_4lvl(ce->vm)) - desc = INTEL_LEGACY_64B_CONTEXT; - desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; - - desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - if (IS_GEN(engine->i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; - - return i915_ggtt_offset(ce->state) | desc; -} - -static inline unsigned int dword_in_page(void *addr) -{ - return offset_in_page(addr) / sizeof(u32); -} - -static void set_offsets(u32 *regs, - const u8 *data, - const struct intel_engine_cs *engine, - bool clear) -#define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) -#define POSTED BIT(0) -#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) -#define REG16(x) \ - (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ - (((x) >> 2) & 0x7f) -#define END(total_state_size) 0, (total_state_size) -{ - const u32 base = engine->mmio_base; - - while (*data) { - u8 count, flags; - - if (*data & BIT(7)) { /* skip */ - count = *data++ & ~BIT(7); - if (clear) - memset32(regs, MI_NOOP, count); - regs += count; - continue; - } - - count = *data & 0x3f; 
- flags = *data >> 6; - data++; - - *regs = MI_LOAD_REGISTER_IMM(count); - if (flags & POSTED) - *regs |= MI_LRI_FORCE_POSTED; - if (INTEL_GEN(engine->i915) >= 11) - *regs |= MI_LRI_LRM_CS_MMIO; - regs++; - - GEM_BUG_ON(!count); - do { - u32 offset = 0; - u8 v; - - do { - v = *data++; - offset <<= 7; - offset |= v & ~BIT(7); - } while (v & BIT(7)); - - regs[0] = base + (offset << 2); - if (clear) - regs[1] = 0; - regs += 2; - } while (--count); - } - - if (clear) { - u8 count = *++data; - - /* Clear past the tail for HW access */ - GEM_BUG_ON(dword_in_page(regs) > count); - memset32(regs, MI_NOOP, count - dword_in_page(regs)); - - /* Close the batch; used mainly by live_lrc_layout() */ - *regs = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - *regs |= BIT(0); - } -} - -static const u8 gen8_xcs_offsets[] = { - NOP(1), - LRI(11, 0), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x11c), - REG(0x114), - REG(0x118), - - NOP(9), - LRI(9, 0), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(13), - LRI(2, 0), - REG16(0x200), - REG(0x028), - - END(80) -}; - -static const u8 gen9_xcs_offsets[] = { - NOP(1), - LRI(14, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x11c), - REG(0x114), - REG(0x118), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - - NOP(3), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(13), - LRI(1, POSTED), - REG16(0x200), - - NOP(13), - LRI(44, POSTED), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - 
REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - - END(176) -}; - -static const u8 gen12_xcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - END(80) -}; - -static const u8 gen8_rcs_offsets[] = { - NOP(1), - LRI(14, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x11c), - REG(0x114), - REG(0x118), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - - NOP(3), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(13), - LRI(1, 0), - REG(0x0c8), - - END(80) -}; - -static const u8 gen9_rcs_offsets[] = { - NOP(1), - LRI(14, POSTED), - REG16(0x244), - REG(0x34), - REG(0x30), - REG(0x38), - REG(0x3c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x11c), - REG(0x114), - REG(0x118), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - - NOP(3), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(13), - LRI(1, 0), - REG(0xc8), - - NOP(13), - LRI(44, POSTED), - REG(0x28), - REG(0x9c), - REG(0xc0), - REG(0x178), - 
REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x68), - - END(176) -}; - -static const u8 gen11_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x11c), - REG(0x114), - REG(0x118), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(1, POSTED), - REG(0x1b0), - - NOP(10), - LRI(1, 0), - REG(0x0c8), - - END(80) -}; - -static const u8 gen12_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - NOP(3 + 9 + 1), - - LRI(51, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - 
REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - REG(0x084), - NOP(1), - - END(192) -}; - -#undef END -#undef REG16 -#undef REG -#undef LRI -#undef NOP - -static const u8 *reg_offsets(const struct intel_engine_cs *engine) -{ - /* - * The gen12+ lists only have the registers we program in the basic - * default state. We rely on the context image using relative - * addressing to automatic fixup the register state between the - * physical engines for virtual engine. - */ - GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && - !intel_engine_has_relative_mmio(engine)); - - if (engine->class == RENDER_CLASS) { - if (INTEL_GEN(engine->i915) >= 12) - return gen12_rcs_offsets; - else if (INTEL_GEN(engine->i915) >= 11) - return gen11_rcs_offsets; - else if (INTEL_GEN(engine->i915) >= 9) - return gen9_rcs_offsets; - else - return gen8_rcs_offsets; - } else { - if (INTEL_GEN(engine->i915) >= 12) - return gen12_xcs_offsets; - else if (INTEL_GEN(engine->i915) >= 9) - return gen9_xcs_offsets; - else - return gen8_xcs_offsets; - } -} - static struct i915_request * __unwind_incomplete_requests(struct intel_engine_cs *engine) { @@ -1187,58 +493,6 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } -static void -execlists_check_context(const struct intel_context *ce, - const struct intel_engine_cs *engine, - const char *when) -{ - const struct intel_ring *ring = ce->ring; - u32 *regs = ce->lrc_reg_state; - bool valid 
= true; - int x; - - if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { - pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", - engine->name, - regs[CTX_RING_START], - i915_ggtt_offset(ring->vma)); - regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); - valid = false; - } - - if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != - (RING_CTL_SIZE(ring->size) | RING_VALID)) { - pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", - engine->name, - regs[CTX_RING_CTL], - (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); - regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; - valid = false; - } - - x = lrc_ring_mi_mode(engine); - if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { - pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", - engine->name, regs[x + 1]); - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; - valid = false; - } - - WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when); -} - -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - u32 *regs; - - regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); - execlists_init_reg_state(regs, ce, engine, ce->ring, true); - - ce->runtime.last = intel_context_get_runtime(ce); -} - static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { @@ -1271,42 +525,10 @@ static void reset_active(struct i915_request *rq, head = intel_ring_wrap(ce->ring, head); /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine, head); + lrc_init_regs(ce, engine, true); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; -} - -static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) -{ -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - 
ce->runtime.num_underflow++; - ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); -#endif -} - -static void intel_context_update_runtime(struct intel_context *ce) -{ - u32 old; - s32 dt; - - if (intel_context_is_barrier(ce)) - return; - - old = ce->runtime.last; - ce->runtime.last = intel_context_get_runtime(ce); - dt = ce->runtime.last - old; - - if (unlikely(dt < 0)) { - CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", - old, ce->runtime.last, dt); - st_update_runtime_underflow(ce, dt); - return; - } - - ewma_runtime_add(&ce->runtime.avg, dt); - ce->runtime.total += dt; + ce->lrc.lrca = lrc_update_regs(ce, engine, head); } static inline struct intel_engine_cs * @@ -1321,7 +543,7 @@ __execlists_schedule_in(struct i915_request *rq) reset_active(rq, engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - execlists_check_context(ce, engine, "before"); + lrc_check_regs(ce, engine, "before"); if (ce->tag) { /* Use a fixed tag for OA and friends */ @@ -1393,7 +615,7 @@ __execlists_schedule_out(struct i915_request *rq, */ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - execlists_check_context(ce, engine, "after"); + lrc_check_regs(ce, engine, "after"); /* * If we have just completed this context, the engine may now be @@ -1411,7 +633,7 @@ __execlists_schedule_out(struct i915_request *rq, set_bit(ccid - 1, &engine->context_tag); } - intel_context_update_runtime(ce); + lrc_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) @@ -1752,12 +974,6 @@ static bool can_merge_rq(const struct i915_request *prev, return true; } -static void virtual_update_register_offsets(u32 *regs, - struct intel_engine_cs *engine) -{ - set_offsets(regs, reg_offsets(engine), engine, false); -} - static bool virtual_matches(const struct virtual_engine *ve, const struct i915_request *rq, const struct intel_engine_cs *engine) @@ -1793,8 
+1009,7 @@ static void virtual_xfer_context(struct virtual_engine *ve, GEM_BUG_ON(READ_ONCE(ve->context.inflight)); if (!intel_engine_has_relative_mmio(engine)) - virtual_update_register_offsets(ve->context.lrc_reg_state, - engine); + lrc_update_offsets(&ve->context, engine); /* * Move the bound engine to the top of the list for @@ -3287,248 +2502,55 @@ static void execlists_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->active.lock, flags); } -static void __execlists_context_fini(struct intel_context *ce) -{ - intel_ring_put(ce->ring); - i915_vma_put(ce->state); -} - -static void execlists_context_destroy(struct kref *kref) +static int execlists_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) { - struct intel_context *ce = container_of(kref, typeof(*ce), ref); - - GEM_BUG_ON(!i915_active_is_idle(&ce->active)); - GEM_BUG_ON(intel_context_is_pinned(ce)); - - if (ce->state) - __execlists_context_fini(ce); - - intel_context_fini(ce); - intel_context_free(ce); -} - -static void -set_redzone(void *vaddr, const struct intel_engine_cs *engine) -{ - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; - - vaddr += engine->context_size; - - memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); -} - -static void -check_redzone(const void *vaddr, const struct intel_engine_cs *engine) -{ - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; - - vaddr += engine->context_size; - - if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) - drm_err_once(&engine->i915->drm, - "%s context redzone overwritten!\n", - engine->name); -} - -static void execlists_context_unpin(struct intel_context *ce) -{ - check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, - ce->engine); -} - -static void execlists_context_post_unpin(struct intel_context *ce) -{ - i915_gem_object_unpin_map(ce->state->obj); -} - -static u32 * -gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs) -{ - *cs++ = 
MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); - *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + - CTX_TIMESTAMP * sizeof(u32); - *cs++ = 0; - - *cs++ = MI_LOAD_REGISTER_REG | - MI_LRR_SOURCE_CS_MMIO | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); - *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); - - *cs++ = MI_LOAD_REGISTER_REG | - MI_LRR_SOURCE_CS_MMIO | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); - *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); - - return cs; + return lrc_pre_pin(ce, ce->engine, ww, vaddr); } -static u32 * -gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) -{ - GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1); - - *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); - *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + - (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32); - *cs++ = 0; - - return cs; -} - -static u32 * -gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) -{ - GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1); - - *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); - *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + - (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32); - *cs++ = 0; - - *cs++ = MI_LOAD_REGISTER_REG | - MI_LRR_SOURCE_CS_MMIO | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); - *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)); - - return cs; -} - -static u32 * -gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) -{ - cs = gen12_emit_timestamp_wa(ce, cs); - cs = gen12_emit_cmd_buf_wa(ce, cs); - cs = gen12_emit_restore_scratch(ce, cs); - - return cs; -} - -static u32 * 
-gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) -{ - cs = gen12_emit_timestamp_wa(ce, cs); - cs = gen12_emit_restore_scratch(ce, cs); - - return cs; -} - -static inline u32 context_wa_bb_offset(const struct intel_context *ce) -{ - return PAGE_SIZE * ce->wa_bb_page; -} - -static u32 *context_indirect_bb(const struct intel_context *ce) -{ - void *ptr; - - GEM_BUG_ON(!ce->wa_bb_page); - - ptr = ce->lrc_reg_state; - ptr -= LRC_STATE_OFFSET; /* back to start of context image */ - ptr += context_wa_bb_offset(ce); - - return ptr; -} - -static void -setup_indirect_ctx_bb(const struct intel_context *ce, - const struct intel_engine_cs *engine, - u32 *(*emit)(const struct intel_context *, u32 *)) +static int execlists_context_pin(struct intel_context *ce, void *vaddr) { - u32 * const start = context_indirect_bb(ce); - u32 *cs; - - cs = emit(ce, start); - GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); - while ((unsigned long)cs % CACHELINE_BYTES) - *cs++ = MI_NOOP; - - lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine, - i915_ggtt_offset(ce->state) + - context_wa_bb_offset(ce), - (cs - start) * sizeof(*cs)); + return lrc_pin(ce, ce->engine, vaddr); } -static void -__execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine, - u32 head) +static int __lrc_setup(struct intel_context *ce, + struct intel_engine_cs *engine) { - struct intel_ring *ring = ce->ring; - u32 *regs = ce->lrc_reg_state; - - GEM_BUG_ON(!intel_ring_offset_valid(ring, head)); - GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - - regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD] = head; - regs[CTX_RING_TAIL] = ring->tail; - regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; - - /* RPCS */ - if (engine->class == RENDER_CLASS) { - regs[CTX_R_PWR_CLK_STATE] = - intel_sseu_make_rpcs(engine->gt, &ce->sseu); + struct drm_i915_gem_object *obj = ce->state->obj; + void *vaddr; - 
i915_oa_init_reg_state(ce, engine); + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); + return PTR_ERR(vaddr); } - if (ce->wa_bb_page) { - u32 *(*fn)(const struct intel_context *ce, u32 *cs); - - fn = gen12_emit_indirect_ctx_xcs; - if (ce->engine->class == RENDER_CLASS) - fn = gen12_emit_indirect_ctx_rcs; + lrc_init_state(ce, engine, vaddr); - /* Mutually exclusive wrt to global indirect bb */ - GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); - setup_indirect_ctx_bb(ce, engine, fn); - } + __i915_gem_object_flush_map(obj, 0, engine->context_size); + i915_gem_object_unpin_map(obj); + return 0; } -static int -execlists_context_pre_pin(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, void **vaddr) +static int __execlists_context_alloc(struct intel_context *ce, + struct intel_engine_cs *engine) { - GEM_BUG_ON(!ce->state); - GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - - *vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(ce->engine->i915) | - I915_MAP_OVERRIDE); + int err; - return PTR_ERR_OR_ZERO(*vaddr); -} + err = lrc_alloc(ce, engine); + if (err) + return err; -static int -__execlists_context_pin(struct intel_context *ce, - struct intel_engine_cs *engine, - void *vaddr) -{ - ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; - ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; - __execlists_update_reg_state(ce, engine, ce->ring->tail); + err = __lrc_setup(ce, engine); + if (err) + goto err_lrc; return 0; -} -static int execlists_context_pin(struct intel_context *ce, void *vaddr) -{ - return __execlists_context_pin(ce, ce->engine, vaddr); +err_lrc: + lrc_fini(ce); + return err; } static int execlists_context_alloc(struct intel_context *ce) @@ -3536,34 +2558,19 @@ static int execlists_context_alloc(struct intel_context *ce) return __execlists_context_alloc(ce, ce->engine); } -static void execlists_context_reset(struct intel_context *ce) -{ 
- CE_TRACE(ce, "reset\n"); - GEM_BUG_ON(!intel_context_is_pinned(ce)); - - intel_ring_reset(ce->ring, ce->ring->emit); - - /* Scrub away the garbage */ - execlists_init_reg_state(ce->lrc_reg_state, - ce, ce->engine, ce->ring, true); - __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - - ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; -} - static const struct intel_context_ops execlists_context_ops = { .alloc = execlists_context_alloc, .pre_pin = execlists_context_pre_pin, .pin = execlists_context_pin, - .unpin = execlists_context_unpin, - .post_unpin = execlists_context_post_unpin, + .unpin = lrc_unpin, + .post_unpin = lrc_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, - .reset = execlists_context_reset, - .destroy = execlists_context_destroy, + .reset = lrc_reset, + .destroy = lrc_destroy, }; static int emit_pdps(struct i915_request *rq) @@ -3650,330 +2657,6 @@ static int execlists_request_alloc(struct i915_request *request) return 0; } -/* - * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after - * PIPE_CONTROL instruction. This is required for the flush to happen correctly - * but there is a slight complication as this is applied in WA batch where the - * values are only initialized once so we cannot take register value at the - * beginning and reuse it further; hence we save its value to memory, upload a - * constant value with bit21 set and then we restore it back with the saved value. - * To simplify the WA, a constant value is formed by using the default value - * of this register. This shouldn't be a problem because we are only modifying - * it for a short period and this batch in non-premptible. We can ofcourse - * use additional instructions that read the actual value of the register - * at that time and set our bit of interest but it makes the WA complicated. - * - * This WA is also required for Gen9 so extracting as a function avoids - * code duplication. 
- */ -static u32 * -gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) -{ - /* NB no one else is allowed to scribble over scratch + 256! */ - *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); - *batch++ = 0; - - *batch++ = MI_LOAD_REGISTER_IMM(1); - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; - - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE, - 0); - - *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); - *batch++ = 0; - - return batch; -} - -/* - * Typically we only have one indirect_ctx and per_ctx batch buffer which are - * initialized at the beginning and shared across all contexts but this field - * helps us to have multiple batches at different offsets and select them based - * on a criteria. At the moment this batch always start at the beginning of the page - * and at this point we don't have multiple wa_ctx batch buffers. - * - * The number of WA applied are not known at the beginning; we use this field - * to return the no of DWORDS written. - * - * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END - * so it adds NOOPs as padding to make it cacheline aligned. - * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together - * makes a complete batch buffer. 
- */ -static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - /* WaDisableCtxRestoreArbitration:bdw,chv */ - *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) - batch = gen8_emit_flush_coherentl3_wa(engine, batch); - - /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ - /* Actual scratch location is at 128 bytes offset */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_STORE_DATA_INDEX | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE, - LRC_PPHWSP_SCRATCH_ADDR); - - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - /* - * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because - * execution depends on the length specified in terms of cache lines - * in the register CTX_RCS_INDIRECT_CTX - */ - - return batch; -} - -struct lri { - i915_reg_t reg; - u32 value; -}; - -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) -{ - GEM_BUG_ON(!count || count > 63); - - *batch++ = MI_LOAD_REGISTER_IMM(count); - do { - *batch++ = i915_mmio_reg_offset(lri->reg); - *batch++ = lri->value; - } while (lri++, --count); - *batch++ = MI_NOOP; - - return batch; -} - -static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - static const struct lri lri[] = { - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ - { - COMMON_SLICE_CHICKEN2, - __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE, - 0), - }, - - /* BSpec: 11391 */ - { - FF_SLICE_CHICKEN, - __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX, - FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX), - }, - - /* BSpec: 11299 */ - { - _3D_CHICKEN3, - __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX, - _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX), - } - }; - - *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - /* 
WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ - batch = gen8_emit_flush_coherentl3_wa(engine, batch); - - /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_STORE_DATA_INDEX | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE, - LRC_PPHWSP_SCRATCH_ADDR); - - batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); - - /* WaMediaPoolStateCmdInWABB:bxt,glk */ - if (HAS_POOLED_EU(engine->i915)) { - /* - * EU pool configuration is setup along with golden context - * during context initialization. This value depends on - * device type (2x6 or 3x6) and needs to be updated based - * on which subslice is disabled especially for 2x6 - * devices, however it is safe to load default - * configuration of 3x6 device instead of masking off - * corresponding bits because HW ignores bits of a disabled - * subslice and drops down to appropriate config. Please - * see render_state_setup() in i915_gem_render_state.c for - * possible configurations, to avoid duplication they are - * not shown here again. - */ - *batch++ = GEN9_MEDIA_POOL_STATE; - *batch++ = GEN9_MEDIA_POOL_ENABLE; - *batch++ = 0x00777000; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; - } - - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - -static u32 * -gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - int i; - - /* - * WaPipeControlBefore3DStateSamplePattern: cnl - * - * Ensure the engine is idle prior to programming a - * 3DSTATE_SAMPLE_PATTERN during a context restore. - */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL, - 0); - /* - * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for - * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in - * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is - * confusing. 
Since gen8_emit_pipe_control() already advances the - * batch by 6 dwords, we advance the other 10 here, completing a - * cacheline. It's not clear if the workaround requires this padding - * before other commands, or if it's just the regular padding we would - * already have for the workaround bb, so leave it here for now. - */ - for (i = 0; i < 10; i++) - *batch++ = MI_NOOP; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - -#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) - -static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); - if (err) - goto err; - - engine->wa_ctx.vma = vma; - return 0; - -err: - i915_gem_object_put(obj); - return err; -} - -static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) -{ - i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); -} - -typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); - -static int intel_init_workaround_bb(struct intel_engine_cs *engine) -{ - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, - &wa_ctx->per_ctx }; - wa_bb_func_t wa_bb_fn[2]; - void *batch, *batch_ptr; - unsigned int i; - int ret; - - if (engine->class != RENDER_CLASS) - return 0; - - switch (INTEL_GEN(engine->i915)) { - case 12: - case 11: - return 0; - case 10: - wa_bb_fn[0] = gen10_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; - case 9: - wa_bb_fn[0] = gen9_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; - case 8: - wa_bb_fn[0] = gen8_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; - default: - 
MISSING_CASE(INTEL_GEN(engine->i915)); - return 0; - } - - ret = lrc_setup_wa_ctx(engine); - if (ret) { - drm_dbg(&engine->i915->drm, - "Failed to setup context WA page: %d\n", ret); - return ret; - } - - batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); - - /* - * Emit the two workaround batch buffers, recording the offset from the - * start of the workaround batch buffer object for each and their - * respective sizes. - */ - batch_ptr = batch; - for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { - wa_bb[i]->offset = batch_ptr - batch; - if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, - CACHELINE_BYTES))) { - ret = -EINVAL; - break; - } - if (wa_bb_fn[i]) - batch_ptr = wa_bb_fn[i](engine, batch_ptr); - wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); - } - GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); - - __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch); - __i915_gem_object_release_map(wa_ctx->vma->obj); - if (ret) - lrc_destroy_wa_ctx(engine); - - return ret; -} - static void reset_csb_pointers(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -4185,25 +2868,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) engine->execlists.reset_ccid = active_ccid(engine); } -static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) -{ - int x; - - x = lrc_ring_mi_mode(engine); - if (x != -1) { - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; - } -} - -static void __execlists_reset_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - __reset_stop_ring(regs, engine); -} - static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -4287,9 +2951,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) out_replay: ENGINE_TRACE(engine, "replay 
{head:%04x, tail:%04x}\n", head, ce->ring->tail); - __execlists_reset_reg_state(ce, engine); - __execlists_update_reg_state(ce, engine, head); - ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + lrc_reset_regs(ce, engine); + ce->lrc.lrca = lrc_update_regs(ce, engine, head); unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4487,7 +3150,7 @@ static void execlists_release(struct intel_engine_cs *engine) execlists_shutdown(engine); intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx(engine); + lrc_fini_wa_ctx(engine); } static void @@ -4581,7 +3244,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) if (engine->class == RENDER_CLASS) rcs_submission_override(engine); - if (intel_init_workaround_bb(engine)) + if (lrc_init_wa_ctx(engine)) /* * We continue even if we fail to initialize WA batch * because we only expect rare glitches but nothing @@ -4622,218 +3285,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) return 0; } -static void init_common_reg_state(u32 * const regs, - const struct intel_engine_cs *engine, - const struct intel_ring *ring, - bool inhibit) -{ - u32 ctl; - - ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); - ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (inhibit) - ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; - if (INTEL_GEN(engine->i915) < 11) - ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); - regs[CTX_CONTEXT_CONTROL] = ctl; - - regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; - regs[CTX_TIMESTAMP] = 0; -} - -static void init_wa_bb_reg_state(u32 * const regs, - const struct intel_engine_cs *engine) -{ - const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - - if (wa_ctx->per_ctx.size) { - const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); - regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = - 
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } - - if (wa_ctx->indirect_ctx.size) { - lrc_ring_setup_indirect_ctx(regs, engine, - i915_ggtt_offset(wa_ctx->vma) + - wa_ctx->indirect_ctx.offset, - wa_ctx->indirect_ctx.size); - } -} - -static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) -{ - if (i915_vm_is_4lvl(&ppgtt->vm)) { - /* 64b PPGTT (48bit canonical) - * PDP0_DESCRIPTOR contains the base address to PML4 and - * other PDP Descriptors are ignored. - */ - ASSIGN_CTX_PML4(ppgtt, regs); - } else { - ASSIGN_CTX_PDP(ppgtt, regs, 3); - ASSIGN_CTX_PDP(ppgtt, regs, 2); - ASSIGN_CTX_PDP(ppgtt, regs, 1); - ASSIGN_CTX_PDP(ppgtt, regs, 0); - } -} - -static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) -{ - if (i915_is_ggtt(vm)) - return i915_vm_to_ggtt(vm)->alias; - else - return i915_vm_to_ppgtt(vm); -} - -static void execlists_init_reg_state(u32 *regs, - const struct intel_context *ce, - const struct intel_engine_cs *engine, - const struct intel_ring *ring, - bool inhibit) -{ - /* - * A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - * - * Must keep consistent with virtual_update_register_offsets(). 
- */ - set_offsets(regs, reg_offsets(engine), engine, inhibit); - - init_common_reg_state(regs, engine, ring, inhibit); - init_ppgtt_reg_state(regs, vm_alias(ce->vm)); - - init_wa_bb_reg_state(regs, engine); - - __reset_stop_ring(regs, engine); -} - -static int -populate_lr_context(struct intel_context *ce, - struct drm_i915_gem_object *ctx_obj, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - bool inhibit = true; - void *vaddr; - - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); - return PTR_ERR(vaddr); - } - - set_redzone(vaddr, engine); - - if (engine->default_state) { - shmem_read(engine->default_state, 0, - vaddr, engine->context_size); - __set_bit(CONTEXT_VALID_BIT, &ce->flags); - inhibit = false; - } - - /* Clear the ppHWSP (inc. per-context counters) */ - memset(vaddr, 0, PAGE_SIZE); - - /* - * The second page of the context object contains some registers which - * must be set up prior to the first execution. 
- */ - execlists_init_reg_state(vaddr + LRC_STATE_OFFSET, - ce, engine, ring, inhibit); - - __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); - i915_gem_object_unpin_map(ctx_obj); - return 0; -} - -static struct intel_timeline *pinned_timeline(struct intel_context *ce) -{ - struct intel_timeline *tl = fetch_and_zero(&ce->timeline); - - return intel_timeline_create_from_engine(ce->engine, - page_unmask_bits(tl)); -} - -static int __execlists_context_alloc(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *ctx_obj; - struct intel_ring *ring; - struct i915_vma *vma; - u32 context_size; - int ret; - - GEM_BUG_ON(ce->state); - context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - context_size += I915_GTT_PAGE_SIZE; /* for redzone */ - - if (INTEL_GEN(engine->i915) == 12) { - ce->wa_bb_page = context_size / PAGE_SIZE; - context_size += PAGE_SIZE; - } - - ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); - if (IS_ERR(ctx_obj)) - return PTR_ERR(ctx_obj); - - vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto error_deref_obj; - } - - if (!page_mask_bits(ce->timeline)) { - struct intel_timeline *tl; - - /* - * Use the static global HWSP for the kernel context, and - * a dynamically allocated cacheline for everyone else. 
- */ - if (unlikely(ce->timeline)) - tl = pinned_timeline(ce); - else - tl = intel_timeline_create(engine->gt); - if (IS_ERR(tl)) { - ret = PTR_ERR(tl); - goto error_deref_obj; - } - - ce->timeline = tl; - } - - ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error_deref_obj; - } - - ret = populate_lr_context(ce, ctx_obj, engine, ring); - if (ret) { - drm_dbg(&engine->i915->drm, - "Failed to populate LRC: %d\n", ret); - goto error_ring_free; - } - - ce->ring = ring; - ce->state = vma; - - return 0; - -error_ring_free: - intel_ring_put(ring); -error_deref_obj: - i915_gem_object_put(ctx_obj); - return ret; -} - static struct list_head *virtual_queue(struct virtual_engine *ve) { return &ve->base.execlists.default_priolist.requests[0]; @@ -4891,8 +3342,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); GEM_BUG_ON(!list_empty(virtual_queue(ve))); - if (ve->context.state) - __execlists_context_fini(&ve->context); + lrc_fini(&ve->context); intel_context_fini(&ve->context); intel_breadcrumbs_free(ve->base.breadcrumbs); @@ -4952,12 +3402,21 @@ static int virtual_context_alloc(struct intel_context *ce) return __execlists_context_alloc(ce, ve->siblings[0]); } -static int virtual_context_pin(struct intel_context *ce, void *vaddr) +static int virtual_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); /* Note: we must use a real engine class for setting up reg state */ - return __execlists_context_pin(ce, ve->siblings[0], vaddr); + return lrc_pre_pin(ce, ve->siblings[0], ww, vaddr); +} + +static int virtual_context_pin(struct intel_context *ce, void *vaddr) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + return lrc_pin(ce, ve->siblings[0], vaddr); } static void virtual_context_enter(struct 
intel_context *ce) @@ -4985,10 +3444,10 @@ static void virtual_context_exit(struct intel_context *ce) static const struct intel_context_ops virtual_context_ops = { .alloc = virtual_context_alloc, - .pre_pin = execlists_context_pre_pin, + .pre_pin = virtual_context_pre_pin, .pin = virtual_context_pin, - .unpin = execlists_context_unpin, - .post_unpin = execlists_context_post_unpin, + .unpin = lrc_unpin, + .post_unpin = lrc_post_unpin, .enter = virtual_context_enter, .exit = virtual_context_exit, @@ -5470,28 +3929,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, spin_unlock_irqrestore(&engine->active.lock, flags); } -void intel_lr_context_reset(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 head, - bool scrub) -{ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - - /* - * We want a simple context + ring to execute the breadcrumb update. - * We cannot rely on the context being intact across the GPU hang, - * so clear it and rebuild just what we need for the breadcrumb. - * All pending requests for this context will be zapped, and any - * future request will be after userspace has had the opportunity - * to recreate its own state. - */ - if (scrub) - restore_default_state(ce, engine); - - /* Rerun the request; its payload has been neutered (if guilty). 
*/ - __execlists_update_reg_state(ce, engine, head); -} - bool intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index 2c9d7354b42f..0c675bbff351 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -22,25 +22,8 @@ enum { int intel_execlists_submission_setup(struct intel_engine_cs *engine); -/* Logical Ring Contexts */ -/* At the start of the context image is its per-process HWS page */ -#define LRC_PPHWSP_PN (0) -#define LRC_PPHWSP_SZ (1) -/* After the PPHWSP we have the logical state for the context */ -#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) -#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE) - -/* Space within PPHWSP reserved to be used as scratch */ -#define LRC_PPHWSP_SCRATCH 0x34 -#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) - void intel_execlists_set_default_submission(struct intel_engine_cs *engine); -void intel_lr_context_reset(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 head, - bool scrub); - void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c new file mode 100644 index 000000000000..35f4352a484f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -0,0 +1,1561 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014 Intel Corporation + */ + +#include "gen8_engine_cs.h" +#include "i915_drv.h" +#include "i915_perf.h" +#include "intel_engine.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_lrc.h" +#include "intel_lrc_reg.h" +#include "intel_ring.h" +#include "shmem_utils.h" + +static inline unsigned int dword_in_page(void *addr) +{ + return offset_in_page(addr) / 
sizeof(u32); +} + +static void set_offsets(u32 *regs, + const u8 *data, + const struct intel_engine_cs *engine, + bool close) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END 0 +{ + const u32 base = engine->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + count = *data++ & ~BIT(7); + regs += count; + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + if (INTEL_GEN(engine->i915) >= 11) + *regs |= MI_LRI_LRM_CS_MMIO; + regs++; + + GEM_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + regs[0] = base + (offset << 2); + regs += 2; + } while (--count); + } + + if (close) { + /* Close the batch; used mainly by live_lrc_layout() */ + *regs = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *regs |= BIT(0); + } +} + +static const u8 gen8_xcs_offsets[] = { + NOP(1), + LRI(11, 0), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + + NOP(9), + LRI(9, 0), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, 0), + REG16(0x200), + REG(0x028), + + END +}; + +static const u8 gen9_xcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, 
POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, POSTED), + REG16(0x200), + + NOP(13), + LRI(44, POSTED), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END +}; + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 gen8_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 gen9_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x34), + REG(0x30), + REG(0x38), + 
REG(0x3c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0xc8), + + NOP(13), + LRI(44, POSTED), + REG(0x28), + REG(0x9c), + REG(0xc0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x68), + + END +}; + +static const u8 gen11_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(1, POSTED), + REG(0x1b0), + + NOP(10), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + 
REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + NOP(3 + 9 + 1), + + LRI(51, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + REG(0x084), + NOP(1), + + END +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(const struct intel_engine_cs *engine) +{ + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatically fix up the register state between the + * physical engines for a virtual engine. 
+ */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 11) + return gen11_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_rcs_offsets; + else + return gen8_rcs_offsets; + } else { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_xcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_xcs_offsets; + else + return gen8_xcs_offsets; + } +} + +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; +} + +static int lrc_ring_gpr0(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x74; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x68; + else if (engine->class == RENDER_CLASS) + return 0xd8; + else + return -1; +} + +static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) >= 12) + return 0x12; + else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) + return 0x18; + else + return -1; +} + +static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_wa_bb_per_ctx(engine); + if (x < 0) + return x; + + return x + 2; +} + +static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_indirect_ptr(engine); + if (x < 0) + return x; + + return x + 2; +} + +static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) +{ + if (engine->class != RENDER_CLASS) + return -1; + + if (INTEL_GEN(engine->i915) >= 12) + return 0xb6; + else if (INTEL_GEN(engine->i915) >= 11) + return 0xaa; + else + return -1; +} + +static u32 +lrc_ring_indirect_offset_default(const struct 
intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + default: + MISSING_CASE(INTEL_GEN(engine->i915)); + fallthrough; + case 12: + return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 11: + return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 10: + return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 9: + return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 8: + return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + } +} + +static void +lrc_setup_indirect_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr, + u32 size) +{ + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); + GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); + regs[lrc_ring_indirect_ptr(engine) + 1] = + ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); + + GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); + regs[lrc_ring_indirect_offset(engine) + 1] = + lrc_ring_indirect_offset_default(engine) << 6; +} + +static void init_common_regs(u32 * const regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool inhibit) +{ + u32 ctl; + + ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (inhibit) + ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + if (INTEL_GEN(engine->i915) < 11) + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + regs[CTX_CONTEXT_CONTROL] = ctl; + + regs[CTX_TIMESTAMP] = ce->runtime.last; +} + +static void init_wa_bb_regs(u32 * const regs, + const struct intel_engine_cs *engine) +{ + const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; + + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; + } + + if (wa_ctx->indirect_ctx.size) { + lrc_setup_indirect_ctx(regs, engine, + 
i915_ggtt_offset(wa_ctx->vma) + + wa_ctx->indirect_ctx.offset, + wa_ctx->indirect_ctx.size); + } +} + +static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt) +{ + if (i915_vm_is_4lvl(&ppgtt->vm)) { + /* 64b PPGTT (48bit canonical) + * PDP0_DESCRIPTOR contains the base address to PML4 and + * other PDP Descriptors are ignored. + */ + ASSIGN_CTX_PML4(ppgtt, regs); + } else { + ASSIGN_CTX_PDP(ppgtt, regs, 3); + ASSIGN_CTX_PDP(ppgtt, regs, 2); + ASSIGN_CTX_PDP(ppgtt, regs, 1); + ASSIGN_CTX_PDP(ppgtt, regs, 0); + } +} + +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); +} + +static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) +{ + int x; + + x = lrc_ring_mi_mode(engine); + if (x != -1) { + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; + } +} + +static void __lrc_init_regs(u32 *regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool inhibit) +{ + /* + * A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + * + * Must keep consistent with virtual_update_register_offsets(). 
+ */ + + if (inhibit) + memset(regs, 0, PAGE_SIZE); + + set_offsets(regs, reg_offsets(engine), engine, inhibit); + + init_common_regs(regs, ce, engine, inhibit); + init_ppgtt_regs(regs, vm_alias(ce->vm)); + + init_wa_bb_regs(regs, engine); + + __reset_stop_ring(regs, engine); +} + +void lrc_init_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool inhibit) +{ + __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit); +} + +void lrc_reset_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + __reset_stop_ring(ce->lrc_reg_state, engine); +} + +static void +set_redzone(void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += engine->context_size; + + memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); +} + +static void +check_redzone(const void *vaddr, const struct intel_engine_cs *engine) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + vaddr += engine->context_size; + + if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) + drm_err_once(&engine->i915->drm, + "%s context redzone overwritten!\n", + engine->name); +} + +void lrc_init_state(struct intel_context *ce, + struct intel_engine_cs *engine, + void *state) +{ + bool inhibit = true; + + set_redzone(state, engine); + + if (engine->default_state) { + shmem_read(engine->default_state, 0, + state, engine->context_size); + __set_bit(CONTEXT_VALID_BIT, &ce->flags); + inhibit = false; + } + + /* Clear the ppHWSP (inc. per-context counters) */ + memset(state, 0, PAGE_SIZE); + + /* + * The second page of the context object contains some registers which + * must be set up prior to the first execution. 
+ */ + __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit); +} + +static struct i915_vma * +__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 context_size; + + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + context_size += I915_GTT_PAGE_SIZE; /* for redzone */ + + if (INTEL_GEN(engine->i915) == 12) { + ce->wa_bb_page = context_size / PAGE_SIZE; + context_size += PAGE_SIZE; + } + + obj = i915_gem_object_create_shmem(engine->i915, context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + return vma; +} + +static struct intel_timeline * +pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine) +{ + struct intel_timeline *tl = fetch_and_zero(&ce->timeline); + + return intel_timeline_create_from_engine(engine, page_unmask_bits(tl)); +} + +int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) +{ + struct intel_ring *ring; + struct i915_vma *vma; + int err; + + GEM_BUG_ON(ce->state); + + vma = __lrc_alloc_state(ce, engine); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err_vma; + } + + if (!page_mask_bits(ce->timeline)) { + struct intel_timeline *tl; + + /* + * Use the static global HWSP for the kernel context, and + * a dynamically allocated cacheline for everyone else. 
+ */ + if (unlikely(ce->timeline)) + tl = pinned_timeline(ce, engine); + else + tl = intel_timeline_create(engine->gt); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto err_ring; + } + + ce->timeline = tl; + } + + ce->ring = ring; + ce->state = vma; + + return 0; + +err_ring: + intel_ring_put(ring); +err_vma: + i915_vma_put(vma); + return err; +} + +void lrc_reset(struct intel_context *ce) +{ + CE_TRACE(ce, "reset\n"); + GEM_BUG_ON(!intel_context_is_pinned(ce)); + + intel_ring_reset(ce->ring, ce->ring->emit); + + /* Scrub away the garbage */ + lrc_init_regs(ce, ce->engine, true); + ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail); +} + +int +lrc_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + GEM_BUG_ON(!ce->state); + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); + + *vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(ce->engine->i915) | + I915_MAP_OVERRIDE); + + return PTR_ERR_OR_ZERO(*vaddr); +} + +int +lrc_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) +{ + ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; + ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail); + return 0; +} + +void lrc_unpin(struct intel_context *ce) +{ + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, + ce->engine); +} + +void lrc_post_unpin(struct intel_context *ce) +{ + i915_gem_object_unpin_map(ce->state->obj); +} + +void lrc_fini(struct intel_context *ce) +{ + if (!ce->state) + return; + + intel_ring_put(fetch_and_zero(&ce->ring)); + i915_vma_put(fetch_and_zero(&ce->state)); +} + +void lrc_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); + GEM_BUG_ON(intel_context_is_pinned(ce)); + + lrc_fini(ce); + + intel_context_fini(ce); + intel_context_free(ce); +} + +static u32 * +gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs) +{ + *cs++ 
= MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + CTX_TIMESTAMP * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); + + return cs; +} + +static u32 * +gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) +{ + GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1); + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32); + *cs++ = 0; + + return cs; +} + +static u32 * +gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) +{ + GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1); + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + + (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_REG | + MI_LRR_SOURCE_CS_MMIO | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); + *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)); + + return cs; +} + +static u32 * +gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) +{ + cs = gen12_emit_timestamp_wa(ce, cs); + cs = gen12_emit_cmd_buf_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + + return cs; +} + +static u32 * +gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) +{ 
+ cs = gen12_emit_timestamp_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + + return cs; +} + +static inline u32 context_wa_bb_offset(const struct intel_context *ce) +{ + return PAGE_SIZE * ce->wa_bb_page; +} + +static u32 *context_indirect_bb(const struct intel_context *ce) +{ + void *ptr; + + GEM_BUG_ON(!ce->wa_bb_page); + + ptr = ce->lrc_reg_state; + ptr -= LRC_STATE_OFFSET; /* back to start of context image */ + ptr += context_wa_bb_offset(ce); + + return ptr; +} + +static void +setup_indirect_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + u32 * const start = context_indirect_bb(ce); + u32 *cs; + + cs = emit(ce, start); + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + while ((unsigned long)cs % CACHELINE_BYTES) + *cs++ = MI_NOOP; + + lrc_setup_indirect_ctx(ce->lrc_reg_state, engine, + i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce), + (cs - start) * sizeof(*cs)); +} + +/* + * The context descriptor encodes various attributes of a context, + * including its GTT address and some flags. Because it's fairly + * expensive to calculate, we'll just do it once and cache the result, + * which remains valid until the context is unpinned. 
+ * + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. + */ +static inline u32 lrc_descriptor(const struct intel_context *ce) +{ + u32 desc; + + desc = INTEL_LEGACY_32B_CONTEXT; + if (i915_vm_is_4lvl(ce->vm)) + desc = INTEL_LEGACY_64B_CONTEXT; + desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; + + desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (IS_GEN(ce->vm->i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + return i915_ggtt_offset(ce->state) | desc; +} + +u32 lrc_update_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 head) +{ + struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + + GEM_BUG_ON(!intel_ring_offset_valid(ring, head)); + GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); + + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD] = head; + regs[CTX_RING_TAIL] = ring->tail; + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + + /* RPCS */ + if (engine->class == RENDER_CLASS) { + regs[CTX_R_PWR_CLK_STATE] = + intel_sseu_make_rpcs(engine->gt, &ce->sseu); + + i915_oa_init_reg_state(ce, engine); + } + + if (ce->wa_bb_page) { + u32 *(*fn)(const struct intel_context *ce, u32 *cs); + + fn = gen12_emit_indirect_ctx_xcs; + if (ce->engine->class == RENDER_CLASS) + fn = gen12_emit_indirect_ctx_rcs; 
+ + /* Mutually exclusive wrt to global indirect bb */ + GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); + setup_indirect_ctx_bb(ce, engine, fn); + } + + return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE; +} + +void lrc_update_offsets(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false); +} + +void lrc_check_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + const char *when) +{ + const struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + bool valid = true; + int x; + + if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { + pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_START], + i915_ggtt_offset(ring->vma)); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + valid = false; + } + + if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != + (RING_CTL_SIZE(ring->size) | RING_VALID)) { + pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_CTL], + (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + valid = false; + } + + x = lrc_ring_mi_mode(engine); + if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { + pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", + engine->name, regs[x + 1]); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; + valid = false; + } + + WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when); +} + +/* + * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after + * PIPE_CONTROL instruction. 
This is required for the flush to happen correctly + * but there is a slight complication as this is applied in WA batch where the + * values are only initialized once so we cannot take register value at the + * beginning and reuse it further; hence we save its value to memory, upload a + * constant value with bit21 set and then we restore it back with the saved value. + * To simplify the WA, a constant value is formed by using the default value + * of this register. This shouldn't be a problem because we are only modifying + * it for a short period and this batch is non-preemptible. We can of course + * use additional instructions that read the actual value of the register + * at that time and set our bit of interest but it makes the WA complicated. + * + * This WA is also required for Gen9 so extracting as a function avoids + * code duplication. + */ +static u32 * +gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) +{ + /* NB no one else is allowed to scribble over scratch + 256! 
*/ + *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); + *batch++ = 0; + + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; + + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); + *batch++ = 0; + + return batch; +} + +/* + * Typically we only have one indirect_ctx and per_ctx batch buffer which are + * initialized at the beginning and shared across all contexts but this field + * helps us to have multiple batches at different offsets and select them based + * on a criterion. At the moment this batch always starts at the beginning of the page + * and at this point we don't have multiple wa_ctx batch buffers. + * + * The number of WAs applied is not known at the beginning; we use this field + * to return the number of DWORDs written. + * + * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END + * so it adds NOOPs as padding to make it cacheline aligned. + * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together + * make a complete batch buffer. 
+ */ +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + /* WaDisableCtxRestoreArbitration:bdw,chv */ + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); + + /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ + /* Actual scratch location is at 128 bytes offset */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + LRC_PPHWSP_SCRATCH_ADDR); + + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + /* + * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because + * execution depends on the length specified in terms of cache lines + * in the register CTX_RCS_INDIRECT_CTX + */ + + return batch; +} + +struct lri { + i915_reg_t reg; + u32 value; +}; + +static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) +{ + GEM_BUG_ON(!count || count > 63); + + *batch++ = MI_LOAD_REGISTER_IMM(count); + do { + *batch++ = i915_mmio_reg_offset(lri->reg); + *batch++ = lri->value; + } while (lri++, --count); + *batch++ = MI_NOOP; + + return batch; +} + +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + static const struct lri lri[] = { + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ + { + COMMON_SLICE_CHICKEN2, + __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE, + 0), + }, + + /* BSpec: 11391 */ + { + FF_SLICE_CHICKEN, + __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX, + FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX), + }, + + /* BSpec: 11299 */ + { + _3D_CHICKEN3, + __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX, + _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX), + } + }; + + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* 
WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ + batch = gen8_emit_flush_coherentl3_wa(engine, batch); + + /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + LRC_PPHWSP_SCRATCH_ADDR); + + batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); + + /* WaMediaPoolStateCmdInWABB:bxt,glk */ + if (HAS_POOLED_EU(engine->i915)) { + /* + * EU pool configuration is setup along with golden context + * during context initialization. This value depends on + * device type (2x6 or 3x6) and needs to be updated based + * on which subslice is disabled especially for 2x6 + * devices, however it is safe to load default + * configuration of 3x6 device instead of masking off + * corresponding bits because HW ignores bits of a disabled + * subslice and drops down to appropriate config. Please + * see render_state_setup() in i915_gem_render_state.c for + * possible configurations, to avoid duplication they are + * not shown here again. + */ + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; + } + + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + return batch; +} + +static u32 * +gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) +{ + int i; + + /* + * WaPipeControlBefore3DStateSamplePattern: cnl + * + * Ensure the engine is idle prior to programming a + * 3DSTATE_SAMPLE_PATTERN during a context restore. + */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL, + 0); + /* + * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for + * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in + * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is + * confusing. 
Since gen8_emit_pipe_control() already advances the + * batch by 6 dwords, we advance the other 10 here, completing a + * cacheline. It's not clear if the workaround requires this padding + * before other commands, or if it's just the regular padding we would + * already have for the workaround bb, so leave it here for now. + */ + for (i = 0; i < 10; i++) + *batch++ = MI_NOOP; + + /* Pad to end of cacheline */ + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; + + return batch; +} + +#define CTX_WA_BB_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); + if (err) + goto err; + + engine->wa_ctx.vma = vma; + return 0; + +err: + i915_gem_object_put(obj); + return err; +} + +void lrc_fini_wa_ctx(struct intel_engine_cs *engine) +{ + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); +} + +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + +int lrc_init_wa_ctx(struct intel_engine_cs *engine) +{ + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + struct i915_wa_ctx_bb *wa_bb[] = { + &wa_ctx->indirect_ctx, &wa_ctx->per_ctx + }; + wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)]; + void *batch, *batch_ptr; + unsigned int i; + int ret; + + if (engine->class != RENDER_CLASS) + return 0; + + switch (INTEL_GEN(engine->i915)) { + case 12: + case 11: + return 0; + case 10: + wa_bb_fn[0] = gen10_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = NULL; + break; + default: + MISSING_CASE(INTEL_GEN(engine->i915)); 
+ return 0; + } + + ret = lrc_setup_wa_ctx(engine); + if (ret) { + drm_dbg(&engine->i915->drm, + "Failed to setup context WA page: %d\n", ret); + return ret; + } + + batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); + + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. + */ + batch_ptr = batch; + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, + CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + if (wa_bb_fn[i]) + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); + } + GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE); + + __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch); + __i915_gem_object_release_map(wa_ctx->vma->obj); + if (ret) + lrc_fini_wa_ctx(engine); + + return ret; +} + +static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) +{ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + ce->runtime.num_underflow++; + ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); +#endif +} + +void lrc_update_runtime(struct intel_context *ce) +{ + u32 old; + s32 dt; + + if (intel_context_is_barrier(ce)) + return; + + old = ce->runtime.last; + ce->runtime.last = lrc_get_runtime(ce); + dt = ce->runtime.last - old; + + if (unlikely(dt < 0)) { + CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", + old, ce->runtime.last, dt); + st_update_runtime_underflow(ce, dt); + return; + } + + ewma_runtime_add(&ce->runtime.avg, dt); + ce->runtime.total += dt; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_lrc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h new file mode 100644 index 000000000000..4e006853e815 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -0,0 +1,82 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014 Intel Corporation + */ + +#ifndef __INTEL_LRC_H__ +#define __INTEL_LRC_H__ + +#include <linux/types.h> + +#include "intel_context.h" +#include "intel_lrc_reg.h" + +struct drm_i915_gem_object; +struct intel_engine_cs; +struct intel_ring; + +/* At the start of the context image is its per-process HWS page */ +#define LRC_PPHWSP_PN (0) +#define LRC_PPHWSP_SZ (1) +/* After the PPHWSP we have the logical state for the context */ +#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) +#define LRC_STATE_OFFSET (LRC_STATE_PN * PAGE_SIZE) + +/* Space within PPHWSP reserved to be used as scratch */ +#define LRC_PPHWSP_SCRATCH 0x34 +#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) + +int lrc_init_wa_ctx(struct intel_engine_cs *engine); +void lrc_fini_wa_ctx(struct intel_engine_cs *engine); + +int lrc_alloc(struct intel_context *ce, + struct intel_engine_cs *engine); +void lrc_reset(struct intel_context *ce); +void lrc_fini(struct intel_context *ce); +void lrc_destroy(struct kref *kref); + +int +lrc_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr); +int +lrc_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr); +void lrc_unpin(struct intel_context *ce); +void lrc_post_unpin(struct intel_context *ce); + +void lrc_init_state(struct intel_context *ce, + struct intel_engine_cs *engine, + void *state); + +void lrc_init_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool clear); +void lrc_reset_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine); + +u32 lrc_update_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 head); +void lrc_update_offsets(struct intel_context *ce, + struct intel_engine_cs *engine); + +void lrc_check_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + const char *when); + +void 
lrc_update_runtime(struct intel_context *ce); +static inline u32 lrc_get_runtime(const struct intel_context *ce) +{ + /* + * We can use either ppHWSP[16] which is recorded before the context + * switch (and so excludes the cost of context switches) or use the + * value from the context image itself, which is saved/restored earlier + * and so includes the cost of the save. + */ + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); +} + +#endif /* __INTEL_LRC_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index b2e03ce35599..65fe76738335 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,6 +9,8 @@ #include <linux/types.h> +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + /* GEN8 to GEN12 Reg State Context */ #define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_RING_HEAD (0x04 + 1) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 95d41c01d0e0..34c2bb8313eb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -249,7 +249,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) } GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); + lrc_update_regs(ce[1], engine, ce[1]->ring->head); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { @@ -4705,1777 +4705,3 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) return intel_gt_live_subtests(tests, &i915->gt); } - -static int emit_semaphore_signal(struct intel_context *ce, void *slot) -{ - const u32 offset = - i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(slot); - struct i915_request *rq; - u32 *cs; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) 
{ - i915_request_add(rq); - return PTR_ERR(cs); - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = offset; - *cs++ = 0; - *cs++ = 1; - - intel_ring_advance(rq, cs); - - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_add(rq); - return 0; -} - -static int context_flush(struct intel_context *ce, long timeout) -{ - struct i915_request *rq; - struct dma_fence *fence; - int err = 0; - - rq = intel_engine_create_kernel_request(ce->engine); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - fence = i915_active_fence_get(&ce->timeline->last_request); - if (fence) { - i915_request_await_dma_fence(rq, fence); - dma_fence_put(fence); - } - - rq = i915_request_get(rq); - i915_request_add(rq); - if (i915_request_wait(rq, 0, timeout) < 0) - err = -ETIME; - i915_request_put(rq); - - rmb(); /* We know the request is written, make sure all state is too! */ - return err; -} - -static int live_lrc_layout(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 *lrc; - int err; - - /* - * Check the registers offsets we use to create the initial reg state - * match the layout saved by HW. 
- */ - - lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!lrc) - return -ENOMEM; - - err = 0; - for_each_engine(engine, gt, id) { - u32 *hw; - int dw; - - if (!engine->default_state) - continue; - - hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); - break; - } - hw += LRC_STATE_OFFSET / sizeof(*hw); - - execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), - engine->kernel_context, - engine, - engine->kernel_context->ring, - true); - - dw = 0; - do { - u32 lri = hw[dw]; - - if (lri == 0) { - dw++; - continue; - } - - if (lrc[dw] == 0) { - pr_debug("%s: skipped instruction %x at dword %d\n", - engine->name, lri, dw); - dw++; - continue; - } - - if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { - pr_err("%s: Expected LRI command at dword %d, found %08x\n", - engine->name, dw, lri); - err = -EINVAL; - break; - } - - if (lrc[dw] != lri) { - pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", - engine->name, dw, lri, lrc[dw]); - err = -EINVAL; - break; - } - - lri &= 0x7f; - lri++; - dw++; - - while (lri) { - if (hw[dw] != lrc[dw]) { - pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", - engine->name, dw, hw[dw], lrc[dw]); - err = -EINVAL; - break; - } - - /* - * Skip over the actual register value as we - * expect that to differ. 
- */ - dw += 2; - lri -= 2; - } - } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); - - if (err) { - pr_info("%s: HW register image:\n", engine->name); - igt_hexdump(hw, PAGE_SIZE); - - pr_info("%s: SW register image:\n", engine->name); - igt_hexdump(lrc, PAGE_SIZE); - } - - shmem_unpin_map(engine->default_state, hw); - if (err) - break; - } - - kfree(lrc); - return err; -} - -static int find_offset(const u32 *lri, u32 offset) -{ - int i; - - for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) - if (lri[i] == offset) - return i; - - return -1; -} - -static int live_lrc_fixed(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* - * Check the assumed register offsets match the actual locations in - * the context image. - */ - - for_each_engine(engine, gt, id) { - const struct { - u32 reg; - u32 offset; - const char *name; - } tbl[] = { - { - i915_mmio_reg_offset(RING_START(engine->mmio_base)), - CTX_RING_START - 1, - "RING_START" - }, - { - i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), - CTX_RING_CTL - 1, - "RING_CTL" - }, - { - i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), - CTX_RING_HEAD - 1, - "RING_HEAD" - }, - { - i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), - CTX_RING_TAIL - 1, - "RING_TAIL" - }, - { - i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), - lrc_ring_mi_mode(engine), - "RING_MI_MODE" - }, - { - i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), - CTX_BB_STATE - 1, - "BB_STATE" - }, - { - i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), - lrc_ring_wa_bb_per_ctx(engine), - "RING_BB_PER_CTX_PTR" - }, - { - i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), - lrc_ring_indirect_ptr(engine), - "RING_INDIRECT_CTX_PTR" - }, - { - i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), - lrc_ring_indirect_offset(engine), - "RING_INDIRECT_CTX_OFFSET" - }, - { - i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), - 
CTX_TIMESTAMP - 1, - "RING_CTX_TIMESTAMP" - }, - { - i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), - lrc_ring_gpr0(engine), - "RING_CS_GPR0" - }, - { - i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), - lrc_ring_cmd_buf_cctl(engine), - "RING_CMD_BUF_CCTL" - }, - { }, - }, *t; - u32 *hw; - - if (!engine->default_state) - continue; - - hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); - break; - } - hw += LRC_STATE_OFFSET / sizeof(*hw); - - for (t = tbl; t->name; t++) { - int dw = find_offset(hw, t->reg); - - if (dw != t->offset) { - pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", - engine->name, - t->name, - t->reg, - dw, - t->offset); - err = -EINVAL; - } - } - - shmem_unpin_map(engine->default_state, hw); - } - - return err; -} - -static int __live_lrc_state(struct intel_engine_cs *engine, - struct i915_vma *scratch) -{ - struct intel_context *ce; - struct i915_request *rq; - struct i915_gem_ww_ctx ww; - enum { - RING_START_IDX = 0, - RING_TAIL_IDX, - MAX_IDX - }; - u32 expected[MAX_IDX]; - u32 *cs; - int err; - int n; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - i915_gem_ww_ctx_init(&ww, false); -retry: - err = i915_gem_object_lock(scratch->obj, &ww); - if (!err) - err = intel_context_pin_ww(ce, &ww); - if (err) - goto err_put; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - cs = intel_ring_begin(rq, 4 * MAX_IDX); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - i915_request_add(rq); - goto err_unpin; - } - - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); - *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); - *cs++ = 0; - - expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); - - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); - *cs++ = 
i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); - *cs++ = 0; - - err = i915_request_await_object(rq, scratch->obj, true); - if (!err) - err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); - - i915_request_get(rq); - i915_request_add(rq); - if (err) - goto err_rq; - - intel_engine_flush_submission(engine); - expected[RING_TAIL_IDX] = ce->ring->tail; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - err = -ETIME; - goto err_rq; - } - - cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_rq; - } - - for (n = 0; n < MAX_IDX; n++) { - if (cs[n] != expected[n]) { - pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", - engine->name, n, cs[n], expected[n]); - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_map(scratch->obj); - -err_rq: - i915_request_put(rq); -err_unpin: - intel_context_unpin(ce); -err_put: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_context_put(ce); - return err; -} - -static int live_lrc_state(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct i915_vma *scratch; - enum intel_engine_id id; - int err = 0; - - /* - * Check the live register state matches what we expect for this - * intel_context. 
- */ - - scratch = create_scratch(gt); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - for_each_engine(engine, gt, id) { - err = __live_lrc_state(engine, scratch); - if (err) - break; - } - - if (igt_flush_test(gt->i915)) - err = -EIO; - - i915_vma_unpin_and_release(&scratch, 0); - return err; -} - -static int gpr_make_dirty(struct intel_context *ce) -{ - struct i915_request *rq; - u32 *cs; - int n; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); - if (IS_ERR(cs)) { - i915_request_add(rq); - return PTR_ERR(cs); - } - - *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); - for (n = 0; n < NUM_GPR_DW; n++) { - *cs++ = CS_GPR(ce->engine, n); - *cs++ = STACK_MAGIC; - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_add(rq); - - return 0; -} - -static struct i915_request * -__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) -{ - const u32 offset = - i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(slot); - struct i915_request *rq; - u32 *cs; - int err; - int n; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return rq; - - cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); - if (IS_ERR(cs)) { - i915_request_add(rq); - return ERR_CAST(cs); - } - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *cs++ = MI_NOOP; - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; - *cs++ = offset; - *cs++ = 0; - - for (n = 0; n < NUM_GPR_DW; n++) { - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = CS_GPR(ce->engine, n); - *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); - *cs++ = 0; - } - - i915_vma_lock(scratch); - err = i915_request_await_object(rq, scratch->obj, true); - if (!err) - err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(scratch); - - i915_request_get(rq); - 
i915_request_add(rq); - if (err) { - i915_request_put(rq); - rq = ERR_PTR(err); - } - - return rq; -} - -static int __live_lrc_gpr(struct intel_engine_cs *engine, - struct i915_vma *scratch, - bool preempt) -{ - u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); - struct intel_context *ce; - struct i915_request *rq; - u32 *cs; - int err; - int n; - - if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) - return 0; /* GPR only on rcs0 for gen8 */ - - err = gpr_make_dirty(engine->kernel_context); - if (err) - return err; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - rq = __gpr_read(ce, scratch, slot); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_put; - } - - err = wait_for_submit(engine, rq, HZ / 2); - if (err) - goto err_rq; - - if (preempt) { - err = gpr_make_dirty(engine->kernel_context); - if (err) - goto err_rq; - - err = emit_semaphore_signal(engine->kernel_context, slot); - if (err) - goto err_rq; - } else { - slot[0] = 1; - wmb(); - } - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - err = -ETIME; - goto err_rq; - } - - cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_rq; - } - - for (n = 0; n < NUM_GPR_DW; n++) { - if (cs[n]) { - pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", - engine->name, - n / 2, n & 1 ? "udw" : "ldw", - cs[n]); - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_map(scratch->obj); - -err_rq: - memset32(&slot[0], -1, 4); - wmb(); - i915_request_put(rq); -err_put: - intel_context_put(ce); - return err; -} - -static int live_lrc_gpr(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct i915_vma *scratch; - enum intel_engine_id id; - int err = 0; - - /* - * Check that GPR registers are cleared in new contexts as we need - * to avoid leaking any information from previous contexts. 
- */ - - scratch = create_scratch(gt); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - for_each_engine(engine, gt, id) { - st_engine_heartbeat_disable(engine); - - err = __live_lrc_gpr(engine, scratch, false); - if (err) - goto err; - - err = __live_lrc_gpr(engine, scratch, true); - if (err) - goto err; - -err: - st_engine_heartbeat_enable(engine); - if (igt_flush_test(gt->i915)) - err = -EIO; - if (err) - break; - } - - i915_vma_unpin_and_release(&scratch, 0); - return err; -} - -static struct i915_request * -create_timestamp(struct intel_context *ce, void *slot, int idx) -{ - const u32 offset = - i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(slot); - struct i915_request *rq; - u32 *cs; - int err; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return rq; - - cs = intel_ring_begin(rq, 10); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err; - } - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *cs++ = MI_NOOP; - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; - *cs++ = offset; - *cs++ = 0; - - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); - *cs++ = offset + idx * sizeof(u32); - *cs++ = 0; - - intel_ring_advance(rq, cs); - - rq->sched.attr.priority = I915_PRIORITY_MASK; - err = 0; -err: - i915_request_get(rq); - i915_request_add(rq); - if (err) { - i915_request_put(rq); - return ERR_PTR(err); - } - - return rq; -} - -struct lrc_timestamp { - struct intel_engine_cs *engine; - struct intel_context *ce[2]; - u32 poison; -}; - -static bool timestamp_advanced(u32 start, u32 end) -{ - return (s32)(end - start) > 0; -} - -static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) -{ - u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); - struct i915_request *rq; - u32 timestamp; - int err = 0; - - arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; - rq = 
create_timestamp(arg->ce[0], slot, 1); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - err = wait_for_submit(rq->engine, rq, HZ / 2); - if (err) - goto err; - - if (preempt) { - arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; - err = emit_semaphore_signal(arg->ce[1], slot); - if (err) - goto err; - } else { - slot[0] = 1; - wmb(); - } - - /* And wait for switch to kernel (to save our context to memory) */ - err = context_flush(arg->ce[0], HZ / 2); - if (err) - goto err; - - if (!timestamp_advanced(arg->poison, slot[1])) { - pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", - arg->engine->name, preempt ? "preempt" : "simple", - arg->poison, slot[1]); - err = -EINVAL; - } - - timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); - if (!timestamp_advanced(slot[1], timestamp)) { - pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", - arg->engine->name, preempt ? "preempt" : "simple", - slot[1], timestamp); - err = -EINVAL; - } - -err: - memset32(slot, -1, 4); - i915_request_put(rq); - return err; -} - -static int live_lrc_timestamp(void *arg) -{ - struct lrc_timestamp data = {}; - struct intel_gt *gt = arg; - enum intel_engine_id id; - const u32 poison[] = { - 0, - S32_MAX, - (u32)S32_MAX + 1, - U32_MAX, - }; - - /* - * We want to verify that the timestamp is saved and restore across - * context switches and is monotonic. - * - * So we do this with a little bit of LRC poisoning to check various - * boundary conditions, and see what happens if we preempt the context - * with a second request (carrying more poison into the timestamp). 
- */ - - for_each_engine(data.engine, gt, id) { - int i, err = 0; - - st_engine_heartbeat_disable(data.engine); - - for (i = 0; i < ARRAY_SIZE(data.ce); i++) { - struct intel_context *tmp; - - tmp = intel_context_create(data.engine); - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - goto err; - } - - err = intel_context_pin(tmp); - if (err) { - intel_context_put(tmp); - goto err; - } - - data.ce[i] = tmp; - } - - for (i = 0; i < ARRAY_SIZE(poison); i++) { - data.poison = poison[i]; - - err = __lrc_timestamp(&data, false); - if (err) - break; - - err = __lrc_timestamp(&data, true); - if (err) - break; - } - -err: - st_engine_heartbeat_enable(data.engine); - for (i = 0; i < ARRAY_SIZE(data.ce); i++) { - if (!data.ce[i]) - break; - - intel_context_unpin(data.ce[i]); - intel_context_put(data.ce[i]); - } - - if (igt_flush_test(gt->i915)) - err = -EIO; - if (err) - return err; - } - - return 0; -} - -static struct i915_vma * -create_user_vma(struct i915_address_space *vm, unsigned long size) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create_internal(vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) { - i915_gem_object_put(obj); - return ERR_PTR(err); - } - - return vma; -} - -static struct i915_vma * -store_context(struct intel_context *ce, struct i915_vma *scratch) -{ - struct i915_vma *batch; - u32 dw, x, *cs, *hw; - u32 *defaults; - - batch = create_user_vma(ce->vm, SZ_64K); - if (IS_ERR(batch)) - return batch; - - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cs)) { - i915_vma_put(batch); - return ERR_CAST(cs); - } - - defaults = shmem_pin_map(ce->engine->default_state); - if (!defaults) { - i915_gem_object_unpin_map(batch->obj); - i915_vma_put(batch); - return ERR_PTR(-ENOMEM); - } - - x = 0; - dw = 0; - hw = defaults; - 
hw += LRC_STATE_OFFSET / sizeof(*hw); - do { - u32 len = hw[dw] & 0x7f; - - if (hw[dw] == 0) { - dw++; - continue; - } - - if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { - dw += len + 2; - continue; - } - - dw++; - len = (len + 1) / 2; - while (len--) { - *cs++ = MI_STORE_REGISTER_MEM_GEN8; - *cs++ = hw[dw]; - *cs++ = lower_32_bits(scratch->node.start + x); - *cs++ = upper_32_bits(scratch->node.start + x); - - dw += 2; - x += 4; - } - } while (dw < PAGE_SIZE / sizeof(u32) && - (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); - - *cs++ = MI_BATCH_BUFFER_END; - - shmem_unpin_map(ce->engine->default_state, defaults); - - i915_gem_object_flush_map(batch->obj); - i915_gem_object_unpin_map(batch->obj); - - return batch; -} - -static int move_to_active(struct i915_request *rq, - struct i915_vma *vma, - unsigned int flags) -{ - int err; - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, flags); - if (!err) - err = i915_vma_move_to_active(vma, rq, flags); - i915_vma_unlock(vma); - - return err; -} - -static struct i915_request * -record_registers(struct intel_context *ce, - struct i915_vma *before, - struct i915_vma *after, - u32 *sema) -{ - struct i915_vma *b_before, *b_after; - struct i915_request *rq; - u32 *cs; - int err; - - b_before = store_context(ce, before); - if (IS_ERR(b_before)) - return ERR_CAST(b_before); - - b_after = store_context(ce, after); - if (IS_ERR(b_after)) { - rq = ERR_CAST(b_after); - goto err_before; - } - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - goto err_after; - - err = move_to_active(rq, before, EXEC_OBJECT_WRITE); - if (err) - goto err_rq; - - err = move_to_active(rq, b_before, 0); - if (err) - goto err_rq; - - err = move_to_active(rq, after, EXEC_OBJECT_WRITE); - if (err) - goto err_rq; - - err = move_to_active(rq, b_after, 0); - if (err) - goto err_rq; - - cs = intel_ring_begin(rq, 14); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_rq; - } - - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - *cs++ = 
MI_BATCH_BUFFER_START_GEN8 | BIT(8); - *cs++ = lower_32_bits(b_before->node.start); - *cs++ = upper_32_bits(b_before->node.start); - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; - *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(sema); - *cs++ = 0; - *cs++ = MI_NOOP; - - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); - *cs++ = lower_32_bits(b_after->node.start); - *cs++ = upper_32_bits(b_after->node.start); - - intel_ring_advance(rq, cs); - - WRITE_ONCE(*sema, 0); - i915_request_get(rq); - i915_request_add(rq); -err_after: - i915_vma_put(b_after); -err_before: - i915_vma_put(b_before); - return rq; - -err_rq: - i915_request_add(rq); - rq = ERR_PTR(err); - goto err_after; -} - -static struct i915_vma *load_context(struct intel_context *ce, u32 poison) -{ - struct i915_vma *batch; - u32 dw, *cs, *hw; - u32 *defaults; - - batch = create_user_vma(ce->vm, SZ_64K); - if (IS_ERR(batch)) - return batch; - - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cs)) { - i915_vma_put(batch); - return ERR_CAST(cs); - } - - defaults = shmem_pin_map(ce->engine->default_state); - if (!defaults) { - i915_gem_object_unpin_map(batch->obj); - i915_vma_put(batch); - return ERR_PTR(-ENOMEM); - } - - dw = 0; - hw = defaults; - hw += LRC_STATE_OFFSET / sizeof(*hw); - do { - u32 len = hw[dw] & 0x7f; - - if (hw[dw] == 0) { - dw++; - continue; - } - - if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { - dw += len + 2; - continue; - } - - dw++; - len = (len + 1) / 2; - *cs++ = MI_LOAD_REGISTER_IMM(len); - while (len--) { - *cs++ = hw[dw]; - *cs++ = poison; - dw += 2; - } - } while (dw < PAGE_SIZE / sizeof(u32) && - (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); - - *cs++ = MI_BATCH_BUFFER_END; - - shmem_unpin_map(ce->engine->default_state, defaults); - - i915_gem_object_flush_map(batch->obj); - 
i915_gem_object_unpin_map(batch->obj); - - return batch; -} - -static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) -{ - struct i915_request *rq; - struct i915_vma *batch; - u32 *cs; - int err; - - batch = load_context(ce, poison); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - err = move_to_active(rq, batch, 0); - if (err) - goto err_rq; - - cs = intel_ring_begin(rq, 8); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_rq; - } - - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); - *cs++ = lower_32_bits(batch->node.start); - *cs++ = upper_32_bits(batch->node.start); - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(sema); - *cs++ = 0; - *cs++ = 1; - - intel_ring_advance(rq, cs); - - rq->sched.attr.priority = I915_PRIORITY_BARRIER; -err_rq: - i915_request_add(rq); -err_batch: - i915_vma_put(batch); - return err; -} - -static bool is_moving(u32 a, u32 b) -{ - return a != b; -} - -static int compare_isolation(struct intel_engine_cs *engine, - struct i915_vma *ref[2], - struct i915_vma *result[2], - struct intel_context *ce, - u32 poison) -{ - u32 x, dw, *hw, *lrc; - u32 *A[2], *B[2]; - u32 *defaults; - int err = 0; - - A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); - if (IS_ERR(A[0])) - return PTR_ERR(A[0]); - - A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); - if (IS_ERR(A[1])) { - err = PTR_ERR(A[1]); - goto err_A0; - } - - B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); - if (IS_ERR(B[0])) { - err = PTR_ERR(B[0]); - goto err_A1; - } - - B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); - if (IS_ERR(B[1])) { - err = PTR_ERR(B[1]); - goto err_B0; - } - - lrc = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(engine->i915)); - if (IS_ERR(lrc)) { - err = 
PTR_ERR(lrc); - goto err_B1; - } - lrc += LRC_STATE_OFFSET / sizeof(*hw); - - defaults = shmem_pin_map(ce->engine->default_state); - if (!defaults) { - err = -ENOMEM; - goto err_lrc; - } - - x = 0; - dw = 0; - hw = defaults; - hw += LRC_STATE_OFFSET / sizeof(*hw); - do { - u32 len = hw[dw] & 0x7f; - - if (hw[dw] == 0) { - dw++; - continue; - } - - if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { - dw += len + 2; - continue; - } - - dw++; - len = (len + 1) / 2; - while (len--) { - if (!is_moving(A[0][x], A[1][x]) && - (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { - switch (hw[dw] & 4095) { - case 0x30: /* RING_HEAD */ - case 0x34: /* RING_TAIL */ - break; - - default: - pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", - engine->name, dw, - hw[dw], hw[dw + 1], - A[0][x], B[0][x], B[1][x], - poison, lrc[dw + 1]); - err = -EINVAL; - } - } - dw += 2; - x++; - } - } while (dw < PAGE_SIZE / sizeof(u32) && - (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); - - shmem_unpin_map(ce->engine->default_state, defaults); -err_lrc: - i915_gem_object_unpin_map(ce->state->obj); -err_B1: - i915_gem_object_unpin_map(result[1]->obj); -err_B0: - i915_gem_object_unpin_map(result[0]->obj); -err_A1: - i915_gem_object_unpin_map(ref[1]->obj); -err_A0: - i915_gem_object_unpin_map(ref[0]->obj); - return err; -} - -static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) -{ - u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); - struct i915_vma *ref[2], *result[2]; - struct intel_context *A, *B; - struct i915_request *rq; - int err; - - A = intel_context_create(engine); - if (IS_ERR(A)) - return PTR_ERR(A); - - B = intel_context_create(engine); - if (IS_ERR(B)) { - err = PTR_ERR(B); - goto err_A; - } - - ref[0] = create_user_vma(A->vm, SZ_64K); - if (IS_ERR(ref[0])) { - err = PTR_ERR(ref[0]); - goto err_B; - } - - ref[1] = create_user_vma(A->vm, SZ_64K); - if (IS_ERR(ref[1])) { - err = 
PTR_ERR(ref[1]); - goto err_ref0; - } - - rq = record_registers(A, ref[0], ref[1], sema); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ref1; - } - - WRITE_ONCE(*sema, 1); - wmb(); - - if (i915_request_wait(rq, 0, HZ / 2) < 0) { - i915_request_put(rq); - err = -ETIME; - goto err_ref1; - } - i915_request_put(rq); - - result[0] = create_user_vma(A->vm, SZ_64K); - if (IS_ERR(result[0])) { - err = PTR_ERR(result[0]); - goto err_ref1; - } - - result[1] = create_user_vma(A->vm, SZ_64K); - if (IS_ERR(result[1])) { - err = PTR_ERR(result[1]); - goto err_result0; - } - - rq = record_registers(A, result[0], result[1], sema); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_result1; - } - - err = poison_registers(B, poison, sema); - if (err) { - WRITE_ONCE(*sema, -1); - i915_request_put(rq); - goto err_result1; - } - - if (i915_request_wait(rq, 0, HZ / 2) < 0) { - i915_request_put(rq); - err = -ETIME; - goto err_result1; - } - i915_request_put(rq); - - err = compare_isolation(engine, ref, result, A, poison); - -err_result1: - i915_vma_put(result[1]); -err_result0: - i915_vma_put(result[0]); -err_ref1: - i915_vma_put(ref[1]); -err_ref0: - i915_vma_put(ref[0]); -err_B: - intel_context_put(B); -err_A: - intel_context_put(A); - return err; -} - -static bool skip_isolation(const struct intel_engine_cs *engine) -{ - if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) - return true; - - if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) - return true; - - return false; -} - -static int live_lrc_isolation(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - const u32 poison[] = { - STACK_MAGIC, - 0x3a3a3a3a, - 0x5c5c5c5c, - 0xffffffff, - 0xffff0000, - }; - int err = 0; - - /* - * Our goal is try and verify that per-context state cannot be - * tampered with by another non-privileged client. 
- * - * We take the list of context registers from the LRI in the default - * context image and attempt to modify that list from a remote context. - */ - - for_each_engine(engine, gt, id) { - int i; - - /* Just don't even ask */ - if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && - skip_isolation(engine)) - continue; - - intel_engine_pm_get(engine); - for (i = 0; i < ARRAY_SIZE(poison); i++) { - int result; - - result = __lrc_isolation(engine, poison[i]); - if (result && !err) - err = result; - - result = __lrc_isolation(engine, ~poison[i]); - if (result && !err) - err = result; - } - intel_engine_pm_put(engine); - if (igt_flush_test(gt->i915)) { - err = -EIO; - break; - } - } - - return err; -} - -static int indirect_ctx_submit_req(struct intel_context *ce) -{ - struct i915_request *rq; - int err = 0; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - i915_request_get(rq); - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -ETIME; - - i915_request_put(rq); - - return err; -} - -#define CTX_BB_CANARY_OFFSET (3 * 1024) -#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) - -static u32 * -emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT | - MI_LRI_LRM_CS_MMIO; - *cs++ = i915_mmio_reg_offset(RING_START(0)); - *cs++ = i915_ggtt_offset(ce->state) + - context_wa_bb_offset(ce) + - CTX_BB_CANARY_OFFSET; - *cs++ = 0; - - return cs; -} - -static void -indirect_ctx_bb_setup(struct intel_context *ce) -{ - u32 *cs = context_indirect_bb(ce); - - cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; - - setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); -} - -static bool check_ring_start(struct intel_context *ce) -{ - const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - - LRC_STATE_OFFSET + context_wa_bb_offset(ce); - - if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) - return true; - - 
pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", - ctx_bb[CTX_BB_CANARY_INDEX], - ce->lrc_reg_state[CTX_RING_START]); - - return false; -} - -static int indirect_ctx_bb_check(struct intel_context *ce) -{ - int err; - - err = indirect_ctx_submit_req(ce); - if (err) - return err; - - if (!check_ring_start(ce)) - return -EINVAL; - - return 0; -} - -static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) -{ - struct intel_context *a, *b; - int err; - - a = intel_context_create(engine); - if (IS_ERR(a)) - return PTR_ERR(a); - err = intel_context_pin(a); - if (err) - goto put_a; - - b = intel_context_create(engine); - if (IS_ERR(b)) { - err = PTR_ERR(b); - goto unpin_a; - } - err = intel_context_pin(b); - if (err) - goto put_b; - - /* We use the already reserved extra page in context state */ - if (!a->wa_bb_page) { - GEM_BUG_ON(b->wa_bb_page); - GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); - goto unpin_b; - } - - /* - * In order to test that our per context bb is truly per context, - * and executes at the intended spot on context restoring process, - * make the batch store the ring start value to memory. - * As ring start is restored apriori of starting the indirect ctx bb and - * as it will be different for each context, it fits to this purpose. 
- */ - indirect_ctx_bb_setup(a); - indirect_ctx_bb_setup(b); - - err = indirect_ctx_bb_check(a); - if (err) - goto unpin_b; - - err = indirect_ctx_bb_check(b); - -unpin_b: - intel_context_unpin(b); -put_b: - intel_context_put(b); -unpin_a: - intel_context_unpin(a); -put_a: - intel_context_put(a); - - return err; -} - -static int live_lrc_indirect_ctx_bb(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - for_each_engine(engine, gt, id) { - intel_engine_pm_get(engine); - err = __live_lrc_indirect_ctx_bb(engine); - intel_engine_pm_put(engine); - - if (igt_flush_test(gt->i915)) - err = -EIO; - - if (err) - break; - } - - return err; -} - -static void garbage_reset(struct intel_engine_cs *engine, - struct i915_request *rq) -{ - const unsigned int bit = I915_RESET_ENGINE + engine->id; - unsigned long *lock = &engine->gt->reset.flags; - - if (test_and_set_bit(bit, lock)) - return; - - tasklet_disable(&engine->execlists.tasklet); - - if (!rq->fence.error) - intel_engine_reset(engine, NULL); - - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(bit, lock); -} - -static struct i915_request *garbage(struct intel_context *ce, - struct rnd_state *prng) -{ - struct i915_request *rq; - int err; - - err = intel_context_pin(ce); - if (err) - return ERR_PTR(err); - - prandom_bytes_state(prng, - ce->lrc_reg_state, - ce->engine->context_size - - LRC_STATE_OFFSET); - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - i915_request_get(rq); - i915_request_add(rq); - return rq; - -err_unpin: - intel_context_unpin(ce); - return ERR_PTR(err); -} - -static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) -{ - struct intel_context *ce; - struct i915_request *hang; - int err = 0; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - hang = garbage(ce, prng); - if (IS_ERR(hang)) { - err = 
PTR_ERR(hang); - goto err_ce; - } - - if (wait_for_submit(engine, hang, HZ / 2)) { - i915_request_put(hang); - err = -ETIME; - goto err_ce; - } - - intel_context_set_banned(ce); - garbage_reset(engine, hang); - - intel_engine_flush_submission(engine); - if (!hang->fence.error) { - i915_request_put(hang); - pr_err("%s: corrupted context was not reset\n", - engine->name); - err = -EINVAL; - goto err_ce; - } - - if (i915_request_wait(hang, 0, HZ / 2) < 0) { - pr_err("%s: corrupted context did not recover\n", - engine->name); - i915_request_put(hang); - err = -EIO; - goto err_ce; - } - i915_request_put(hang); - -err_ce: - intel_context_put(ce); - return err; -} - -static int live_lrc_garbage(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * Verify that we can recover if one context state is completely - * corrupted. - */ - - if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) - return 0; - - for_each_engine(engine, gt, id) { - I915_RND_STATE(prng); - int err = 0, i; - - if (!intel_has_reset_engine(engine->gt)) - continue; - - intel_engine_pm_get(engine); - for (i = 0; i < 3; i++) { - err = __lrc_garbage(engine, &prng); - if (err) - break; - } - intel_engine_pm_put(engine); - - if (igt_flush_test(gt->i915)) - err = -EIO; - if (err) - return err; - } - - return 0; -} - -static int __live_pphwsp_runtime(struct intel_engine_cs *engine) -{ - struct intel_context *ce; - struct i915_request *rq; - IGT_TIMEOUT(end_time); - int err; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - ce->runtime.num_underflow = 0; - ce->runtime.max_underflow = 0; - - do { - unsigned int loop = 1024; - - while (loop) { - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_rq; - } - - if (--loop == 0) - i915_request_get(rq); - - i915_request_add(rq); - } - - if (__igt_timeout(end_time, NULL)) - break; - - i915_request_put(rq); - } while (1); - - err = 
i915_request_wait(rq, 0, HZ / 5); - if (err < 0) { - pr_err("%s: request not completed!\n", engine->name); - goto err_wait; - } - - igt_flush_test(engine->i915); - - pr_info("%s: pphwsp runtime %lluns, average %lluns\n", - engine->name, - intel_context_get_total_runtime_ns(ce), - intel_context_get_avg_runtime_ns(ce)); - - err = 0; - if (ce->runtime.num_underflow) { - pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", - engine->name, - ce->runtime.num_underflow, - ce->runtime.max_underflow); - GEM_TRACE_DUMP(); - err = -EOVERFLOW; - } - -err_wait: - i915_request_put(rq); -err_rq: - intel_context_put(ce); - return err; -} - -static int live_pphwsp_runtime(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* - * Check that cumulative context runtime as stored in the pphwsp[16] - * is monotonic. - */ - - for_each_engine(engine, gt, id) { - err = __live_pphwsp_runtime(engine); - if (err) - break; - } - - if (igt_flush_test(gt->i915)) - err = -EIO; - - return err; -} - -int intel_lrc_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_lrc_layout), - SUBTEST(live_lrc_fixed), - SUBTEST(live_lrc_state), - SUBTEST(live_lrc_gpr), - SUBTEST(live_lrc_isolation), - SUBTEST(live_lrc_timestamp), - SUBTEST(live_lrc_garbage), - SUBTEST(live_pphwsp_runtime), - SUBTEST(live_lrc_indirect_ctx_bb), - }; - - if (!HAS_LOGICAL_RING_CONTEXTS(i915)) - return 0; - - return intel_gt_live_subtests(tests, &i915->gt); -} diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c new file mode 100644 index 000000000000..b7617731d2cd --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -0,0 +1,1861 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "i915_selftest.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" 
+#include "intel_reset.h" +#include "intel_ring.h" +#include "selftest_engine_heartbeat.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_spinner.h" +#include "selftests/lib_sw_fence.h" +#include "shmem_utils.h" + +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR 16 +#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static bool is_active(struct i915_request *rq) +{ + if (i915_request_is_active(rq)) + return true; + + if (i915_request_on_hold(rq)) + return true; + + if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) + return true; + + return false; +} + +static int wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) +{ + timeout += jiffies; + do { + bool done = time_after(jiffies, timeout); + + if (i915_request_completed(rq)) /* that was quick!
*/ + return 0; + + /* Wait until the HW has acknowledged the submission (or err) */ + intel_engine_flush_submission(engine); + if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) + return 0; + + if (done) + return -ETIME; + + cond_resched(); + } while (1); +} + +static int emit_semaphore_signal(struct intel_context *ce, void *slot) +{ + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + return 0; +} + +static int context_flush(struct intel_context *ce, long timeout) +{ + struct i915_request *rq; + struct dma_fence *fence; + int err = 0; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + rq = i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) + err = -ETIME; + i915_request_put(rq); + + rmb(); /* We know the request is written, make sure all state is too! */ + return err; +} + +static int live_lrc_layout(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 *lrc; + int err; + + /* + * Check the register offsets we use to create the initial reg state + * match the layout saved by HW.
+ */ + + lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!lrc) + return -ENOMEM; + + err = 0; + for_each_engine(engine, gt, id) { + u32 *hw; + int dw; + + if (!engine->default_state) + continue; + + hw = shmem_pin_map(engine->default_state); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_OFFSET / sizeof(*hw); + + __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE), + engine->kernel_context, engine, true); + + dw = 0; + do { + u32 lri = hw[dw]; + + if (lri == 0) { + dw++; + continue; + } + + if (lrc[dw] == 0) { + pr_debug("%s: skipped instruction %x at dword %d\n", + engine->name, lri, dw); + dw++; + continue; + } + + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + pr_err("%s: Expected LRI command at dword %d, found %08x\n", + engine->name, dw, lri); + err = -EINVAL; + break; + } + + if (lrc[dw] != lri) { + pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", + engine->name, dw, lri, lrc[dw]); + err = -EINVAL; + break; + } + + lri &= 0x7f; + lri++; + dw++; + + while (lri) { + if (hw[dw] != lrc[dw]) { + pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", + engine->name, dw, hw[dw], lrc[dw]); + err = -EINVAL; + break; + } + + /* + * Skip over the actual register value as we + * expect that to differ. 
+ */ + dw += 2; + lri -= 2; + } + } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + if (err) { + pr_info("%s: HW register image:\n", engine->name); + igt_hexdump(hw, PAGE_SIZE); + + pr_info("%s: SW register image:\n", engine->name); + igt_hexdump(lrc, PAGE_SIZE); + } + + shmem_unpin_map(engine->default_state, hw); + if (err) + break; + } + + kfree(lrc); + return err; +} + +static int find_offset(const u32 *lri, u32 offset) +{ + int i; + + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + if (lri[i] == offset) + return i; + + return -1; +} + +static int live_lrc_fixed(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check the assumed register offsets match the actual locations in + * the context image. + */ + + for_each_engine(engine, gt, id) { + const struct { + u32 reg; + u32 offset; + const char *name; + } tbl[] = { + { + i915_mmio_reg_offset(RING_START(engine->mmio_base)), + CTX_RING_START - 1, + "RING_START" + }, + { + i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), + CTX_RING_CTL - 1, + "RING_CTL" + }, + { + i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), + CTX_RING_HEAD - 1, + "RING_HEAD" + }, + { + i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), + CTX_RING_TAIL - 1, + "RING_TAIL" + }, + { + i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), + lrc_ring_mi_mode(engine), + "RING_MI_MODE" + }, + { + i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), + CTX_BB_STATE - 1, + "BB_STATE" + }, + { + i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)), + lrc_ring_wa_bb_per_ctx(engine), + "RING_BB_PER_CTX_PTR" + }, + { + i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)), + lrc_ring_indirect_ptr(engine), + "RING_INDIRECT_CTX_PTR" + }, + { + i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)), + lrc_ring_indirect_offset(engine), + "RING_INDIRECT_CTX_OFFSET" + }, + { + i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), + 
CTX_TIMESTAMP - 1, + "RING_CTX_TIMESTAMP" + }, + { + i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)), + lrc_ring_gpr0(engine), + "RING_CS_GPR0" + }, + { + i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)), + lrc_ring_cmd_buf_cctl(engine), + "RING_CMD_BUF_CCTL" + }, + { }, + }, *t; + u32 *hw; + + if (!engine->default_state) + continue; + + hw = shmem_pin_map(engine->default_state); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_OFFSET / sizeof(*hw); + + for (t = tbl; t->name; t++) { + int dw = find_offset(hw, t->reg); + + if (dw != t->offset) { + pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", + engine->name, + t->name, + t->reg, + dw, + t->offset); + err = -EINVAL; + } + } + + shmem_unpin_map(engine->default_state, hw); + } + + return err; +} + +static int __live_lrc_state(struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + enum { + RING_START_IDX = 0, + RING_TAIL_IDX, + MAX_IDX + }; + u32 expected[MAX_IDX]; + u32 *cs; + int err; + int n; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(scratch->obj, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + cs = intel_ring_begin(rq, 4 * MAX_IDX); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_unpin; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); + *cs++ = 0; + + expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); + *cs++ = 
i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); + *cs++ = 0; + + err = i915_request_await_object(rq, scratch->obj, true); + if (!err) + err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); + + i915_request_get(rq); + i915_request_add(rq); + if (err) + goto err_rq; + + intel_engine_flush_submission(engine); + expected[RING_TAIL_IDX] = ce->ring->tail; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < MAX_IDX; n++) { + if (cs[n] != expected[n]) { + pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", + engine->name, n, cs[n], expected[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_unpin: + intel_context_unpin(ce); +err_put: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_context_put(ce); + return err; +} + +static int live_lrc_state(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check the live register state matches what we expect for this + * intel_context. 
+ */ + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + for_each_engine(engine, gt, id) { + err = __live_lrc_state(engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +static int gpr_make_dirty(struct intel_context *ce) +{ + struct i915_request *rq; + u32 *cs; + int n; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = CS_GPR(ce->engine, n); + *cs++ = STACK_MAGIC; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + return 0; +} + +static struct i915_request * +__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) +{ + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + int err; + int n; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + i915_request_add(rq); + return ERR_CAST(cs); + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = offset; + *cs++ = 0; + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(ce->engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_vma_lock(scratch); + err = i915_request_await_object(rq, scratch->obj, true); + if (!err) + err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(scratch); + + i915_request_get(rq); + 
i915_request_add(rq); + if (err) { + i915_request_put(rq); + rq = ERR_PTR(err); + } + + return rq; +} + +static int __live_lrc_gpr(struct intel_engine_cs *engine, + struct i915_vma *scratch, + bool preempt) +{ + u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); + struct intel_context *ce; + struct i915_request *rq; + u32 *cs; + int err; + int n; + + if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + return 0; /* GPR only on rcs0 for gen8 */ + + err = gpr_make_dirty(engine->kernel_context); + if (err) + return err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + rq = __gpr_read(ce, scratch, slot); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_put; + } + + err = wait_for_submit(engine, rq, HZ / 2); + if (err) + goto err_rq; + + if (preempt) { + err = gpr_make_dirty(engine->kernel_context); + if (err) + goto err_rq; + + err = emit_semaphore_signal(engine->kernel_context, slot); + if (err) + goto err_rq; + } else { + slot[0] = 1; + wmb(); + } + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n]) { + pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", + engine->name, + n / 2, n & 1 ? "udw" : "ldw", + cs[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + memset32(&slot[0], -1, 4); + wmb(); + i915_request_put(rq); +err_put: + intel_context_put(ce); + return err; +} + +static int live_lrc_gpr(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check that GPR registers are cleared in new contexts as we need + * to avoid leaking any information from previous contexts. 
+ */ + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + for_each_engine(engine, gt, id) { + st_engine_heartbeat_disable(engine); + + err = __live_lrc_gpr(engine, scratch, false); + if (err) + goto err; + + err = __live_lrc_gpr(engine, scratch, true); + if (err) + goto err; + +err: + st_engine_heartbeat_enable(engine); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +static struct i915_request * +create_timestamp(struct intel_context *ce, void *slot, int idx) +{ + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + int err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + cs = intel_ring_begin(rq, 10); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = offset; + *cs++ = 0; + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = offset + idx * sizeof(u32); + *cs++ = 0; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_MASK; + err = 0; +err: + i915_request_get(rq); + i915_request_add(rq); + if (err) { + i915_request_put(rq); + return ERR_PTR(err); + } + + return rq; +} + +struct lrc_timestamp { + struct intel_engine_cs *engine; + struct intel_context *ce[2]; + u32 poison; +}; + +static bool timestamp_advanced(u32 start, u32 end) +{ + return (s32)(end - start) > 0; +} + +static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) +{ + u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); + struct i915_request *rq; + u32 timestamp; + int err = 0; + + arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; + rq = 
create_timestamp(arg->ce[0], slot, 1); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + err = wait_for_submit(rq->engine, rq, HZ / 2); + if (err) + goto err; + + if (preempt) { + arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; + err = emit_semaphore_signal(arg->ce[1], slot); + if (err) + goto err; + } else { + slot[0] = 1; + wmb(); + } + + /* And wait for switch to kernel (to save our context to memory) */ + err = context_flush(arg->ce[0], HZ / 2); + if (err) + goto err; + + if (!timestamp_advanced(arg->poison, slot[1])) { + pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", + arg->engine->name, preempt ? "preempt" : "simple", + arg->poison, slot[1]); + err = -EINVAL; + } + + timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); + if (!timestamp_advanced(slot[1], timestamp)) { + pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", + arg->engine->name, preempt ? "preempt" : "simple", + slot[1], timestamp); + err = -EINVAL; + } + +err: + memset32(slot, -1, 4); + i915_request_put(rq); + return err; +} + +static int live_lrc_timestamp(void *arg) +{ + struct lrc_timestamp data = {}; + struct intel_gt *gt = arg; + enum intel_engine_id id; + const u32 poison[] = { + 0, + S32_MAX, + (u32)S32_MAX + 1, + U32_MAX, + }; + + /* + * We want to verify that the timestamp is saved and restored across + * context switches and is monotonic. + * + * So we do this with a little bit of LRC poisoning to check various + * boundary conditions, and see what happens if we preempt the context + * with a second request (carrying more poison into the timestamp). 
+ */ + + for_each_engine(data.engine, gt, id) { + int i, err = 0; + + st_engine_heartbeat_disable(data.engine); + + for (i = 0; i < ARRAY_SIZE(data.ce); i++) { + struct intel_context *tmp; + + tmp = intel_context_create(data.engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err; + } + + data.ce[i] = tmp; + } + + for (i = 0; i < ARRAY_SIZE(poison); i++) { + data.poison = poison[i]; + + err = __lrc_timestamp(&data, false); + if (err) + break; + + err = __lrc_timestamp(&data, true); + if (err) + break; + } + +err: + st_engine_heartbeat_enable(data.engine); + for (i = 0; i < ARRAY_SIZE(data.ce); i++) { + if (!data.ce[i]) + break; + + intel_context_unpin(data.ce[i]); + intel_context_put(data.ce[i]); + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static struct i915_vma * +create_user_vma(struct i915_address_space *vm, unsigned long size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static struct i915_vma * +store_context(struct intel_context *ce, struct i915_vma *scratch) +{ + struct i915_vma *batch; + u32 dw, x, *cs, *hw; + u32 *defaults; + + batch = create_user_vma(ce->vm, SZ_64K); + if (IS_ERR(batch)) + return batch; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(batch); + return ERR_CAST(cs); + } + + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + + x = 0; + dw = 0; + hw = defaults; + 
hw += LRC_STATE_OFFSET / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + while (len--) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = hw[dw]; + *cs++ = lower_32_bits(scratch->node.start + x); + *cs++ = upper_32_bits(scratch->node.start + x); + + dw += 2; + x += 4; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + *cs++ = MI_BATCH_BUFFER_END; + + shmem_unpin_map(ce->engine->default_state, defaults); + + i915_gem_object_flush_map(batch->obj); + i915_gem_object_unpin_map(batch->obj); + + return batch; +} + +static int move_to_active(struct i915_request *rq, + struct i915_vma *vma, + unsigned int flags) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, flags); + if (!err) + err = i915_vma_move_to_active(vma, rq, flags); + i915_vma_unlock(vma); + + return err; +} + +static struct i915_request * +record_registers(struct intel_context *ce, + struct i915_vma *before, + struct i915_vma *after, + u32 *sema) +{ + struct i915_vma *b_before, *b_after; + struct i915_request *rq; + u32 *cs; + int err; + + b_before = store_context(ce, before); + if (IS_ERR(b_before)) + return ERR_CAST(b_before); + + b_after = store_context(ce, after); + if (IS_ERR(b_after)) { + rq = ERR_CAST(b_after); + goto err_before; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto err_after; + + err = move_to_active(rq, before, EXEC_OBJECT_WRITE); + if (err) + goto err_rq; + + err = move_to_active(rq, b_before, 0); + if (err) + goto err_rq; + + err = move_to_active(rq, after, EXEC_OBJECT_WRITE); + if (err) + goto err_rq; + + err = move_to_active(rq, b_after, 0); + if (err) + goto err_rq; + + cs = intel_ring_begin(rq, 14); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = 
MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(b_before->node.start); + *cs++ = upper_32_bits(b_before->node.start); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(sema); + *cs++ = 0; + *cs++ = MI_NOOP; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(b_after->node.start); + *cs++ = upper_32_bits(b_after->node.start); + + intel_ring_advance(rq, cs); + + WRITE_ONCE(*sema, 0); + i915_request_get(rq); + i915_request_add(rq); +err_after: + i915_vma_put(b_after); +err_before: + i915_vma_put(b_before); + return rq; + +err_rq: + i915_request_add(rq); + rq = ERR_PTR(err); + goto err_after; +} + +static struct i915_vma *load_context(struct intel_context *ce, u32 poison) +{ + struct i915_vma *batch; + u32 dw, *cs, *hw; + u32 *defaults; + + batch = create_user_vma(ce->vm, SZ_64K); + if (IS_ERR(batch)) + return batch; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(batch); + return ERR_CAST(cs); + } + + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + + dw = 0; + hw = defaults; + hw += LRC_STATE_OFFSET / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + *cs++ = MI_LOAD_REGISTER_IMM(len); + while (len--) { + *cs++ = hw[dw]; + *cs++ = poison; + dw += 2; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + *cs++ = MI_BATCH_BUFFER_END; + + shmem_unpin_map(ce->engine->default_state, defaults); + + i915_gem_object_flush_map(batch->obj); + 
i915_gem_object_unpin_map(batch->obj); + + return batch; +} + +static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) +{ + struct i915_request *rq; + struct i915_vma *batch; + u32 *cs; + int err; + + batch = load_context(ce, poison); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = move_to_active(rq, batch, 0); + if (err) + goto err_rq; + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(batch->node.start); + *cs++ = upper_32_bits(batch->node.start); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(sema); + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; +err_rq: + i915_request_add(rq); +err_batch: + i915_vma_put(batch); + return err; +} + +static bool is_moving(u32 a, u32 b) +{ + return a != b; +} + +static int compare_isolation(struct intel_engine_cs *engine, + struct i915_vma *ref[2], + struct i915_vma *result[2], + struct intel_context *ce, + u32 poison) +{ + u32 x, dw, *hw, *lrc; + u32 *A[2], *B[2]; + u32 *defaults; + int err = 0; + + A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); + if (IS_ERR(A[0])) + return PTR_ERR(A[0]); + + A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); + if (IS_ERR(A[1])) { + err = PTR_ERR(A[1]); + goto err_A0; + } + + B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); + if (IS_ERR(B[0])) { + err = PTR_ERR(B[0]); + goto err_A1; + } + + B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); + if (IS_ERR(B[1])) { + err = PTR_ERR(B[1]); + goto err_B0; + } + + lrc = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(engine->i915)); + if (IS_ERR(lrc)) { + err = 
PTR_ERR(lrc); + goto err_B1; + } + lrc += LRC_STATE_OFFSET / sizeof(*hw); + + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + err = -ENOMEM; + goto err_lrc; + } + + x = 0; + dw = 0; + hw = defaults; + hw += LRC_STATE_OFFSET / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + while (len--) { + if (!is_moving(A[0][x], A[1][x]) && + (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { + switch (hw[dw] & 4095) { + case 0x30: /* RING_HEAD */ + case 0x34: /* RING_TAIL */ + break; + + default: + pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", + engine->name, dw, + hw[dw], hw[dw + 1], + A[0][x], B[0][x], B[1][x], + poison, lrc[dw + 1]); + err = -EINVAL; + } + } + dw += 2; + x++; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + shmem_unpin_map(ce->engine->default_state, defaults); +err_lrc: + i915_gem_object_unpin_map(ce->state->obj); +err_B1: + i915_gem_object_unpin_map(result[1]->obj); +err_B0: + i915_gem_object_unpin_map(result[0]->obj); +err_A1: + i915_gem_object_unpin_map(ref[1]->obj); +err_A0: + i915_gem_object_unpin_map(ref[0]->obj); + return err; +} + +static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) +{ + u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); + struct i915_vma *ref[2], *result[2]; + struct intel_context *A, *B; + struct i915_request *rq; + int err; + + A = intel_context_create(engine); + if (IS_ERR(A)) + return PTR_ERR(A); + + B = intel_context_create(engine); + if (IS_ERR(B)) { + err = PTR_ERR(B); + goto err_A; + } + + ref[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[0])) { + err = PTR_ERR(ref[0]); + goto err_B; + } + + ref[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[1])) { + err = 
PTR_ERR(ref[1]); + goto err_ref0; + } + + rq = record_registers(A, ref[0], ref[1], sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ref1; + } + + WRITE_ONCE(*sema, 1); + wmb(); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_ref1; + } + i915_request_put(rq); + + result[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[0])) { + err = PTR_ERR(result[0]); + goto err_ref1; + } + + result[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[1])) { + err = PTR_ERR(result[1]); + goto err_result0; + } + + rq = record_registers(A, result[0], result[1], sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_result1; + } + + err = poison_registers(B, poison, sema); + if (err) { + WRITE_ONCE(*sema, -1); + i915_request_put(rq); + goto err_result1; + } + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_result1; + } + i915_request_put(rq); + + err = compare_isolation(engine, ref, result, A, poison); + +err_result1: + i915_vma_put(result[1]); +err_result0: + i915_vma_put(result[0]); +err_ref1: + i915_vma_put(ref[1]); +err_ref0: + i915_vma_put(ref[0]); +err_B: + intel_context_put(B); +err_A: + intel_context_put(A); + return err; +} + +static bool skip_isolation(const struct intel_engine_cs *engine) +{ + if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) + return true; + + if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) + return true; + + return false; +} + +static int live_lrc_isolation(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const u32 poison[] = { + STACK_MAGIC, + 0x3a3a3a3a, + 0x5c5c5c5c, + 0xffffffff, + 0xffff0000, + }; + int err = 0; + + /* + * Our goal is to try and verify that per-context state cannot be + * tampered with by another non-privileged client. 
+ * + * We take the list of context registers from the LRI in the default + * context image and attempt to modify that list from a remote context. + */ + + for_each_engine(engine, gt, id) { + int i; + + /* Just don't even ask */ + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && + skip_isolation(engine)) + continue; + + intel_engine_pm_get(engine); + for (i = 0; i < ARRAY_SIZE(poison); i++) { + int result; + + result = __lrc_isolation(engine, poison[i]); + if (result && !err) + err = result; + + result = __lrc_isolation(engine, ~poison[i]); + if (result && !err) + err = result; + } + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + return err; +} + +static int indirect_ctx_submit_req(struct intel_context *ce) +{ + struct i915_request *rq; + int err = 0; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + + i915_request_put(rq); + + return err; +} + +#define CTX_BB_CANARY_OFFSET (3 * 1024) +#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) + +static u32 * +emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT | + MI_LRI_LRM_CS_MMIO; + *cs++ = i915_mmio_reg_offset(RING_START(0)); + *cs++ = i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce) + + CTX_BB_CANARY_OFFSET; + *cs++ = 0; + + return cs; +} + +static void +indirect_ctx_bb_setup(struct intel_context *ce) +{ + u32 *cs = context_indirect_bb(ce); + + cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; + + setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); +} + +static bool check_ring_start(struct intel_context *ce) +{ + const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - + LRC_STATE_OFFSET + context_wa_bb_offset(ce); + + if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) + return true; + + 
pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n", + ctx_bb[CTX_BB_CANARY_INDEX], + ce->lrc_reg_state[CTX_RING_START]); + + return false; +} + +static int indirect_ctx_bb_check(struct intel_context *ce) +{ + int err; + + err = indirect_ctx_submit_req(ce); + if (err) + return err; + + if (!check_ring_start(ce)) + return -EINVAL; + + return 0; +} + +static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) +{ + struct intel_context *a, *b; + int err; + + a = intel_context_create(engine); + if (IS_ERR(a)) + return PTR_ERR(a); + err = intel_context_pin(a); + if (err) + goto put_a; + + b = intel_context_create(engine); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto unpin_a; + } + err = intel_context_pin(b); + if (err) + goto put_b; + + /* We use the already reserved extra page in context state */ + if (!a->wa_bb_page) { + GEM_BUG_ON(b->wa_bb_page); + GEM_BUG_ON(INTEL_GEN(engine->i915) == 12); + goto unpin_b; + } + + /* + * In order to test that our per context bb is truly per context, + * and executes at the intended spot in the context restore process, + * make the batch store the ring start value to memory. + * As ring start is restored prior to starting the indirect ctx bb and + * as it will be different for each context, it fits this purpose. 
+ */ + indirect_ctx_bb_setup(a); + indirect_ctx_bb_setup(b); + + err = indirect_ctx_bb_check(a); + if (err) + goto unpin_b; + + err = indirect_ctx_bb_check(b); + +unpin_b: + intel_context_unpin(b); +put_b: + intel_context_put(b); +unpin_a: + intel_context_unpin(a); +put_a: + intel_context_put(a); + + return err; +} + +static int live_lrc_indirect_ctx_bb(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + for_each_engine(engine, gt, id) { + intel_engine_pm_get(engine); + err = __live_lrc_indirect_ctx_bb(engine); + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + + if (err) + break; + } + + return err; +} + +static void garbage_reset(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (test_and_set_bit(bit, lock)) + return; + + tasklet_disable(&engine->execlists.tasklet); + + if (!rq->fence.error) + intel_engine_reset(engine, NULL); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static struct i915_request *garbage(struct intel_context *ce, + struct rnd_state *prng) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(ce); + if (err) + return ERR_PTR(err); + + prandom_bytes_state(prng, + ce->lrc_reg_state, + ce->engine->context_size - + LRC_STATE_OFFSET); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + i915_request_get(rq); + i915_request_add(rq); + return rq; + +err_unpin: + intel_context_unpin(ce); + return ERR_PTR(err); +} + +static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) +{ + struct intel_context *ce; + struct i915_request *hang; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + hang = garbage(ce, prng); + if (IS_ERR(hang)) { + err = 
PTR_ERR(hang); + goto err_ce; + } + + if (wait_for_submit(engine, hang, HZ / 2)) { + i915_request_put(hang); + err = -ETIME; + goto err_ce; + } + + intel_context_set_banned(ce); + garbage_reset(engine, hang); + + intel_engine_flush_submission(engine); + if (!hang->fence.error) { + i915_request_put(hang); + pr_err("%s: corrupted context was not reset\n", + engine->name); + err = -EINVAL; + goto err_ce; + } + + if (i915_request_wait(hang, 0, HZ / 2) < 0) { + pr_err("%s: corrupted context did not recover\n", + engine->name); + i915_request_put(hang); + err = -EIO; + goto err_ce; + } + i915_request_put(hang); + +err_ce: + intel_context_put(ce); + return err; +} + +static int live_lrc_garbage(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Verify that we can recover if one context state is completely + * corrupted. + */ + + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) + return 0; + + for_each_engine(engine, gt, id) { + I915_RND_STATE(prng); + int err = 0, i; + + if (!intel_has_reset_engine(engine->gt)) + continue; + + intel_engine_pm_get(engine); + for (i = 0; i < 3; i++) { + err = __lrc_garbage(engine, &prng); + if (err) + break; + } + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static int __live_pphwsp_runtime(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + IGT_TIMEOUT(end_time); + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + ce->runtime.num_underflow = 0; + ce->runtime.max_underflow = 0; + + do { + unsigned int loop = 1024; + + while (loop) { + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_rq; + } + + if (--loop == 0) + i915_request_get(rq); + + i915_request_add(rq); + } + + if (__igt_timeout(end_time, NULL)) + break; + + i915_request_put(rq); + } while (1); + + err = 
i915_request_wait(rq, 0, HZ / 5); + if (err < 0) { + pr_err("%s: request not completed!\n", engine->name); + goto err_wait; + } + + igt_flush_test(engine->i915); + + pr_info("%s: pphwsp runtime %lluns, average %lluns\n", + engine->name, + intel_context_get_total_runtime_ns(ce), + intel_context_get_avg_runtime_ns(ce)); + + err = 0; + if (ce->runtime.num_underflow) { + pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", + engine->name, + ce->runtime.num_underflow, + ce->runtime.max_underflow); + GEM_TRACE_DUMP(); + err = -EOVERFLOW; + } + +err_wait: + i915_request_put(rq); +err_rq: + intel_context_put(ce); + return err; +} + +static int live_pphwsp_runtime(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check that cumulative context runtime as stored in the pphwsp[16] + * is monotonic. + */ + + for_each_engine(engine, gt, id) { + err = __live_pphwsp_runtime(engine); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + return err; +} + +int intel_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_fixed), + SUBTEST(live_lrc_state), + SUBTEST(live_lrc_gpr), + SUBTEST(live_lrc_isolation), + SUBTEST(live_lrc_timestamp), + SUBTEST(live_lrc_garbage), + SUBTEST(live_pphwsp_runtime), + SUBTEST(live_lrc_indirect_ctx_bb), + }; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 1a2e4f631763..17526717368c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -3,8 +3,8 @@ * Copyright © 2014-2019 Intel Corporation */ -#include "gt/intel_execlists_submission.h" /* lrc layout */ #include "gt/intel_gt.h" +#include "gt/intel_lrc.h" #include "intel_guc_ads.h" 
#include "intel_uc.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 8528ab574dbe..694ee424b4ee 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -11,7 +11,7 @@ #include "gt/intel_execlists_submission.h" /* XXX */ #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "gt/intel_lrc_reg.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" #include "intel_guc_submission.h" @@ -402,6 +402,28 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) memset(execlists->inflight, 0, sizeof(execlists->inflight)); } +static void guc_reset_state(struct intel_context *ce, + struct intel_engine_cs *engine, + u32 head, + bool scrub) +{ + GEM_BUG_ON(!intel_context_is_pinned(ce)); + + /* + * We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. + */ + if (scrub) + lrc_init_regs(ce, engine, true); + + /* Rerun the request; its payload has been neutered (if guilty). 
*/ + lrc_update_regs(ce, engine, head); +} + static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -421,7 +443,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) stalled = false; __i915_request_reset(rq, stalled); - intel_lr_context_reset(engine, rq->context, rq->head, stalled); + guc_reset_state(rq->context, engine, rq->head, stalled); out_unlock: spin_unlock_irqrestore(&engine->active.lock, flags); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index ed30fdde4114..6af5c06caee0 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -38,6 +38,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" #include "gt/intel_execlists_submission.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 58caa3f1a38b..f65c32bd970e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -201,7 +201,7 @@ #include "gt/intel_execlists_submission.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" -#include "gt/intel_lrc_reg.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" #include "i915_drv.h" -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx