Quoting Tvrtko Ursulin (2018-12-14 12:34:49)
> +static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
> +{
> +        struct drm_i915_gem_object *obj;
> +        u32 *cmd;
> +        int err;
> +
> +        if (INTEL_GEN(vma->vm->i915) < 8)
> +                return ERR_PTR(-EINVAL);
> +
> +        obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
> +        if (IS_ERR(obj))
> +                return ERR_CAST(obj);
> +
> +        cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +        if (IS_ERR(cmd)) {
> +                err = PTR_ERR(cmd);
> +                goto err;
> +        }
> +
> +        *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
> +        *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +        *cmd++ = lower_32_bits(vma->node.start);
> +        *cmd++ = upper_32_bits(vma->node.start);
> +        *cmd = MI_BATCH_BUFFER_END;
> +
> +        i915_gem_object_unpin_map(obj);
> +
> +        err = i915_gem_object_set_to_gtt_domain(obj, false);
> +        if (err)
> +                goto err;
> +
> +        vma = i915_vma_instance(obj, vma->vm, NULL);
> +        if (IS_ERR(vma)) {
> +                err = PTR_ERR(vma);
> +                goto err;
> +        }
> +
> +        err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +        if (err)
> +                goto err;
> +
> +        return vma;
> +
> +err:
> +        i915_gem_object_put(obj);
> +        return ERR_PTR(err);
> +}
> +
> +static int
> +emit_rpcs_query(struct drm_i915_gem_object *obj,
> +                struct i915_gem_context *ctx,
> +                struct intel_engine_cs *engine,
> +                struct i915_request **rq_out)
> +{
> +        struct i915_address_space *vm;
> +        struct i915_request *rq;
> +        struct i915_vma *batch;
> +        struct i915_vma *vma;
> +        int err;
> +
> +        GEM_BUG_ON(!ctx->ppgtt);
> +        GEM_BUG_ON(!intel_engine_can_store_dword(engine));
> +
> +        vm = &ctx->ppgtt->vm;
> +
> +        vma = i915_vma_instance(obj, vm, NULL);
> +        if (IS_ERR(vma))
> +                return PTR_ERR(vma);
> +
> +        err = i915_gem_object_set_to_gtt_domain(obj, false);
> +        if (err)
> +                return err;
> +
> +        err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
> +        if (err)
> +                return err;
> +
> +        batch = rpcs_query_batch(vma);
> +        if (IS_ERR(batch)) {
> +                err = PTR_ERR(batch);
> +                goto err_vma;
> +        }

Simpler would be to just emit the SRM into the ring. One less
object+vma to handle.
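i.e. drop the batch + emit_bb_start entirely and write the SRM into the
request's ring (untested sketch, reusing the existing result vma as the
destination):

        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        /* Store RPCS (power clock state) into the result vma */
        *cs++ = MI_STORE_REGISTER_MEM_GEN8;
        *cs++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
        *cs++ = lower_32_bits(vma->node.start);
        *cs++ = upper_32_bits(vma->node.start);
        intel_ring_advance(rq, cs);
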
> +
> +        rq = i915_request_alloc(engine, ctx);
> +        if (IS_ERR(rq)) {
> +                err = PTR_ERR(rq);
> +                goto err_batch;
> +        }
> +
> +        err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
> +        if (err)
> +                goto err_request;
> +
> +        err = i915_vma_move_to_active(batch, rq, 0);
> +        if (err)
> +                goto skip_request;
> +
> +        err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +        if (err)
> +                goto skip_request;
> +
> +        i915_gem_object_set_active_reference(batch->obj);
> +        i915_vma_unpin(batch);
> +        i915_vma_close(batch);
> +
> +        i915_vma_unpin(vma);
> +
> +        *rq_out = i915_request_get(rq);
> +
> +        i915_request_add(rq);
> +
> +        return 0;
> +
> +skip_request:
> +        i915_request_skip(rq, err);
> +err_request:
> +        i915_request_add(rq);
> +err_batch:
> +        i915_vma_unpin(batch);
> +err_vma:
> +        i915_vma_unpin(vma);
> +
> +        return err;
> +}
> +
> +#define TEST_IDLE        (1 << 0)
> +#define TEST_BUSY        (1 << 1)
> +#define TEST_RESET        (1 << 2)
> +
> +int
> +__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> +                                    struct intel_engine_cs *engine,
> +                                    struct intel_sseu sseu);
> +
> +static int
> +__pre_set(struct drm_i915_private *i915,
> +          unsigned int flags,
> +          struct i915_gem_context *ctx,
> +          struct intel_engine_cs *engine,
> +          struct igt_spinner **spin_out)
> +{
> +        int ret = 0;
> +
> +        if (flags & (TEST_BUSY | TEST_RESET)) {
> +                struct igt_spinner *spin;
> +                struct i915_request *rq;
> +
> +                spin = kzalloc(sizeof(*spin), GFP_KERNEL);
> +                if (!spin) {
> +                        ret = -ENOMEM;
> +                        goto out;
> +                }
> +
> +                ret = igt_spinner_init(spin, i915);
> +                if (ret)
> +                        return ret;
> +
> +                rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
> +                if (IS_ERR(rq)) {
> +                        ret = PTR_ERR(rq);
> +                        igt_spinner_fini(spin);
> +                        kfree(spin);
> +                        goto out;
> +                }
> +
> +                i915_request_add(rq);
> +
> +                if (!igt_wait_for_spinner(spin, rq)) {
> +                        pr_err("Spinner failed to start!\n");
> +                        igt_spinner_end(spin);
> +                        igt_spinner_fini(spin);
> +                        kfree(spin);
> +                        ret = -ETIMEDOUT;
> +                        goto out;
> +                }
> +
> +                *spin_out = spin;
> +        }
> +
> +out:
> +        return ret;
> +}
> +
> +static int
> +__read_slice_count(struct drm_i915_private *i915,
> +                   struct i915_gem_context *ctx,
> +                   struct intel_engine_cs *engine,
> +                   struct drm_i915_gem_object *obj,
> +                   struct igt_spinner *spin,
> +                   u32 *rpcs)
> +{
> +        struct i915_request *rq = NULL;
> +        u32 s_mask, s_shift;
> +        unsigned int cnt;
> +        u32 *buf, val;
> +        long ret;
> +
> +        ret = emit_rpcs_query(obj, ctx, engine, &rq);
> +        if (ret)
> +                return ret;
> +
> +        if (spin)
> +                igt_spinner_end(spin);
> +
> +        ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +        if (ret <= 0)
> +                return ret < 0 ? ret : -ETIME;
> +
> +        i915_request_put(rq);
> +
> +        buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +        if (IS_ERR(buf)) {
> +                ret = PTR_ERR(buf);
> +                return ret;
> +        }

You can replace the get + request_wait + put with
i915_gem_obj_prepare_shmem_read. Yes, the _shmem_ there is misleading.
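i.e. something like (untested sketch; prepare/finish do the implicit
wait and page pinning for us, so the explicit request get/wait/put can
go):

        unsigned int needs_clflush;

        /* Waits for the object to be idle and flushes it for reading */
        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        if (ret)
                return ret;

        buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
        ...
        i915_gem_object_unpin_map(obj);
        i915_gem_obj_finish_shmem_access(obj);
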
"*" : "", > + (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, > + (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); > +} > + > +static int > +__post_set(struct drm_i915_private *i915, > + unsigned int flags, > + struct i915_gem_context *ctx, > + struct i915_gem_context *kctx, > + struct intel_engine_cs *engine, > + struct drm_i915_gem_object *obj, > + unsigned int expected, > + struct igt_spinner *spin) > +{ > + unsigned int slices = > + hweight32(intel_device_default_sseu(i915).slice_mask); > + u32 rpcs = 0; > + int ret = 0; > + > + if (flags & TEST_RESET) { > + ret = i915_reset_engine(engine, "sseu"); > + if (ret) > + goto out; > + } > + > + ret = __read_slice_count(i915, ctx, engine, obj, > + flags & TEST_RESET ? NULL : spin, &rpcs); > + if (ret < 0) { > + goto out; > + } else if (ret != expected) { > + pr_err("Context slice count %d is not %u!\n", > + ret, expected); Include stage + phase name (idle, busy, reset, etc) in the error message. > + __err_rpcs(rpcs, ret); > + ret = -EINVAL; > + goto out; > + } else { > + ret = 0; > + } > + > + ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs); > + if (ret < 0) { > + goto out; > + } else if (ret != slices) { > + pr_err("Kernel context slice count %d is not %u!\n", > + ret, slices); > + __err_rpcs(rpcs, ret); > + ret = -EINVAL; > + goto out; > + } else { > + ret = 0; > + } > + > +out: > + if (spin) > + igt_spinner_end(spin); > + > + if (flags & TEST_IDLE) { > + ret = i915_gem_wait_for_idle(i915, > + I915_WAIT_LOCKED, > + MAX_SCHEDULE_TIMEOUT); > + if (ret) > + return ret; Do we want to go full idle (rpm) here? That shouldn't matter for ctx images, so forget that. Hmm, but we would like to kickstart the powercontext I guess. I wonder if we can call __i915_gem_park directly... > + > + ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs); > + if (ret < 0) { > + return ret; > + } else if (ret != expected) { > + pr_err("Context slice count %d is not %u after idle!\n", > + ret, expected); > + __err_rpcs(rpcs, ret); > + return -EINVAL; > + } else { > + ret = 0; > + } > + } > + > + return ret; Ok, that should check across context switches and restoration from reset. And reset should work so long as rpcs SRM as separate requests from user batches. > + > +} > + > +static int > +__sseu_test(struct drm_i915_private *i915, > + unsigned int flags, > + struct i915_gem_context *ctx, > + struct intel_engine_cs *engine, > + struct drm_i915_gem_object *obj, > + struct intel_sseu sseu) > +{ > + struct igt_spinner *spin = NULL; > + struct i915_gem_context *kctx; > + int ret; > + > + kctx = kernel_context(i915); > + if (IS_ERR(kctx)) > + return PTR_ERR(kctx); > + > + ret = __pre_set(i915, flags, ctx, engine, &spin); __sseu_prepare & __sseu_finish Just thinking that pre_set is a little generic and you'll wish you called it something else next time you add a selftest. 
> +        if (ret)
> +                goto out;
> +
> +        ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> +        if (ret)
> +                goto out;
> +
> +        ret = __post_set(i915, flags, ctx, kctx, engine, obj,
> +                         hweight32(sseu.slice_mask), spin);
> +
> +out:
> +        if (spin) {
> +                igt_spinner_end(spin);
> +                igt_spinner_fini(spin);
> +                kfree(spin);
> +        }
> +
> +        kernel_context_close(kctx);
> +
> +        return ret;
> +}
> +
> +static int __igt_ctx_sseu(struct drm_i915_private *i915, unsigned int flags)
> +{
> +        struct intel_sseu default_sseu = intel_device_default_sseu(i915);
> +        struct intel_engine_cs *engine = i915->engine[RCS];
> +        struct drm_i915_gem_object *obj;
> +        struct i915_gem_context *ctx;
> +        struct intel_sseu pg_sseu;
> +        struct drm_file *file;
> +        int ret;
> +
> +        if (INTEL_GEN(i915) < 9)
> +                return 0;
> +
> +        if (!INTEL_INFO(i915)->sseu.has_slice_pg)
> +                return 0;
> +
> +        if (hweight32(default_sseu.slice_mask) < 2)
> +                return 0;
> +
> +        /*
> +         * Gen11 VME friendly power-gated configuration with half enabled
> +         * sub-slices.
> +         */
> +        pg_sseu = default_sseu;
> +        pg_sseu.slice_mask = 1;
> +        pg_sseu.subslice_mask =
> +                ~(~0 << (hweight32(default_sseu.subslice_mask) / 2));
> +
> +        pr_info("SSE subtest flags=%x, def_slices=%u, pg_slices=%u\n",
> +                flags, hweight32(default_sseu.slice_mask),
> +                hweight32(pg_sseu.slice_mask));
> +
> +        file = mock_file(i915);
> +        if (IS_ERR(file))
> +                return PTR_ERR(file);
> +
> +        if (flags & TEST_RESET)
> +                igt_global_reset_lock(i915);
> +
> +        mutex_lock(&i915->drm.struct_mutex);
> +
> +        ctx = i915_gem_create_context(i915, file->driver_priv);
> +        if (IS_ERR(ctx)) {
> +                ret = PTR_ERR(ctx);
> +                goto out_unlock;
> +        }
> +
> +        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +        if (IS_ERR(obj)) {
> +                ret = PTR_ERR(obj);
> +                goto out_unlock;
> +        }
> +
> +        intel_runtime_pm_get(i915);
> +
> +        /* First set the default mask. */
> +        ret = __sseu_test(i915, flags, ctx, engine, obj, default_sseu);
> +        if (ret)
> +                goto out_fail;
> +
> +        /* Then set a power-gated configuration. */
> +        ret = __sseu_test(i915, flags, ctx, engine, obj, pg_sseu);
> +        if (ret)
> +                goto out_fail;
> +
> +        /* Back to defaults. */
> +        ret = __sseu_test(i915, flags, ctx, engine, obj, default_sseu);
> +        if (ret)
> +                goto out_fail;
> +
> +        /* One last power-gated configuration for the road. */
> +        ret = __sseu_test(i915, flags, ctx, engine, obj, pg_sseu);
> +        if (ret)
> +                goto out_fail;
> +
> +out_fail:
> +        if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +                ret = -EIO;
> +
> +        i915_gem_object_put(obj);
> +
> +        intel_runtime_pm_put(i915);
> +
> +out_unlock:
> +        mutex_unlock(&i915->drm.struct_mutex);
> +
> +        if (flags & TEST_RESET)
> +                igt_global_reset_unlock(i915);
> +
> +        mock_file_free(i915, file);
> +
> +        return ret;
> +}
> +
> +static int igt_ctx_sseu(void *arg)
> +{
> +        return __igt_ctx_sseu(arg, 0);
> +}
> +
> +static int igt_ctx_sseu_idle(void *arg)
> +{
> +        return __igt_ctx_sseu(arg, TEST_IDLE);
> +}
> +
> +static int igt_ctx_sseu_busy(void *arg)
> +{
> +        return __igt_ctx_sseu(arg, TEST_BUSY);
> +}
> +
> +static int igt_ctx_sseu_busy_reset(void *arg)
> +{
> +        return __igt_ctx_sseu(arg, TEST_BUSY | TEST_RESET);
> +}
> +
> +static int igt_ctx_sseu_busy_idle(void *arg)
> +{
> +        return __igt_ctx_sseu(arg, TEST_BUSY | TEST_IDLE);
> +}
> +
> +static int igt_ctx_sseu_reset_idle(void *arg)
> +{
> +        return __igt_ctx_sseu(arg, TEST_RESET | TEST_IDLE);
> +}

Looks ok. The only point I would like to reiterate is to pass down the
subtest names so that we can include them in the error messages for
easier comprehension.
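Something like (sketch; the extra name parameter is hypothetical
plumbing):

        static int __igt_ctx_sseu(struct drm_i915_private *i915,
                                  const char *name,
                                  unsigned int flags);

        static int igt_ctx_sseu_busy(void *arg)
        {
                return __igt_ctx_sseu(arg, "busy", TEST_BUSY);
        }

with the name threaded through __sseu_test/__post_set so the errors
become:

        pr_err("%s: Context slice count %d is not %u!\n",
               name, ret, expected);
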
-Chris