Based on upstream feedback [1] the current set_parallel extension isn't suitable. Add a single patch to DII implementing the new interface agreed to upstream [2]. Intended to enable the UMDs with the upstream interface while maintaining the old interface on DII. Quick IGT to prove this is working should be on the list shortly. v2: Move single patch into the GuC section of the pile, align with the agreed-to upstream interface, only include prelim* definitions. v3: Enable set_parallel2 via SET_PARAM IOCTL, resend for CI v4: Fix regression when the patch was merged - only do parallel checks on user engine sets Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> [1] https://patchwork.freedesktop.org/patch/432205/?series=89840&rev=1 [2] https://patchwork.freedesktop.org/patch/438911/?series=91417&rev=1 Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> --- baseline: b7227afd06bac1fe6719136e2ddd2bfed1d85feb pile-commit: b7a2c9136977a385659a71df837cbe5a1f775b32 range-diff: -: ------------ > 930: ad12b87b91af INTEL_DII/NOT_UPSTREAM: drm/i915: Introduce set_parallel2 extension 1083: 73e59e150cde ! 1084: 79b296835b1c INTEL_DII/FIXME: drm/i915/perf: add a parameter to control the size of OA buffer 1120: edbc20ae1355 ! 1121: 30d02d618229 INTEL_DII/FIXME: drm/i915: Add context parameter for debug flags 1293: 997b317fc408 ! 1294: 016b5903b0a0 INTEL_DII: drm/i915/perf: Add OA formats for XEHPSDV 1364: 136064b76b92 ! 1365: 5f564d553dc8 INTEL_DII: drm/i915/xehpsdv: Expand total numbers of supported engines up to 256 1403: 67b729033e82 ! 1404: 4398a2322f2f INTEL_DII: drm/i915/xehpsdv: Impose ULLS context restrictions 1405: b8dd2a22a952 ! 1406: dd2fab232cf1 INTEL_DII: drm/i915: Add context methods to suspend and resume requests 1670: b4633106fa13 ! 1671: 53b4a54ee2cc INTEL_DII: drm/i915/pxp: interface for marking contexts as using protected content 1671: 22369ab70556 ! 
1672: 42234590cdf5 INTEL_DII: drm/i915/pxp: start the arb session on demand series | 1 + ...IXME-drm-i915-perf-add-a-parameter-to-con.patch | 4 +- ...IXME-drm-i915-Add-context-parameter-for-d.patch | 18 +- ...-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch | 4 +- ...rm-i915-xehpsdv-Expand-total-numbers-of-s.patch | 2 +- ...rm-i915-xehpsdv-Impose-ULLS-context-restr.patch | 12 +- ...rm-i915-Add-context-methods-to-suspend-an.patch | 38 +- ...rm-i915-pxp-interface-for-marking-context.patch | 16 +- ...rm-i915-pxp-start-the-arb-session-on-dema.patch | 2 +- ...OT_UPSTREAM-drm-i915-Introduce-set_parall.patch | 676 +++++++++++++++++++++ 10 files changed, 725 insertions(+), 48 deletions(-) diff --git a/series b/series index 8b77d52df40c..7db508ea974d 100644 --- a/series +++ b/series @@ -929,6 +929,7 @@ 0001-INTEL_DII-drm-i915-guc-Increase-GuC-log-size-for-CON.patch 0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Dump-error-capture-t.patch 0001-INTEL_DII-NOT_UPSTREAM-drm-i915-guc-Dump-error-captu.patch +0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch 0001-INTEL_DII-END-GuC-submission-and-slpc-support.patch 0001-INTEL_DII-BEGIN-SR-IOV-ENABLING.patch 0001-INTEL_DII-drm-i915-guc-Update-GuC-to-62.0.3.patch diff --git a/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch b/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch index dd654f144374..b7a637b3813f 100644 --- a/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch +++ b/0001-INTEL_DII-FIXME-drm-i915-perf-add-a-parameter-to-con.patch @@ -384,8 +384,8 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h --- a/include/uapi/drm/i915_drm_prelim.h +++ b/include/uapi/drm/i915_drm_prelim.h -@@ -393,6 +393,36 @@ struct prelim_i915_context_param_engines { - #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ +@@ -508,6 +508,36 @@ struct 
prelim_i915_context_param_engines { + #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */ }; +enum prelim_drm_i915_perf_property_id { diff --git a/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch b/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch index dfd5790ac2b8..71a5943b5536 100644 --- a/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch +++ b/0001-INTEL_DII-FIXME-drm-i915-Add-context-parameter-for-d.patch @@ -44,7 +44,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ } static void __free_engines(struct i915_gem_engines *e, unsigned int count) -@@ -2252,6 +2255,76 @@ static int set_priority(struct i915_gem_context *ctx, +@@ -2436,6 +2439,76 @@ static int set_priority(struct i915_gem_context *ctx, return 0; } @@ -121,7 +121,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) -@@ -2321,6 +2394,11 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, +@@ -2505,6 +2578,11 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_ringsize(ctx, args); break; @@ -133,7 +133,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; -@@ -2777,6 +2855,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, +@@ -2961,6 +3039,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ringsize(ctx, args); break; @@ -184,7 +184,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ 
b/drivers/gpu/drm/i915/gt/intel_context.h -@@ -285,6 +285,24 @@ intel_context_clear_nopreempt(struct intel_context *ce) +@@ -296,6 +296,24 @@ intel_context_clear_nopreempt(struct intel_context *ce) ce->emit_bb_start = ce->engine->emit_bb_start; } @@ -212,19 +212,19 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/i diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h -@@ -114,6 +114,7 @@ struct intel_context { - #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 +@@ -115,6 +115,7 @@ struct intel_context { #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 -+#define CONTEXT_DEBUG 10 + #define CONTEXT_NO_PREEMPT_MID_BATCH 10 ++#define CONTEXT_DEBUG 11 struct { u64 timeout_us; diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h --- a/include/uapi/drm/i915_drm_prelim.h +++ b/include/uapi/drm/i915_drm_prelim.h -@@ -395,6 +395,32 @@ struct prelim_i915_context_param_engines { - #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ +@@ -510,6 +510,32 @@ struct prelim_i915_context_param_engines { + #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */ }; +struct prelim_drm_i915_gem_context_param { diff --git a/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch b/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch index 19a07b3926ae..f62d7848e091 100644 --- a/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch +++ b/0001-INTEL_DII-drm-i915-perf-Add-OA-formats-for-XEHPSDV.patch @@ -204,8 +204,8 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h --- a/include/uapi/drm/i915_drm_prelim.h +++ 
b/include/uapi/drm/i915_drm_prelim.h -@@ -435,6 +435,27 @@ struct prelim_i915_context_param_engines { - #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ +@@ -550,6 +550,27 @@ struct prelim_i915_context_param_engines { + #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */ }; +enum prelim_drm_i915_oa_format { diff --git a/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch b/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch index 05a84884a3d1..ee486b95d11e 100644 --- a/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch +++ b/0001-INTEL_DII-drm-i915-xehpsdv-Expand-total-numbers-of-s.patch @@ -76,7 +76,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9 /* Kernel clipping was a DRI1 misfeature */ if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { -@@ -3233,9 +3235,12 @@ eb_select_engine(struct i915_execbuffer *eb) +@@ -3233,9 +3235,12 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number) int err; if (i915_gem_context_user_engines(eb->gem_context)) diff --git a/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch b/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch index 38ad84c4dc12..80880e3008cc 100644 --- a/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch +++ b/0001-INTEL_DII-drm-i915-xehpsdv-Impose-ULLS-context-restr.patch @@ -76,7 +76,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9 if (intel_context_nopreempt(eb->context) || intel_context_debug(eb->context)) __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); -@@ -3453,6 +3462,13 @@ static int eb_request_add(struct i915_execbuffer *eb, int err) +@@ -3463,6 +3472,13 @@ static int eb_request_add(struct i915_execbuffer *eb, int err) trace_i915_request_add(rq); @@ -90,7 +90,7 @@ diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9 prev = __i915_request_commit(rq); /* Check that the context wasn't destroyed before submission */ -@@ -3531,6 +3547,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, +@@ -3541,6 +3557,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, int err; bool first = batch_number == 0; bool last = batch_number + 1 == num_batches; @@ -98,7 +98,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & -@@ -3582,6 +3599,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, +@@ -3592,6 +3609,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; @@ -109,7 +109,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9 + goto err_context; + } + - err = eb_select_engine(&eb); + err = eb_select_engine(&eb, batch_number); if (unlikely(err)) goto err_context; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -239,7 +239,7 @@ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_req /* * Requests on the same timeline are explicitly ordered, along * with their dependencies, by i915_request_add() which ensures -@@ -2126,6 +2181,7 @@ long i915_request_wait(struct i915_request *rq, +@@ -2121,6 +2176,7 @@ long i915_request_wait(struct i915_request *rq, { might_sleep(); GEM_BUG_ON(timeout < 0); @@ -247,7 +247,7 @@ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_req if (dma_fence_is_signaled(&rq->fence)) return timeout; -@@ -2331,6 +2387,8 @@ static struct i915_global_request global = { { +@@ -2326,6 +2382,8 @@ static struct i915_global_request global = { { int __init i915_global_request_init(void) { diff --git a/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch 
b/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch index 7d523c8dadba..44fd93184b8a 100644 --- a/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch +++ b/0001-INTEL_DII-drm-i915-Add-context-methods-to-suspend-an.patch @@ -52,7 +52,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { -@@ -475,6 +481,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) +@@ -476,6 +482,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->guc_id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id_link); @@ -62,7 +62,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i i915_active_init(&ce->active, __intel_context_active, __intel_context_retire); } -@@ -485,6 +494,7 @@ void intel_context_fini(struct intel_context *ce) +@@ -486,6 +495,7 @@ void intel_context_fini(struct intel_context *ce) if (ce->last_rq) i915_request_put(ce->last_rq); @@ -73,7 +73,7 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/i diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h -@@ -252,6 +252,54 @@ static inline bool intel_context_ban(struct intel_context *ce, +@@ -263,6 +263,54 @@ static inline bool intel_context_ban(struct intel_context *ce, return ret; } @@ -152,10 +152,10 @@ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i91 void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); -@@ -241,6 +248,9 @@ struct intel_context { +@@ -245,6 +252,9 @@ struct intel_context { - /* Last request submitted on a parent */ - struct i915_request *last_rq; + /* Parallel submit mutex */ + struct mutex parallel_submit; + + /* GuC context blocked fence */ + struct i915_sw_fence 
guc_blocked; @@ -231,7 +231,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm if (!enabled) { GEM_BUG_ON(context_pending_enable(ce)); -@@ -1103,6 +1137,8 @@ static void __guc_context_destroy(struct intel_context *ce); +@@ -1102,6 +1136,8 @@ static void __guc_context_destroy(struct intel_context *ce); static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); static void guc_signal_context_fence(struct intel_context *ce); static void guc_cancel_context_requests(struct intel_context *ce); @@ -240,7 +240,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) { -@@ -1143,6 +1179,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) +@@ -1142,6 +1178,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) /* Not mutualy exclusive with above if statement. */ if (pending_disable) { @@ -249,7 +249,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm guc_signal_context_fence(ce); if (banned) { guc_cancel_context_requests(ce); -@@ -1150,7 +1188,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) +@@ -1149,7 +1187,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) } intel_context_sched_disable_unpin(ce); atomic_dec(&guc->outstanding_submission_g2h); @@ -262,7 +262,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm } } } -@@ -2549,6 +2592,22 @@ static void guc_parent_context_unpin(struct intel_context *ce) +@@ -2551,6 +2594,22 @@ static void guc_parent_context_unpin(struct intel_context *ce) __guc_context_unpin(ce); } @@ -285,7 +285,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm static void __guc_context_sched_disable(struct intel_guc *guc, struct intel_context *ce, u16 guc_id) -@@ -2576,10 +2635,13 @@ static void 
__guc_context_sched_disable(struct intel_guc *guc, +@@ -2578,10 +2637,13 @@ static void __guc_context_sched_disable(struct intel_guc *guc, G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); } @@ -299,7 +299,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm intel_context_get(ce); return ce->guc_id; -@@ -2677,6 +2739,132 @@ static void guc_context_sched_disable(struct intel_context *ce) +@@ -2679,6 +2741,132 @@ static void guc_context_sched_disable(struct intel_context *ce) intel_context_sched_disable_unpin(ce); } @@ -432,7 +432,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm int intel_guc_modify_scheduling(struct intel_guc *guc, bool enable) { struct intel_gt *gt = guc_to_gt(guc); -@@ -2991,6 +3179,9 @@ static const struct intel_context_ops guc_context_ops = { +@@ -2993,6 +3181,9 @@ static const struct intel_context_ops guc_context_ops = { .ban = guc_context_ban, @@ -442,7 +442,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm .enter = intel_context_enter_engine, .exit = guc_context_exit, -@@ -3380,6 +3571,9 @@ static const struct intel_context_ops virtual_guc_context_ops = { +@@ -3382,6 +3573,9 @@ static const struct intel_context_ops virtual_guc_context_ops = { .ban = guc_context_ban, @@ -452,7 +452,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm .enter = guc_virtual_context_enter, .exit = guc_virtual_context_exit, -@@ -3457,6 +3651,9 @@ static const struct intel_context_ops parent_context_ops = { +@@ -3459,6 +3653,9 @@ static const struct intel_context_ops parent_context_ops = { .ban = guc_context_ban, @@ -462,7 +462,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, -@@ -3476,6 +3673,9 @@ static const struct intel_context_ops virtual_parent_context_ops = { +@@ -3478,6 +3675,9 @@ static const struct intel_context_ops 
virtual_parent_context_ops = { .ban = guc_context_ban, @@ -472,7 +472,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm .enter = guc_virtual_context_enter, .exit = guc_virtual_context_exit, -@@ -3487,6 +3687,9 @@ static const struct intel_context_ops virtual_parent_context_ops = { +@@ -3489,6 +3689,9 @@ static const struct intel_context_ops virtual_parent_context_ops = { static const struct intel_context_ops child_context_ops = { .alloc = guc_context_alloc, @@ -482,7 +482,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm .enter = intel_context_enter_engine, .exit = guc_context_exit, -@@ -3497,6 +3700,9 @@ static const struct intel_context_ops child_context_ops = { +@@ -3499,6 +3702,9 @@ static const struct intel_context_ops child_context_ops = { static const struct intel_context_ops virtual_child_context_ops = { .alloc = guc_virtual_context_alloc, @@ -492,7 +492,7 @@ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm .enter = guc_virtual_context_enter, .exit = guc_virtual_context_exit, -@@ -4440,6 +4646,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, +@@ -4441,6 +4647,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, clr_context_banned(ce); clr_context_pending_disable(ce); __guc_signal_context_fence(ce); diff --git a/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch b/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch index 6b38bd36d21b..8a6b9561eb24 100644 --- a/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch +++ b/0001-INTEL_DII-drm-i915-pxp-interface-for-marking-context.patch @@ -56,7 +56,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ #include "i915_gem_context.h" #include "i915_gem_ioctls.h" #include "i915_globals.h" -@@ -2574,6 +2576,40 @@ static int set_acc(struct i915_gem_context *ctx, +@@ -2769,6 +2771,40 @@ static int set_acc(struct 
i915_gem_context *ctx, return 0; } @@ -97,7 +97,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args, -@@ -2607,6 +2643,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, +@@ -2802,6 +2838,8 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = -EPERM; else if (args->value) i915_gem_context_set_bannable(ctx); @@ -106,7 +106,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ else i915_gem_context_clear_bannable(ctx); break; -@@ -2614,10 +2652,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, +@@ -2809,10 +2847,12 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -122,7 +122,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ break; case I915_CONTEXT_PARAM_PRIORITY: -@@ -2664,6 +2704,9 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, +@@ -2865,6 +2905,9 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_DEBUG_FLAGS: ret = set_debug_flags(ctx, args); break; @@ -132,7 +132,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ case I915_CONTEXT_PARAM_BAN_PERIOD: default: -@@ -3157,6 +3200,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, +@@ -3358,6 +3401,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_DEBUG_FLAGS: ret = get_debug_flags(ctx, args); break; @@ -142,7 +142,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/ case I915_CONTEXT_PARAM_BAN_PERIOD: default: -@@ -3281,6 +3327,11 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, +@@ -3482,6 +3528,11 @@ int 
i915_gem_context_reset_stats_ioctl(struct drm_device *dev, args->batch_active = atomic_read(&ctx->guilty_count); args->batch_pending = atomic_read(&ctx->active_count); @@ -225,7 +225,7 @@ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i9 eb->gem_context = ctx; if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; -@@ -3301,6 +3308,17 @@ eb_select_engine(struct i915_execbuffer *eb) +@@ -3311,6 +3318,17 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number) intel_gt_pm_get(ce->engine->gt); @@ -348,7 +348,7 @@ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h --- a/include/uapi/drm/i915_drm_prelim.h +++ b/include/uapi/drm/i915_drm_prelim.h -@@ -893,6 +893,26 @@ struct prelim_drm_i915_gem_context_param { +@@ -1003,6 +1003,26 @@ struct prelim_drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_ACC 0xd }; diff --git a/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch b/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch index 5ee627b00811..4b4326057959 100644 --- a/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch +++ b/0001-INTEL_DII-drm-i915-pxp-start-the-arb-session-on-dema.patch @@ -22,7 +22,7 @@ Reviewed-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c -@@ -3309,9 +3309,11 @@ eb_select_engine(struct i915_execbuffer *eb) +@@ -3319,9 +3319,11 @@ eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number) intel_gt_pm_get(ce->engine->gt); if (i915_gem_context_uses_protected_content(eb->gem_context)) { diff --git a/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch 
b/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch new file mode 100644 index 000000000000..415fbd930383 --- /dev/null +++ b/0001-INTEL_DII-NOT_UPSTREAM-drm-i915-Introduce-set_parall.patch @@ -0,0 +1,676 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Matthew Brost <matthew.brost@xxxxxxxxx> +Date: Wed, 7 Jul 2021 16:55:03 -0700 +Subject: [PATCH] INTEL_DII/NOT_UPSTREAM: drm/i915: Introduce set_parallel2 + extension + +Based on upstream feedback the set_parallel extension isn't suitable as +it looks a bit too much like the bonding extension. Introduce a +set_parallel2 extension which configures parallel submission in a single +extension and in a single slot. This compares to old set_parallel +extension which configured parallel submission across multiple slots. + +Also remove the ability for the user to pass in the number of BBs in +the execbuf IOCTL. The number of BBs is now implied based on the width +of the context in the slot. + +This patch is intended to enable UMDs for the upstream direction while +maintaining the old set_parallel extension to not break UMDs. Once UMDs +have been updated to use new extension the old one can be removed from +DII. 
+ +v2: Only enable parallel submission on engines set by user + +Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> +--- + drivers/gpu/drm/i915/gem/i915_gem_context.c | 190 +++++++++++++++++- + .../gpu/drm/i915/gem/i915_gem_context_types.h | 6 - + .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 73 +++++-- + drivers/gpu/drm/i915/gt/intel_context.c | 2 + + drivers/gpu/drm/i915/gt/intel_context.h | 11 + + drivers/gpu/drm/i915/gt/intel_context_types.h | 4 + + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +- + drivers/gpu/drm/i915/i915_request.c | 7 +- + include/uapi/drm/i915_drm_prelim.h | 115 +++++++++++ + 9 files changed, 376 insertions(+), 36 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c +--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c +@@ -374,7 +374,6 @@ void i915_gem_context_release(struct kref *ref) + mutex_destroy(&ctx->engines_mutex); + mutex_destroy(&ctx->lut_mutex); + mutex_destroy(&ctx->mutex); +- mutex_destroy(&ctx->parallel_submit); + + kfree_rcu(ctx, rcu); + } +@@ -699,8 +698,6 @@ __create_context(struct drm_i915_private *i915) + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); + +- mutex_init(&ctx->parallel_submit); +- + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); + +@@ -1857,6 +1854,48 @@ static bool validate_parallel_engines_layout(const struct set_engines *set) + return true; + } + ++/* ++ * Engine must be same class and form a logically contiguous mask. 
++ * ++ * FIXME: Logical mask check not 100% correct but good enough for the PoC ++ */ ++static bool __validate_parallel_engines_layout(struct drm_i915_private *i915, ++ struct intel_context *parent) ++{ ++ u8 engine_class = parent->engine->class; ++ u8 num_siblings = hweight_long(parent->engine->logical_mask); ++ struct intel_context *child; ++ intel_engine_mask_t logical_mask = parent->engine->logical_mask; ++ ++ for_each_child(parent, child) { ++ if (child->engine->class != engine_class) { ++ drm_dbg(&i915->drm, "Class mismatch: %u, %u", ++ engine_class, child->engine->class); ++ return false; ++ } ++ if (hweight_long(child->engine->logical_mask) != num_siblings) { ++ drm_dbg(&i915->drm, "Sibling mismatch: %u, %lu", ++ num_siblings, ++ hweight_long(child->engine->logical_mask)); ++ return false; ++ } ++ if (logical_mask & child->engine->logical_mask) { ++ drm_dbg(&i915->drm, "Overlapping logical mask: 0x%04x, 0x%04x", ++ logical_mask, child->engine->logical_mask); ++ return false; ++ } ++ logical_mask |= child->engine->logical_mask; ++ } ++ ++ if (!is_power_of_2((logical_mask >> (ffs(logical_mask) - 1)) + 1)) { ++ drm_dbg(&i915->drm, "Non-contiguous logical mask: 0x%04x", ++ logical_mask); ++ return false; ++ } ++ ++ return true; ++} ++ + static int + set_engines__parallel_submit(struct i915_user_extension __user *base, void *data) + { +@@ -2009,11 +2048,156 @@ set_engines__parallel_submit(struct i915_user_extension __user *base, void *data + return err; + } + ++static int ++set_engines__parallel2_submit(struct i915_user_extension __user *base, ++ void *data) ++{ ++ struct prelim_drm_i915_context_engines_parallel2_submit __user *ext = ++ container_of_user(base, typeof(*ext), base); ++ const struct set_engines *set = data; ++ struct drm_i915_private *i915 = set->ctx->i915; ++ struct intel_context *parent, *child, *ce; ++ u64 flags; ++ int err = 0, n, i, j; ++ u16 slot, width, num_siblings; ++ struct intel_engine_cs **siblings = NULL; ++ ++ if 
(!(intel_uc_uses_guc_submission(&i915->gt.uc))) ++ return -ENODEV; ++ ++ if (get_user(slot, &ext->engine_index)) ++ return -EFAULT; ++ ++ if (get_user(width, &ext->width)) ++ return -EFAULT; ++ ++ if (get_user(num_siblings, &ext->num_siblings)) ++ return -EFAULT; ++ ++ if (slot >= set->engines->num_engines) { ++ drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", ++ slot, set->engines->num_engines); ++ return -EINVAL; ++ } ++ ++ parent = set->engines->engines[slot]; ++ if (parent) { ++ drm_dbg(&i915->drm, "Context index[%d] not NULL\n", slot); ++ return -EINVAL; ++ } ++ ++ if (get_user(flags, &ext->flags)) ++ return -EFAULT; ++ ++ if (flags) { ++ drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags); ++ return -EINVAL; ++ } ++ ++ for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { ++ err = check_user_mbz(&ext->mbz64[n]); ++ if (err) ++ return err; ++ } ++ ++ if (width < 1) { ++ drm_dbg(&i915->drm, "Width (%d) < 1 \n", width); ++ return -EINVAL; ++ } ++ ++ if (num_siblings < 1) { ++ drm_dbg(&i915->drm, "Number siblings (%d) < 1 \n", ++ num_siblings); ++ return -EINVAL; ++ } ++ ++ siblings = kmalloc_array(num_siblings, ++ sizeof(*siblings), ++ GFP_KERNEL); ++ if (!siblings) ++ return -ENOMEM; ++ ++ mutex_lock(&set->ctx->mutex); ++ ++ /* Create contexts / engines */ ++ for (i = 0; i < width; ++i) { ++ for (j = 0; j < num_siblings; ++j) { ++ struct i915_engine_class_instance ci; ++ ++ if (copy_from_user(&ci, &ext->engines[i * num_siblings + j], ++ sizeof(ci))) { ++ err = -EFAULT; ++ goto out_err; ++ } ++ ++ siblings[j] = intel_engine_lookup_user(i915, ++ ci.engine_class, ++ ci.engine_instance); ++ if (!siblings[j]) { ++ drm_dbg(&i915->drm, ++ "Invalid sibling[%d]: { class:%d, inst:%d }\n", ++ n, ci.engine_class, ci.engine_instance); ++ err = -EINVAL; ++ goto out_err; ++ } ++ } ++ ++ ce = intel_engine_create_virtual(siblings, num_siblings, ++ FORCE_VIRTUAL); ++ if (IS_ERR(ce)) { ++ err = PTR_ERR(ce); ++ goto out_err; ++ } ++ intel_context_set_gem(ce, set->ctx); ++ ++ 
if (i == 0) { ++ parent = ce; ++ __set_bit(CONTEXT_NO_PREEMPT_MID_BATCH, &ce->flags); ++ } else { ++ intel_context_bind_parent_child(parent, ce); ++ err = intel_context_alloc_state(ce); ++ if (err) ++ goto out_err; ++ } ++ } ++ ++ if (!__validate_parallel_engines_layout(i915, parent)) { ++ drm_dbg(&i915->drm, "Invalidate parallel context layout"); ++ err = -EINVAL; ++ goto out_err; ++ } ++ ++ intel_guc_configure_parent_context(parent); ++ if (cmpxchg(&set->engines->engines[slot], NULL, parent)) { ++ err = -EEXIST; ++ goto out_err; ++ } ++ ++ kfree(siblings); ++ mutex_unlock(&set->ctx->mutex); ++ ++ return 0; ++ ++out_err: ++ if (parent) { ++ for_each_child(parent, child) ++ intel_context_put(child); ++ intel_context_put(parent); ++ set->engines->engines[slot] = NULL; ++ } ++ kfree(siblings); ++ mutex_unlock(&set->ctx->mutex); ++ ++ return err; ++} ++ + static const i915_user_extension_fn set_engines__extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, + [PRELIM_I915_USER_EXT_MASK(PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)] = + set_engines__parallel_submit, ++ [PRELIM_I915_USER_EXT_MASK(PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT)] = ++ set_engines__parallel2_submit, + }; + + static int +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h ++++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +@@ -194,12 +194,6 @@ struct i915_gem_context { + */ + u64 fence_context; + +- /** +- * @parallel_submit: Ensure only 1 parallel submission is happening on +- * this context at a time. +- */ +- struct mutex parallel_submit; +- + /** + * @seqno: Seqno when using when a parallel context. 
+ */ +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +@@ -1633,7 +1633,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, + goto err_unmap; + + if (engine == eb->context->engine && +- !i915_gem_context_is_parallel(eb->gem_context)) { ++ !intel_context_is_parallel(eb->context)) { + rq = i915_request_create(eb->context); + } else { + struct intel_context *ce = eb->reloc_context; +@@ -1727,7 +1727,7 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, + struct intel_engine_cs *engine = eb->engine; + + if (!reloc_can_use_engine(engine) || +- i915_gem_context_is_parallel(eb->gem_context)) { ++ intel_context_is_parallel(eb->context)) { + engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; + if (!engine) + return ERR_PTR(-ENODEV); +@@ -3223,7 +3223,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb) + } + + static int +-eb_select_engine(struct i915_execbuffer *eb) ++eb_select_engine(struct i915_execbuffer *eb, unsigned int batch_number) + { + struct intel_context *ce; + unsigned int idx; +@@ -3238,6 +3238,16 @@ eb_select_engine(struct i915_execbuffer *eb) + if (IS_ERR(ce)) + return PTR_ERR(ce); + ++ if (batch_number > 0 && ++ !i915_gem_context_is_parallel(eb->gem_context)) { ++ struct intel_context *parent = ce; ++ for_each_child(parent, ce) ++ if (!--batch_number) ++ break; ++ intel_context_put(parent); ++ intel_context_get(ce); ++ } ++ + intel_gt_pm_get(ce->engine->gt); + + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { +@@ -3562,7 +3572,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, + if (unlikely(err)) + goto err_destroy; + +- err = eb_select_engine(&eb); ++ err = eb_select_engine(&eb, batch_number); + if (unlikely(err)) + goto err_context; + +@@ -3751,6 +3761,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + const size_t count = args->buffer_count; + 
unsigned int num_batches, i; + int err, start_context; ++ bool is_parallel = false; ++ struct intel_context *parent = NULL; + + if (!check_buffer_count(count)) { + drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); +@@ -3782,15 +3794,35 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + I915_EXEC_NUMBER_BB_LSB) + + ((args->flags & PRELIM_I915_EXEC_NUMBER_BB_MASK) >> + PRELIM_I915_EXEC_NUMBER_BB_LSB)) + 1; +- if (i915_gem_context_is_parallel(ctx)) { +- if (num_batches > count || +- start_context + num_batches > ctx->width) { +- err = -EINVAL; +- goto err_context; ++ ++ if (i915_gem_context_user_engines(ctx)) { ++ parent = i915_gem_context_get_engine(ctx, start_context); ++ if (IS_ERR(parent)) { ++ i915_gem_context_put(ctx); ++ return PTR_ERR(parent); + } + +- if (i915_gem_context_is_bb_preempt_boundary(ctx) && +- (start_context || num_batches != ctx->width)) { ++ is_parallel = i915_gem_context_is_parallel(ctx) || ++ intel_context_is_parallel(parent); ++ if (i915_gem_context_is_parallel(ctx)) { ++ if (num_batches > count || ++ start_context + num_batches > ctx->width) { ++ err = -EINVAL; ++ goto err_context; ++ } ++ ++ if (i915_gem_context_is_bb_preempt_boundary(ctx) && ++ (start_context || num_batches != ctx->width)) { ++ err = -EINVAL; ++ goto err_context; ++ } ++ } else if (intel_context_is_parallel(parent)) { ++ if (num_batches != 1) ++ return -EINVAL; ++ num_batches = parent->guc_number_children + 1; ++ if (num_batches > count) ++ return -EINVAL; ++ } else if(num_batches > 1) { + err = -EINVAL; + goto err_context; + } +@@ -3827,8 +3859,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + * properly, also this is needed to create an excl fence for an dma buf + * objects these BBs touch. 
+ */ +- if (args->flags & I915_EXEC_FENCE_OUT || +- i915_gem_context_is_parallel(ctx)) { ++ if (args->flags & I915_EXEC_FENCE_OUT || is_parallel) { + out_fences = kcalloc(num_batches, sizeof(*out_fences), + GFP_KERNEL); + if (!out_fences) { +@@ -3874,8 +3905,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + * in intel_context sequence, thus only 1 submission can happen at a + * time. + */ +- if (i915_gem_context_is_parallel(ctx)) +- mutex_lock(&ctx->parallel_submit); ++ if (is_parallel) ++ mutex_lock(&parent->parallel_submit); + + err = i915_gem_do_execbuffer(dev, file, args, exec2_list, + args->flags & I915_EXEC_BATCH_FIRST ? +@@ -3889,8 +3920,10 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + &ww); + + for (i = 1; err == 0 && i < num_batches; i++) { +- args->flags &= ~I915_EXEC_RING_MASK; +- args->flags |= start_context + i; ++ if (i915_gem_context_is_parallel(ctx)) { ++ args->flags &= ~I915_EXEC_RING_MASK; ++ args->flags |= start_context + i; ++ } + args->batch_len = 0; + + err = i915_gem_do_execbuffer(dev, file, args, exec2_list, +@@ -3905,8 +3938,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + &ww); + } + +- if (i915_gem_context_is_parallel(ctx)) +- mutex_unlock(&ctx->parallel_submit); ++ if (is_parallel) ++ mutex_unlock(&parent->parallel_submit); + + /* + * Now that we have begun execution of the batchbuffer, we ignore +@@ -4009,6 +4042,8 @@ end:; + dma_fence_put(in_fence); + err_context: + i915_gem_context_put(ctx); ++ if (parent) ++ intel_context_put(parent); + + return err; + } +diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c +--- a/drivers/gpu/drm/i915/gt/intel_context.c ++++ b/drivers/gpu/drm/i915/gt/intel_context.c +@@ -460,6 +460,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) + INIT_LIST_HEAD(&ce->signals); + + mutex_init(&ce->pin_mutex); ++ mutex_init(&ce->parallel_submit); + + 
spin_lock_init(&ce->guc_state.lock); + INIT_LIST_HEAD(&ce->guc_state.fences); +@@ -491,6 +492,7 @@ void intel_context_fini(struct intel_context *ce) + intel_context_put(child); + + mutex_destroy(&ce->pin_mutex); ++ mutex_destroy(&ce->parallel_submit); + i915_active_fini(&ce->active); + } + +diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h +--- a/drivers/gpu/drm/i915/gt/intel_context.h ++++ b/drivers/gpu/drm/i915/gt/intel_context.h +@@ -52,6 +52,11 @@ static inline bool intel_context_is_parent(struct intel_context *ce) + return !!ce->guc_number_children; + } + ++static inline bool intel_context_is_parallel(struct intel_context *ce) ++{ ++ return intel_context_is_child(ce) || intel_context_is_parent(ce); ++} ++ + /* Only should be called directly by selftests */ + void __intel_context_bind_parent_child(struct intel_context *parent, + struct intel_context *child); +@@ -204,6 +209,12 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) + return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); + } + ++static inline bool ++intel_context_is_no_preempt_mid_batch(const struct intel_context *ce) ++{ ++ return test_bit(CONTEXT_NO_PREEMPT_MID_BATCH, &ce->flags); ++} ++ + static inline bool intel_context_is_closed(const struct intel_context *ce) + { + return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); +diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h +--- a/drivers/gpu/drm/i915/gt/intel_context_types.h ++++ b/drivers/gpu/drm/i915/gt/intel_context_types.h +@@ -114,6 +114,7 @@ struct intel_context { + #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 + #define CONTEXT_NOPREEMPT 8 + #define CONTEXT_LRCA_DIRTY 9 ++#define CONTEXT_NO_PREEMPT_MID_BATCH 10 + + struct { + u64 timeout_us; +@@ -239,6 +240,9 @@ struct intel_context { + + /* Last request submitted on a parent */ + struct i915_request *last_rq; ++ ++ /* Parallel submit mutex */ ++ struct mutex parallel_submit; + 
}; + + #endif /* __INTEL_CONTEXT_TYPES__ */ +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +@@ -798,8 +798,7 @@ static inline int rq_prio(const struct i915_request *rq) + + static inline bool is_multi_lrc(struct intel_context *ce) + { +- return intel_context_is_child(ce) || +- intel_context_is_parent(ce); ++ return intel_context_is_parallel(ce); + } + + static inline bool is_multi_lrc_rq(struct i915_request *rq) +@@ -3458,6 +3457,7 @@ void intel_guc_configure_parent_context(struct intel_context *ce) + bb_preempt_boundary = + i915_gem_context_is_bb_preempt_boundary(ctx); + rcu_read_unlock(); ++ bb_preempt_boundary |= intel_context_is_no_preempt_mid_batch(ce); + if (bb_preempt_boundary) { + ce->emit_bb_start = emit_bb_start_parent_bb_preempt_boundary; + ce->emit_fini_breadcrumb = +diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -1606,14 +1606,9 @@ i915_request_await_object(struct i915_request *to, + return ret; + } + +-static inline bool is_parallel(struct intel_context *ce) +-{ +- return intel_context_is_child(ce) || intel_context_is_parent(ce); +-} +- + static inline bool is_parallel_rq(struct i915_request *rq) + { +- return is_parallel(rq->context); ++ return intel_context_is_parallel(rq->context); + } + + static inline struct intel_context *request_to_parent(struct i915_request *rq) +diff --git a/include/uapi/drm/i915_drm_prelim.h b/include/uapi/drm/i915_drm_prelim.h +--- a/include/uapi/drm/i915_drm_prelim.h ++++ b/include/uapi/drm/i915_drm_prelim.h +@@ -370,9 +370,124 @@ struct prelim_i915_context_engines_parallel_submit { + } __attribute__ ((packed)); + #define i915_context_engines_parallel_submit prelim_i915_context_engines_parallel_submit + ++/** ++ * 
struct prelim_drm_i915_context_engines_parallel2_submit - Configure engine ++ * for parallel submission. ++ * ++ * Setup a slot in the context engine map to allow multiple BBs to be submitted ++ * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU ++ * in parallel. Multiple hardware contexts are created internally in the i915 ++ * to run these BBs. Once a slot is configured for N BBs only N BBs can be ++ * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user ++ * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how ++ * many BBs there are based on the slot's configuration. The N BBs are the last ++ * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. ++ * ++ * The default placement behavior is to create implicit bonds between each ++ * context if each context maps to more than 1 physical engine (e.g. context is ++ * a virtual engine). Also we only allow contexts of same engine class and these ++ * contexts must be in logically contiguous order. Examples of the placement ++ * behavior are described below. Lastly, the default is to not allow BBs to ++ * be preempted mid BB, rather insert coordinated preemption on all hardware ++ * contexts between each set of BBs. Flags may be added in the future to change ++ * both of these default behaviors. ++ * ++ * Returns -EINVAL if hardware context placement configuration is invalid or if ++ * the placement configuration isn't supported on the platform / submission ++ * interface. ++ * Returns -ENODEV if extension isn't supported on the platform / submission ++ * interface. ++ * ++ * .. 
code-block:: ++ * ++ * Example 1 pseudo code: ++ * CS[X] = generic engine of same class, logical instance X ++ * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE ++ * set_engines(INVALID) ++ * set_parallel(engine_index=0, width=2, num_siblings=1, ++ * engines=CS[0],CS[1]) ++ * ++ * Results in the following valid placement: ++ * CS[0], CS[1] ++ * ++ * Example 2 pseudo code: ++ * CS[X] = generic engine of same class, logical instance X ++ * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE ++ * set_engines(INVALID) ++ * set_parallel(engine_index=0, width=2, num_siblings=2, ++ * engines=CS[0],CS[2],CS[1],CS[3]) ++ * ++ * Results in the following valid placements: ++ * CS[0], CS[1] ++ * CS[2], CS[3] ++ * ++ * This can also be thought of as 2 virtual engines described by a 2-D array ++ * in the engines field, with bonds placed between each index of the ++ * virtual engines, e.g. CS[0] is bonded to CS[1], CS[2] is bonded to ++ * CS[3]. ++ * VE[0] = CS[0], CS[2] ++ * VE[1] = CS[1], CS[3] ++ * ++ * Example 3 pseudo code: ++ * CS[X] = generic engine of same class, logical instance X ++ * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE ++ * set_engines(INVALID) ++ * set_parallel(engine_index=0, width=2, num_siblings=2, ++ * engines=CS[0],CS[1],CS[1],CS[3]) ++ * ++ * Results in the following valid and invalid placements: ++ * CS[0], CS[1] ++ * CS[1], CS[3] - Not logically contiguous, return -EINVAL ++ */ ++struct prelim_drm_i915_context_engines_parallel2_submit { ++ /** ++ * @base: base user extension. 
++ */ ++ struct i915_user_extension base; ++ ++ /** ++ * @engine_index: slot for parallel engine ++ */ ++ __u16 engine_index; ++ ++ /** ++ * @width: number of contexts per parallel engine ++ */ ++ __u16 width; ++ ++ /** ++ * @num_siblings: number of siblings per context ++ */ ++ __u16 num_siblings; ++ ++ /** ++ * @mbz16: reserved for future use; must be zero ++ */ ++ __u16 mbz16; ++ ++ /** ++ * @flags: all undefined flags must be zero; currently no flags are defined ++ */ ++ __u64 flags; ++ ++ /** ++ * @mbz64: reserved for future use; must be zero ++ */ ++ __u64 mbz64[3]; ++ ++ /** ++ * @engines: 2-d array of engine instances to configure parallel engine ++ * ++ * length = width (i) * num_siblings (j) ++ * index = j + i * num_siblings ++ */ ++ struct i915_engine_class_instance engines[0]; ++} __attribute__ ((packed)); ++ + struct prelim_i915_context_param_engines { + #define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT (PRELIM_I915_USER_EXT | 2) /* see prelim_i915_context_engines_parallel_submit */ + #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ ++#define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_drm_i915_context_engines_parallel2_submit */ + }; + + enum prelim_drm_i915_gem_memory_class { -- git-pile 0.97