On 13.01.2022 17:27, Matthew Brost wrote:
> Move the multi-lrc guc_id from the lower allocation partition (0 to
> number of multi-lrc guc_ids) to the upper allocation partition (number
> of single-lrc to max guc_ids).
>
> This will help when a native driver transitions to a PF after driver
> load time. If the perma-pinned guc_ids (kernel contexts) are in a low
> range it is easy to reduce the total number of guc_ids, as the
> allocated slrc ids remain in a valid range while the mlrc range moves
> to an unused range. Assuming no mlrc are allocated and few slrc are
> used, the native to PF transition is seamless for the guc_id resource.
>
> v2:
>  (Michal / Tvrtko)
>   - Add an explanation to commit message of why this patch is needed
>  (Michal / Piotr)
>   - Replace macros with functions
>  (Michal)
>   - Rework logic flow in new_mlrc_guc_id
>   - Unconditionally call bitmap_free
> v3:
>  (Michal)
>   - Move allocation of mlrc bitmap back to submission init
>  (CI)
>   - Resend for CI
>
> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 77 ++++++++++++++-----
>  1 file changed, 56 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 23a40f10d376d..fce58365b3ff8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
>
>  #define GUC_REQUEST_SIZE 64 /* bytes */
>
> -/*
> - * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> - * per the GuC submission interface. A different allocation algorithm is used
> - * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> - * partition the guc_id space. We believe the number of multi-lrc contexts in
> - * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
> - * multi-lrc.
> - */
> -#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
> -	((guc)->submission_state.num_guc_ids / 16)
> -
>  /*
>   * Below is a set of functions which control the GuC scheduling state which
>   * require a lock.
> @@ -1746,6 +1735,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
>  }
>
>  static void destroyed_worker_func(struct work_struct *w);
> +static int number_mlrc_guc_id(struct intel_guc *guc);
>
>  /*
>   * Set up the memory resources to be shared with the GuC (via the GGTT)
> @@ -1778,7 +1768,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>  			  destroyed_worker_func);
>
>  	guc->submission_state.guc_ids_bitmap =
> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> +		bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);

To fully benefit from the id partition flip we will likely have to
allocate the bitmap 'just-in-time', when the first mlrc id is needed -
something like you had in an early rev but abandoned to avoid an
allocation inside the spinlock. But then I'm wondering why we can't
alloc the bitmap in the mlrc case, while we do allow allocation for
slrc (as ida_simple_get may alloc, no?)
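Just to illustrate that idea, a minimal sketch of the 'just-in-time'
variant (guc_ensure_mlrc_bitmap() is a hypothetical helper, assuming it
would be called from pin_guc_id() before submission_state.lock is
taken):

static int guc_ensure_mlrc_bitmap(struct intel_guc *guc)
{
	unsigned long *bitmap;
	unsigned long flags;

	/* Fast path: the bitmap is only ever set once, never cleared until fini */
	if (likely(guc->submission_state.guc_ids_bitmap))
		return 0;

	/* No spinlock held yet, so a sleeping allocation is still legal */
	bitmap = bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;

	spin_lock_irqsave(&guc->submission_state.lock, flags);
	if (guc->submission_state.guc_ids_bitmap)
		bitmap_free(bitmap);	/* lost the race, drop our copy */
	else
		guc->submission_state.guc_ids_bitmap = bitmap;
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);

	return 0;
}

That keeps GFP_KERNEL legal at the cost of a possible allocate-and-free
on a race, and new_mlrc_guc_id() could keep its GEM_BUG_ON on a missing
bitmap.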
>  	if (!guc->submission_state.guc_ids_bitmap)
>  		return -ENOMEM;
>
> @@ -1864,6 +1854,57 @@ static void guc_submit_request(struct i915_request *rq)
>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>  }
>
> +/*
> + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> + * per the GuC submission interface. A different allocation algorithm is used
> + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> + * partition the guc_id space. We believe the number of multi-lrc contexts in
> + * use should be low and 1/16 should be sufficient.

Do we have any other numbers as a guideline? While it is an easy
assumption that 1/16 of 64K contexts may be sufficient, what about 1/16
of 1K contexts? Will that work too?

Also, do we have to make a hard split? What if there are no users for
mlrc but more slrc contexts would be beneficial? Or the opposite?

> + */
> +#define MLRC_GUC_ID_RATIO	16
> +
> +static int number_mlrc_guc_id(struct intel_guc *guc)
> +{
> +	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
> +}
> +
> +static int number_slrc_guc_id(struct intel_guc *guc)
> +{
> +	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
> +}
> +
> +static int mlrc_guc_id_base(struct intel_guc *guc)
> +{
> +	return number_slrc_guc_id(guc);
> +}
> +
> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!intel_context_is_parent(ce));
> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> +
> +	ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> +				      number_mlrc_guc_id(guc),
> +				      order_base_2(ce->parallel.number_children
> +						   + 1));
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	return ret + mlrc_guc_id_base(guc);
> +}
> +
> +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	GEM_BUG_ON(intel_context_is_parent(ce));
> +
> +	return ida_simple_get(&guc->submission_state.guc_ids,
> +			      0, number_slrc_guc_id(guc),
> +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> +			      __GFP_NOWARN);

Are you sure that these gfp flags are safe for use under a spinlock?

-Michal

> +}
> +
>  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  {
>  	int ret;
> @@ -1871,16 +1912,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  	GEM_BUG_ON(intel_context_is_child(ce));
>
>  	if (intel_context_is_parent(ce))
> -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> -					      order_base_2(ce->parallel.number_children
> -							   + 1));
> +		ret = new_mlrc_guc_id(guc, ce);
>  	else
> -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> -				     guc->submission_state.num_guc_ids,
> -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> -				     __GFP_NOWARN);
> +		ret = new_slrc_guc_id(guc, ce);
> +
>  	if (unlikely(ret < 0))
>  		return ret;
>
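On the gfp question above: assuming new_slrc_guc_id() really is reached
from pin_guc_id() with submission_state.lock (a spinlock) held,
GFP_KERNEL - even with __GFP_RETRY_MAYFAIL | __GFP_NOWARN - still
permits direct reclaim, i.e. sleeping. A minimal sketch of a
non-sleeping variant (new_slrc_guc_id_atomic() is a hypothetical name,
not from the patch):

static int new_slrc_guc_id_atomic(struct intel_guc *guc)
{
	/*
	 * GFP_NOWAIT forbids direct reclaim, so the IDA node allocation
	 * cannot sleep; the trade-off is a higher chance of failure
	 * under memory pressure.
	 */
	return ida_simple_get(&guc->submission_state.guc_ids,
			      0, number_slrc_guc_id(guc),
			      GFP_NOWAIT | __GFP_NOWARN);
}

The alternative would be restructuring pin_guc_id() so the id is
allocated before the lock is taken.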