Zeng, Oak <oak.zeng@xxxxxxxxx> wrote on Fri [2023-Sep-15 17:56:56 +0200]:
>
> Thanks,
> Oak
>
> > -----Original Message-----
> > From: Das, Nirmoy <nirmoy.das@xxxxxxxxx>
> > Sent: Friday, September 15, 2023 4:34 AM
> > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > Cc: Zeng, Oak <oak.zeng@xxxxxxxxx>; chris.p.wilson@xxxxxxxxxxxxxxx;
> > Piorkowski, Piotr <piotr.piorkowski@xxxxxxxxx>; Shyti, Andi
> > <andi.shyti@xxxxxxxxx>; Mun, Gwan-gyeong <gwan-gyeong.mun@xxxxxxxxx>;
> > Roper, Matthew D <matthew.d.roper@xxxxxxxxx>; Das, Nirmoy
> > <nirmoy.das@xxxxxxxxx>
> > Subject: [PATCH 5/7] drm/i915: Implement GGTT update method with
> > MI_UPDATE_GTT
> >
> > Implement a GGTT update method using the blitter command
> > MI_UPDATE_GTT, and install the handlers if the platform requires it.
> >
> > v2: Make sure we hold the GT wakeref and the blitter engine wakeref
> > before we call mutex_lock/intel_context_enter below. When the GT/engine
> > is not awake, intel_context_enter calls into a runtime pm function,
> > which can end up in kmalloc/fs_reclaim. Triggering fs_reclaim while
> > holding a mutex lock is not allowed, because the shrinker can try to
> > take the same mutex lock: a circular lock dependency. So take the
> > GT/blitter engine wakerefs before calling mutex_lock, to break the
> > cycle.
> >
> > Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxxxx>
> > Signed-off-by: Oak Zeng <oak.zeng@xxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_ggtt.c | 235 +++++++++++++++++++++++++++
> >  1 file changed, 235 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index dd0ed941441a..b94de7cebfce 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -15,18 +15,23 @@
> >  #include "display/intel_display.h"
> >  #include "gem/i915_gem_lmem.h"
> >
> > +#include "intel_context.h"
> >  #include "intel_ggtt_gmch.h"
> > +#include "intel_gpu_commands.h"
> >  #include "intel_gt.h"
> >  #include "intel_gt_regs.h"
> >  #include "intel_pci_config.h"
> > +#include "intel_ring.h"
> >  #include "i915_drv.h"
> >  #include "i915_pci.h"
> > +#include "i915_request.h"
> >  #include "i915_scatterlist.h"
> >  #include "i915_utils.h"
> >  #include "i915_vgpu.h"
> >
> >  #include "intel_gtt.h"
> >  #include "gen8_ppgtt.h"
> > +#include "intel_engine_pm.h"
> >
> >  static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
> >  				   unsigned long color,
> > @@ -252,6 +257,145 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> >  	return pte;
> >  }
> >
> > +static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
> > +{
> > +	struct intel_gt *gt = ggtt->vm.gt;
> > +
> > +	return intel_gt_is_bind_context_ready(gt);
> > +}
> > +
> > +static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
> > +{
> > +	struct intel_context *ce;
> > +	struct intel_gt *gt = ggtt->vm.gt;
> > +
> > +	if (intel_gt_is_wedged(gt))
> > +		return NULL;
> > +
> > +	ce = gt->engine[BCS0]->bind_context;
> > +	GEM_BUG_ON(!ce);
> > +
> > +	/*
> > +	 * If the GT is not already awake at this stage then fall back
> > +	 * to the PCI-based GGTT update, otherwise __intel_wakeref_get_first()
> > +	 * would conflict with fs_reclaim trying to allocate memory while
> > +	 * doing rpm_resume().
> > +	 */
> > +	if (!intel_gt_pm_get_if_awake(gt))
> > +		return NULL;
> > +
> > +	intel_engine_pm_get(ce->engine);
> > +
> > +	return ce;
> > +}
> > +
> > +static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
> > +{
> > +	intel_engine_pm_put(ce->engine);
> > +	intel_gt_pm_put(ce->engine->gt);
> > +}
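
As an aside, the locking order that the v2 note describes is worth
spelling out. A minimal sketch, reusing the helpers from this patch
(the comments are my reading of it, not text from the patch):

	/*
	 * Do not take ce->timeline->mutex first: waking the GT may
	 * rpm_resume() and allocate, and fs_reclaim -> shrinker can
	 * want that same mutex, i.e. a lock cycle.
	 */
	if (!intel_gt_pm_get_if_awake(gt))	/* never resumes, so never allocates */
		return NULL;			/* caller falls back to the PCI-BAR path */
	intel_engine_pm_get(ce->engine);

	/* only now is it safe to block on the timeline mutex */
	mutex_lock(&ce->timeline->mutex);
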
> > +
> > +static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
> > +				struct sg_table *pages, u32 num_entries,
> > +				const gen8_pte_t pte)
> > +{
> > +	struct i915_sched_attr attr = {};
> > +	struct intel_gt *gt = ggtt->vm.gt;
> > +	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
> > +	struct sgt_iter iter;
> > +	struct i915_request *rq;
> > +	struct intel_context *ce;
> > +	u32 *cs;
> > +
> > +	if (!num_entries)
> > +		return true;
> > +
> > +	ce = gen8_ggtt_bind_get_ce(ggtt);
> > +	if (!ce)
> > +		return false;
> > +
> > +	if (pages)
> > +		iter = __sgt_iter(pages->sgl, true);
> > +
> > +	while (num_entries) {
> > +		int count = 0;
> > +		dma_addr_t addr;
> > +		/*
> > +		 * MI_UPDATE_GTT can update 512 entries in a single command
> > +		 * but that ends up with an engine reset; 511 works.
> > +		 */
> > +		u32 n_ptes = min_t(u32, 511, num_entries);
> > +
> > +		if (mutex_lock_interruptible(&ce->timeline->mutex))
> > +			goto put_ce;
> > +
> > +		intel_context_enter(ce);
> > +		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
> > +		intel_context_exit(ce);
> > +		if (IS_ERR(rq)) {
> > +			GT_TRACE(gt, "Failed to get bind request\n");
> > +			mutex_unlock(&ce->timeline->mutex);
> > +			goto put_ce;
> > +		}
> > +
> > +		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
> > +		if (IS_ERR(cs)) {
> > +			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
> > +			i915_request_set_error_once(rq, PTR_ERR(cs));
> > +			/* once a request is created, it must be queued */
> > +			goto queue_err_rq;
> > +		}
> > +
> > +		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
> > +		*cs++ = offset << 12;
> > +
> > +		if (pages) {
> > +			for_each_sgt_daddr_next(addr, iter) {
> > +				if (count == n_ptes)
> > +					break;
> > +				*cs++ = lower_32_bits(pte | addr);
> > +				*cs++ = upper_32_bits(pte | addr);
> > +				count++;
> > +			}
> > +			/* fill remaining with scratch pte, if any */
> > +			if (count < n_ptes) {
> > +				memset64((u64 *)cs, scratch_pte,
> > +					 n_ptes - count);
> > +				cs += (n_ptes - count) * 2;
> > +			}
>
> Should we return an error instead of silently filling the PTEs with
> scratch? Or maybe even GEM_BUG_ON in this case? The caller didn't
> allocate enough pages, which means something went wrong...

We should avoid GEM_BUG_ON. However, maybe it's a good idea for this
function to return an int instead of a bool, because there are a lot of
places here that can return an error. In that case, ret > 0 could mean
the number of PTEs we updated correctly and ret < 0 would be an error.
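
Roughly like the following, untested and only meant to illustrate the
convention (reusing the labels and helpers from this patch):

	/* hypothetical: >= 0 is the number of PTEs written, < 0 is -errno */
	static int gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
				       struct sg_table *pages, u32 num_entries,
				       const gen8_pte_t pte)
	{
		int err = 0, updated = 0;
		...
		while (num_entries) {
			...
			err = mutex_lock_interruptible(&ce->timeline->mutex);
			if (err)
				goto put_ce;
			...
			if (rq->fence.error) {
				err = rq->fence.error;
				i915_request_put(rq);
				goto put_ce;
			}
			i915_request_put(rq);

			updated += n_ptes;
			num_entries -= n_ptes;
			offset += n_ptes;
		}
	put_ce:
		gen8_ggtt_bind_put_ce(ce);
		return err ?: updated;
	}

Callers would then check for ret < 0 instead of !ok, and could still
fall back to the PCI path the same way.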
Piotr

> Oak
>
> > +		} else {
> > +			memset64((u64 *)cs, pte, n_ptes);
> > +			cs += n_ptes * 2;
> > +		}
> > +
> > +		intel_ring_advance(rq, cs);
> > +queue_err_rq:
> > +		i915_request_get(rq);
> > +		__i915_request_commit(rq);
> > +		__i915_request_queue(rq, &attr);
> > +
> > +		mutex_unlock(&ce->timeline->mutex);
> > +		/* This will break if the request is complete or after engine reset */
> > +		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
> > +		if (rq->fence.error)
> > +			goto err_rq;
> > +
> > +		i915_request_put(rq);
> > +
> > +		num_entries -= n_ptes;
> > +		offset += n_ptes;
> > +	}
> > +
> > +	gen8_ggtt_bind_put_ce(ce);
> > +	return true;
> > +
> > +err_rq:
> > +	i915_request_put(rq);
> > +put_ce:
> > +	gen8_ggtt_bind_put_ce(ce);
> > +	return false;
> > +}
> > +
> >  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> >  {
> >  	writeq(pte, addr);
> > @@ -272,6 +416,21 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> >  	ggtt->invalidate(ggtt);
> >  }
> >
> > +static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
> > +				       dma_addr_t addr, u64 offset,
> > +				       unsigned int pat_index, u32 flags)
> > +{
> > +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > +	gen8_pte_t pte;
> > +
> > +	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
> > +	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
> > +	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
> > +		return ggtt->invalidate(ggtt);
> > +
> > +	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
> > +}
> > +
> >  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >  				     struct i915_vma_resource *vma_res,
> >  				     unsigned int pat_index,
> > @@ -311,6 +470,50 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >  	ggtt->invalidate(ggtt);
> >  }
> >
> > +static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
> > +					    struct i915_vma_resource *vma_res,
> > +					    unsigned int pat_index, u32 flags)
> > +{
> > +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > +	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
> > +	gen8_pte_t pte_encode;
> > +	u64 start, end;
> > +
> > +	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
> > +	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
> > +	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
> > +	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
> > +		goto err;
> > +
> > +	start = end;
> > +	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
> > +	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
> > +				 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
> > +		goto err;
> > +
> > +	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
> > +	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
> > +		goto err;
> > +
> > +	return true;
> > +
> > +err:
> > +	return false;
> > +}
> > +
> > +static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
> > +					  struct i915_vma_resource *vma_res,
> > +					  unsigned int pat_index, u32 flags)
> > +{
> > +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > +
> > +	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
> > +	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
> > +		return ggtt->invalidate(ggtt);
> > +
> > +	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
> > +}
> > +
> >  static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> >  				  u64 start, u64 length)
> >  {
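
To convince myself the guard arithmetic in
__gen8_ggtt_insert_entries_bind() is right: the range is written in
three slices, guard / payload / guard. With made-up values, say
vma_res->start = 0x10000, guard = 0x1000 and node_size = 0x4000
(4 KiB GGTT pages), the three gen8_ggtt_bind_ptes() calls become:

	start = (0x10000 - 0x1000) / 0x1000 = 15, end = 16	/* PTE 15     <- scratch_pte */
	start = 16, end = 16 + (0x4000 + 0x1000) / 0x1000 = 21	/* PTEs 16-19 <- pte | daddr */
	start = 16 + 4 = 20, end - start = 1			/* PTE 20     <- scratch_pte */

so PTEs 15-20 cover leading guard, payload and trailing guard with no
gap or overlap. Looks correct to me.
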
> > @@ -332,6 +535,27 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> >  		gen8_set_pte(&gtt_base[i], scratch_pte);
> >  }
> >
> > +static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
> > +					 u64 start, u64 length)
> > +{
> > +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > +	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
> > +	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
> > +	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
> > +	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
> > +
> > +	if (WARN(num_entries > max_entries,
> > +		 "First entry = %d; Num entries = %d (max=%d)\n",
> > +		 first_entry, num_entries, max_entries))
> > +		num_entries = max_entries;
> > +
> > +	if (should_update_ggtt_with_bind(ggtt) &&
> > +	    gen8_ggtt_bind_ptes(ggtt, first_entry, NULL, num_entries, scratch_pte))
> > +		return ggtt->invalidate(ggtt);
> > +
> > +	gen8_ggtt_clear_range(vm, start, length);
> > +}
> > +
> >  static void gen6_ggtt_insert_page(struct i915_address_space *vm,
> >  				  dma_addr_t addr,
> >  				  u64 offset,
> > @@ -997,6 +1221,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> >  			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
> >  	}
> >
> > +	if (i915_ggtt_require_binder(i915)) {
> > +		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
> > +		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
> > +		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
> > +		/*
> > +		 * If the GPU is hung, we might bind VMAs for error capture.
> > +		 * Fall back to CPU GGTT updates in that case.
> > +		 */
> > +		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
> > +	}
> > +
> >  	if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
> >  		ggtt->invalidate = guc_ggtt_invalidate;
> >  	else
> > --
> > 2.41.0
>
> --
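
One last note on the raw_insert_page hunk: keeping that hook on the
CPU/MMIO path makes sense to me, since after a hang the blitter cannot
be trusted to execute the bind request, yet error capture must still be
able to map pages. If I remember the capture path correctly, it boils
down to something like this (paraphrased from memory, not exact i915
code):

	/* error capture: must not depend on a live engine */
	ggtt->vm.raw_insert_page(&ggtt->vm, dma_addr, slot, pat_index, 0);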