Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Currently, we perform a locked update of the shadow entry when
> allocating a page directory entry such that if two clients are
> concurrently allocating neighbouring ranges we only insert one new entry
> for the pair of them. However, we also need to serialise both clients
> wrt to the actual entry in the HW table, or else we may allow one client
> or even a third client to proceed ahead of the HW write. My handwave
> before was that under the _pathological_ condition we would see the
> scratch entry instead of the expected entry, causing a temporary
> glitch. That starvation condition will eventually show up in practice, so
> fix it.
>
> The reason for the previous cheat was to avoid having to free the extra
> allocation while under the spinlock. Now, we keep the extra entry
> allocated until the end instead.
>
> v2: Fix error paths for gen6
>
> Fixes: 1d1b5490b91c ("drm/i915/gtt: Replace struct_mutex serialisation for allocation")
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
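For anyone following along, the shape of the fix is: walk the range
under the spinlock; on finding a scratch slot, drop the lock, grab an
allocation (reusing the spare left over from a previously lost race,
if any), then retake the lock and recheck the slot before publishing
the HW entry and the shadow entry together. The loser's allocation is
merely parked in 'alloc' and freed once the lock has finally been
dropped. A minimal standalone sketch of the idea, with pthreads
standing in for the spinlock and invented names throughout (this is
not the i915 code):

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

#define NENTRIES 512

struct entry {
	unsigned long used;
};

static struct entry scratch;		/* shared placeholder, never freed */
static struct entry *table[NENTRIES];	/* shadow table */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static void table_init(void)
{
	for (unsigned int i = 0; i < NENTRIES; i++)
		table[i] = &scratch;
}

static int alloc_range(unsigned int start, unsigned int end)
{
	struct entry *spare = NULL;
	int ret = 0;

	pthread_mutex_lock(&table_lock);
	for (unsigned int idx = start; idx < end; idx++) {
		struct entry *ent = table[idx];

		if (ent == &scratch) {
			pthread_mutex_unlock(&table_lock);

			/* Reuse the spare from a lost race, else allocate. */
			ent = spare;
			spare = NULL;
			if (!ent)
				ent = calloc(1, sizeof(*ent));
			if (!ent) {
				ret = -ENOMEM;
				goto out;	/* the real code also unwinds here */
			}

			pthread_mutex_lock(&table_lock);
			if (table[idx] == &scratch) {
				table[idx] = ent;	/* won the race: publish */
			} else {
				spare = ent;		/* lost: park until unlock */
				ent = table[idx];
			}
		}

		ent->used++;	/* account under the lock */
	}
	pthread_mutex_unlock(&table_lock);
out:
	free(spare);	/* the loser's allocation, freed outside the lock */
	return ret;
}

int main(void)
{
	table_init();
	return alloc_range(0, 16) ? 1 : 0;
}

The spare is recycled on the next iteration of the walk (the
fetch_and_zero(&alloc) in the patch), so a lost race costs at most one
extra allocation across the whole range.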
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 130 +++++++++++++++-------------
>  1 file changed, 72 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0392a4c4bb9b..0987748d327b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1387,82 +1387,88 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  			       struct i915_page_directory *pd,
>  			       u64 start, u64 length)
>  {
> +	struct i915_page_table *alloc = NULL;
>  	struct i915_page_table *pt;
>  	u64 from = start;
>  	unsigned int pde;
> +	int ret = 0;
>
>  	spin_lock(&pd->lock);
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>
>  		if (pt == vm->scratch_pt) {
> -			struct i915_page_table *old;
> -
>  			spin_unlock(&pd->lock);
>
> -			pt = alloc_pt(vm);
> -			if (IS_ERR(pt))
> +			pt = fetch_and_zero(&alloc);
> +			if (!pt)
> +				pt = alloc_pt(vm);
> +			if (IS_ERR(pt)) {
> +				ret = PTR_ERR(pt);
>  				goto unwind;
> +			}
>
>  			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
>  				gen8_initialize_pt(vm, pt);
>
> -			old = cmpxchg(&pd->page_table[pde], vm->scratch_pt, pt);
> -			if (old == vm->scratch_pt) {
> +			spin_lock(&pd->lock);
> +			if (pd->page_table[pde] == vm->scratch_pt) {
>  				gen8_ppgtt_set_pde(vm, pd, pt, pde);
> +				pd->page_table[pde] = pt;
>  				atomic_inc(&pd->used_pdes);
>  			} else {
> -				free_pt(vm, pt);
> -				pt = old;
> +				alloc = pt;
> +				pt = pd->page_table[pde];
>  			}
> -
> -			spin_lock(&pd->lock);
>  		}
>
>  		atomic_add(count, &pt->used_ptes);
>  	}
>  	spin_unlock(&pd->lock);
> -
> -	return 0;
> +	goto out;
>
>  unwind:
>  	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pt(vm, alloc);
> +	return ret;
>  }
>
>  static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				struct i915_page_directory_pointer *pdp,
>  				u64 start, u64 length)
>  {
> +	struct i915_page_directory *alloc = NULL;
>  	struct i915_page_directory *pd;
>  	u64 from = start;
>  	unsigned int pdpe;
> -	int ret;
> +	int ret = 0;
>
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>  		if (pd == vm->scratch_pd) {
> -			struct i915_page_directory *old;
> -
>  			spin_unlock(&pdp->lock);
>
> -			pd = alloc_pd(vm);
> -			if (IS_ERR(pd))
> +			pd = fetch_and_zero(&alloc);
> +			if (!pd)
> +				pd = alloc_pd(vm);
> +			if (IS_ERR(pd)) {
> +				ret = PTR_ERR(pd);
>  				goto unwind;
> +			}
>
>  			gen8_initialize_pd(vm, pd);
>
> -			old = cmpxchg(&pdp->page_directory[pdpe],
> -				      vm->scratch_pd, pd);
> -			if (old == vm->scratch_pd) {
> +			spin_lock(&pdp->lock);
> +			if (pdp->page_directory[pdpe] == vm->scratch_pd) {
>  				gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
> +				pdp->page_directory[pdpe] = pd;
>  				atomic_inc(&pdp->used_pdpes);
>  			} else {
> -				free_pd(vm, pd);
> -				pd = old;
> +				alloc = pd;
> +				pd = pdp->page_directory[pdpe];
>  			}
> -
> -			spin_lock(&pdp->lock);
>  		}
>  		atomic_inc(&pd->used_pdes);
>  		spin_unlock(&pdp->lock);
> @@ -1475,8 +1481,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  		atomic_dec(&pd->used_pdes);
>  	}
>  	spin_unlock(&pdp->lock);
> -
> -	return 0;
> +	goto out;
>
>  unwind_pd:
>  	spin_lock(&pdp->lock);
> @@ -1489,7 +1494,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  	spin_unlock(&pdp->lock);
>  unwind:
>  	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pd(vm, alloc);
> +	return ret;
>  }
>
>  static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
> @@ -1504,33 +1512,35 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  {
>  	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_pml4 *pml4 = &ppgtt->pml4;
> +	struct i915_page_directory_pointer *alloc = NULL;
>  	struct i915_page_directory_pointer *pdp;
>  	u64 from = start;
> +	int ret = 0;
>  	u32 pml4e;
> -	int ret;
>
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>  		if (pdp == vm->scratch_pdp) {
> -			struct i915_page_directory_pointer *old;
> -
>  			spin_unlock(&pml4->lock);
>
> -			pdp = alloc_pdp(vm);
> -			if (IS_ERR(pdp))
> +			pdp = fetch_and_zero(&alloc);
> +			if (!pdp)
> +				pdp = alloc_pdp(vm);
> +			if (IS_ERR(pdp)) {
> +				ret = PTR_ERR(pdp);
>  				goto unwind;
> +			}
>
>  			gen8_initialize_pdp(vm, pdp);
>
> -			old = cmpxchg(&pml4->pdps[pml4e], vm->scratch_pdp, pdp);
> -			if (old == vm->scratch_pdp) {
> +			spin_lock(&pml4->lock);
> +			if (pml4->pdps[pml4e] == vm->scratch_pdp) {
>  				gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
> +				pml4->pdps[pml4e] = pdp;
>  			} else {
> -				free_pdp(vm, pdp);
> -				pdp = old;
> +				alloc = pdp;
> +				pdp = pml4->pdps[pml4e];
>  			}
> -
> -			spin_lock(&pml4->lock);
>  		}
>  		atomic_inc(&pdp->used_pdpes);
>  		spin_unlock(&pml4->lock);
> @@ -1543,8 +1553,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  		atomic_dec(&pdp->used_pdpes);
>  	}
>  	spin_unlock(&pml4->lock);
> -
> -	return 0;
> +	goto out;
>
>  unwind_pdp:
>  	spin_lock(&pml4->lock);
> @@ -1555,7 +1564,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  	spin_unlock(&pml4->lock);
>  unwind:
>  	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pdp(vm, alloc);
> +	return ret;
>  }
>
>  static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
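The gen6 hunk below is where the v2 fixup lands: rather than releasing
the runtime pm wakeref separately on the success and error paths, both
paths now converge on a common exit that frees the spare and then drops
the wakeref exactly once. Roughly, as another hedged standalone sketch
with toy stand-ins rather than the real i915 interfaces:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins, purely illustrative. */
static int runtime_pm_get(void) { return 1; }
static void runtime_pm_put(int wakeref) { (void)wakeref; }
static void clear_range(unsigned int from, unsigned int len)
{
	printf("unwound %u entries from %u\n", len, from);
}

static int alloc_va_range(unsigned int start, unsigned int length)
{
	void *spare = NULL;
	unsigned int from = start;
	int wakeref, ret = 0;

	wakeref = runtime_pm_get();

	/*
	 * The allocation walk from the earlier sketch runs here,
	 * advancing 'start' and jumping to unwind_out on failure.
	 */
	if (!length) {			/* stand-in failure condition */
		ret = -ENOMEM;
		goto unwind_out;
	}
	start += length;

	goto out;			/* success joins the common exit */

unwind_out:
	clear_range(from, start - from);	/* undo the partial range */
out:
	free(spare);			/* spare freed with no lock held */
	runtime_pm_put(wakeref);	/* dropped exactly once, and last */
	return ret;
}

int main(void)
{
	return alloc_va_range(0, 0) == -ENOMEM ? 0 : 1;
}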
> @@ -1820,11 +1832,13 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  			       u64 start, u64 length)
>  {
>  	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
> +	struct i915_page_table *alloc = NULL;
>  	struct i915_page_table *pt;
>  	intel_wakeref_t wakeref;
>  	u64 from = start;
>  	unsigned int pde;
>  	bool flush = false;
> +	int ret = 0;
>
>  	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
>
> @@ -1833,19 +1847,20 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		const unsigned int count = gen6_pte_count(start, length);
>
>  		if (pt == vm->scratch_pt) {
> -			struct i915_page_table *old;
> -
>  			spin_unlock(&ppgtt->base.pd.lock);
>
> -			pt = alloc_pt(vm);
> -			if (IS_ERR(pt))
> +			pt = fetch_and_zero(&alloc);
> +			if (!pt)
> +				pt = alloc_pt(vm);
> +			if (IS_ERR(pt)) {
> +				ret = PTR_ERR(pt);
>  				goto unwind_out;
> +			}
>
>  			gen6_initialize_pt(vm, pt);
>
> -			old = cmpxchg(&ppgtt->base.pd.page_table[pde],
> -				      vm->scratch_pt, pt);
> -			if (old == vm->scratch_pt) {
> +			spin_lock(&ppgtt->base.pd.lock);
> +			if (ppgtt->base.pd.page_table[pde] == vm->scratch_pt) {
>  				ppgtt->base.pd.page_table[pde] = pt;
>  				if (i915_vma_is_bound(ppgtt->vma,
>  						      I915_VMA_GLOBAL_BIND)) {
> @@ -1853,11 +1868,9 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  					flush = true;
>  				}
>  			} else {
> -				free_pt(vm, pt);
> -				pt = old;
> +				alloc = pt;
> +				pt = ppgtt->base.pd.page_table[pde];
>  			}
> -
> -			spin_lock(&ppgtt->base.pd.lock);
>  		}
>
>  		atomic_add(count, &pt->used_ptes);
> @@ -1869,14 +1882,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		gen6_ggtt_invalidate(vm->i915);
>  	}
>
> -	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
> -
> -	return 0;
> +	goto out;
>
>  unwind_out:
> -	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
>  	gen6_ppgtt_clear_range(vm, from, start - from);
> -	return -ENOMEM;
> +out:
> +	if (alloc)
> +		free_pt(vm, alloc);
> +	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
> +	return ret;
>  }
>
>  static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
> --
> 2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx