Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Previously, we were able to rely on the recursive properties of > struct_mutex to allow us to serialise revoking mmaps and reacquiring the > FENCE registers with them being clobbered over a global device reset. > I then proceeded to throw out the baby with the bath water in order to > pursue a struct_mutex-less reset. > > Perusing LWN for alternative strategies, the dilemma on how to serialise > access to a global resource on one side was answered by > https://lwn.net/Articles/202847/ -- Sleepable RCU: > > 1 int readside(void) { > 2 int idx; > 3 rcu_read_lock(); > 4 if (nomoresrcu) { > 5 rcu_read_unlock(); > 6 return -EINVAL; > 7 } > 8 idx = srcu_read_lock(&ss); > 9 rcu_read_unlock(); > 10 /* SRCU read-side critical section. */ > 11 srcu_read_unlock(&ss, idx); > 12 return 0; > 13 } > 14 > 15 void cleanup(void) > 16 { > 17 nomoresrcu = 1; > 18 synchronize_rcu(); > 19 synchronize_srcu(&ss); > 20 cleanup_srcu_struct(&ss); > 21 } > > No more worrying about stop_machine, just an uber-complex mutex, > optimised for reads, with the overhead pushed to the rare reset path. > > However, we do run the risk of a deadlock as we allocate underneath the > SRCU read lock, and the allocation may require a GPU reset, causing a > dependency cycle via the in-flight requests. We resolve that by declaring > the driver wedged and cancelling all in-flight rendering. > > v2: Use expedited rcu barriers to match our earlier timing > characteristics. > v3: Try to annotate locking contexts for sparse > v4: Reduce selftest lock duration to avoid a reset deadlock with fences > > Testcase: igt/gem_mmap_gtt/hang > Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex") > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 12 +- > drivers/gpu/drm/i915/i915_drv.h | 18 +-- > drivers/gpu/drm/i915/i915_gem.c | 56 +++------ > drivers/gpu/drm/i915/i915_gem_fence_reg.c | 31 +---- > drivers/gpu/drm/i915/i915_gpu_error.h | 12 +- > drivers/gpu/drm/i915/i915_reset.c | 107 +++++++++++------- > drivers/gpu/drm/i915/i915_reset.h | 4 + > .../gpu/drm/i915/selftests/intel_hangcheck.c | 5 +- > .../gpu/drm/i915/selftests/mock_gem_device.c | 1 + > 9 files changed, 109 insertions(+), 137 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index fa2c226fc779..2cea263b4d79 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1281,14 +1281,11 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) > intel_wakeref_t wakeref; > enum intel_engine_id id; > > + seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags); > if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) > - seq_puts(m, "Wedged\n"); > + seq_puts(m, "\tWedged\n"); > if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) > - seq_puts(m, "Reset in progress: struct_mutex backoff\n"); > - if (waitqueue_active(&dev_priv->gpu_error.wait_queue)) > - seq_puts(m, "Waiter holding struct mutex\n"); > - if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) > - seq_puts(m, "struct_mutex blocked for reset\n"); > + seq_puts(m, "\tDevice (global) reset in progress\n"); > > if (!i915_modparams.enable_hangcheck) { > seq_puts(m, "Hangcheck disabled\n"); > @@ -3885,9 +3882,6 @@ i915_wedged_set(void *data, u64 val) > * while it is writing to 'i915_wedged' > */ > > - if (i915_reset_backoff(&i915->gpu_error)) > - return -EAGAIN; > - On checking the set side, I noticed there is a stale comment about full reset needing the mutex in the i915_handle_error() > i915_handle_error(i915, val, I915_ERROR_CAPTURE, > "Manually set wedged engine mask = %llx", val); > return 0; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 534e52e3a8da..3e4538ce5276 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2989,7 +2989,12 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) > i915_gem_object_unpin_pages(obj); > } > > -int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > +static inline int __must_check > +i915_mutex_lock_interruptible(struct drm_device *dev) > +{ > + return mutex_lock_interruptible(&dev->struct_mutex); > +} > + > int i915_gem_dumb_create(struct drm_file *file_priv, > struct drm_device *dev, > struct drm_mode_create_dumb *args); > @@ -3006,21 +3011,11 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); > struct i915_request * > i915_gem_find_active_request(struct intel_engine_cs *engine); > > -static inline bool i915_reset_backoff(struct i915_gpu_error *error) > -{ > - return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); > -} > - > static inline bool i915_terminally_wedged(struct i915_gpu_error *error) > { > return unlikely(test_bit(I915_WEDGED, &error->flags)); > } > > -static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error) > -{ > - return i915_reset_backoff(error) | i915_terminally_wedged(error); > -} > - > static inline u32 i915_reset_count(struct i915_gpu_error *error) > { > return READ_ONCE(error->reset_count); > @@ -3093,7 +3088,6 @@ struct drm_i915_fence_reg * > i915_reserve_fence(struct drm_i915_private *dev_priv); > void i915_unreserve_fence(struct drm_i915_fence_reg *fence); > > -void i915_gem_revoke_fences(struct drm_i915_private *dev_priv); > void i915_gem_restore_fences(struct drm_i915_private *dev_priv); > > void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index bc7d1338b69a..2c6161c89cc7 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -100,47 +100,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, > spin_unlock(&dev_priv->mm.object_stat_lock); > } > > -static int > -i915_gem_wait_for_error(struct i915_gpu_error *error) > -{ > - int ret; > - > - might_sleep(); > - > - /* > - * Only wait 10 seconds for the gpu reset to complete to avoid hanging > - * userspace. If it takes that long something really bad is going on and > - * we should simply try to bail out and fail as gracefully as possible. > - */ > - ret = wait_event_interruptible_timeout(error->reset_queue, > - !i915_reset_backoff(error), > - I915_RESET_TIMEOUT); > - if (ret == 0) { > - DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); > - return -EIO; > - } else if (ret < 0) { > - return ret; > - } else { > - return 0; > - } > -} > - > -int i915_mutex_lock_interruptible(struct drm_device *dev) > -{ > - struct drm_i915_private *dev_priv = to_i915(dev); > - int ret; > - > - ret = i915_gem_wait_for_error(&dev_priv->gpu_error); > - if (ret) > - return ret; > - > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > - > - return 0; > -} > - > static u32 __i915_gem_park(struct drm_i915_private *i915) > { > intel_wakeref_t wakeref; > @@ -1869,6 +1828,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) > intel_wakeref_t wakeref; > struct i915_vma *vma; > pgoff_t page_offset; > + int srcu; > int ret; > > /* Sanity check that we allow writing into this object */ > @@ -1908,7 +1868,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) > goto err_unlock; > } > > - > /* Now pin it into the GTT as needed */ > vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, > PIN_MAPPABLE | > @@ -1946,9 +1905,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) > if (ret) > goto err_unpin; > > + srcu = i915_reset_trylock(dev_priv); > + if (srcu < 0) { > + ret = srcu; > + goto err_unpin; > + } > + > ret = i915_vma_pin_fence(vma); > if (ret) > - goto err_unpin; > + goto err_reset; > > /* Finally, remap it using the new GTT offset */ > ret = remap_io_mapping(area, > @@ -1969,6 +1934,8 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) > > err_fence: > i915_vma_unpin_fence(vma); > +err_reset: > + i915_reset_unlock(dev_priv, srcu); > err_unpin: > __i915_vma_unpin(vma); > err_unlock: > @@ -5326,6 +5293,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) > init_waitqueue_head(&dev_priv->gpu_error.wait_queue); > init_waitqueue_head(&dev_priv->gpu_error.reset_queue); > mutex_init(&dev_priv->gpu_error.wedge_mutex); > + init_srcu_struct(&dev_priv->gpu_error.srcu); > > atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); > > @@ -5358,6 +5326,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) > GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); > WARN_ON(dev_priv->mm.object_count); > > + cleanup_srcu_struct(&dev_priv->gpu_error.srcu); > + > kmem_cache_destroy(dev_priv->priorities); > kmem_cache_destroy(dev_priv->dependencies); > kmem_cache_destroy(dev_priv->requests); > diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c > index 46e259661294..bd0d5b8d6c96 100644 > --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c > +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c > @@ -240,6 +240,10 @@ static int fence_update(struct drm_i915_fence_reg *fence, > i915_vma_flush_writes(old); > } > > + ret = i915_reset_trylock(fence->i915); > + if (ret < 0) > + return ret; > + > if (fence->vma && fence->vma != vma) { > /* Ensure that all userspace CPU access is completed before > * stealing the fence. > @@ -272,6 +276,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, > list_move_tail(&fence->link, &fence->i915->mm.fence_list); > } > > + i915_reset_unlock(fence->i915, ret); > return 0; > } > > @@ -435,32 +440,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence) > list_add(&fence->link, &fence->i915->mm.fence_list); > } > > -/** > - * i915_gem_revoke_fences - revoke fence state > - * @dev_priv: i915 device private > - * > - * Removes all GTT mmappings via the fence registers. This forces any user > - * of the fence to reacquire that fence before continuing with their access. > - * One use is during GPU reset where the fence register is lost and we need to > - * revoke concurrent userspace access via GTT mmaps until the hardware has been > - * reset and the fence registers have been restored. > - */ > -void i915_gem_revoke_fences(struct drm_i915_private *dev_priv) > -{ > - int i; > - > - lockdep_assert_held(&dev_priv->drm.struct_mutex); > - > - for (i = 0; i < dev_priv->num_fence_regs; i++) { > - struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; > - > - GEM_BUG_ON(fence->vma && fence->vma->fence != fence); > - > - if (fence->vma) > - i915_vma_revoke_mmap(fence->vma); > - } > -} > - > /** > * i915_gem_restore_fences - restore fence state > * @dev_priv: i915 device private > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h > index 53b1f22dd365..4e797c552b96 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.h > +++ b/drivers/gpu/drm/i915/i915_gpu_error.h > @@ -231,12 +231,10 @@ struct i915_gpu_error { > /** > * flags: Control various stages of the GPU reset > * > - * #I915_RESET_BACKOFF - When we start a reset, we want to stop any > - * other users acquiring the struct_mutex. To do this we set the > - * #I915_RESET_BACKOFF bit in the error flags when we detect a reset > - * and then check for that bit before acquiring the struct_mutex (in > - * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a > - * secondary role in preventing two concurrent global reset attempts. > + * #I915_RESET_BACKOFF - When we start a global reset, we need to > + * serialise with any other users attempting to do the same, and > + * any global resources that may be clobber by the reset (such as > + * FENCE registers). > * > * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to > * acquire the struct_mutex to reset an engine, we need an explicit > @@ -272,6 +270,8 @@ struct i915_gpu_error { > */ > wait_queue_head_t reset_queue; > > + struct srcu_struct srcu; > + It is the only one in here so not causing confusion but could have been reset_backoff_srcu; > struct i915_gpu_restart *restart; > }; > > diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c > index 4462007a681c..f58fae457ec6 100644 > --- a/drivers/gpu/drm/i915/i915_reset.c > +++ b/drivers/gpu/drm/i915/i915_reset.c > @@ -639,6 +639,31 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) > engine->reset.prepare(engine); > } > > +static void revoke_mmaps(struct drm_i915_private *i915) > +{ > + int i; > + > + for (i = 0; i < i915->num_fence_regs; i++) { > + struct i915_vma *vma = i915->fence_regs[i].vma; > + struct drm_vma_offset_node *node; > + u64 vma_offset; > + > + if (!vma) > + continue; > + > + GEM_BUG_ON(vma->fence != &i915->fence_regs[i]); > + if (!i915_vma_has_userfault(vma)) > + continue; > + > + node = &vma->obj->base.vma_node; > + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; > + unmap_mapping_range(i915->drm.anon_inode->i_mapping, > + drm_vma_node_offset_addr(node) + vma_offset, > + vma->size, > + 1); > + } > +} > + > static void reset_prepare(struct drm_i915_private *i915) > { > struct intel_engine_cs *engine; > @@ -648,6 +673,7 @@ static void reset_prepare(struct drm_i915_private *i915) > reset_prepare_engine(engine); > > intel_uc_sanitize(i915); > + revoke_mmaps(i915); > } > > static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask) > @@ -911,50 +937,22 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) > return ret; > } > > -struct __i915_reset { > - struct drm_i915_private *i915; > - unsigned int stalled_mask; > -}; > - > -static int __i915_reset__BKL(void *data) > -{ > - struct __i915_reset *arg = data; > - int err; > - > - err = intel_gpu_reset(arg->i915, ALL_ENGINES); > - if (err) > - return err; > - > - return gt_reset(arg->i915, arg->stalled_mask); > -} > - > -#if RESET_UNDER_STOP_MACHINE > -/* > - * XXX An alternative to using stop_machine would be to park only the > - * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP) > - * we should be able to prevent their memmory accesses via the lost fence > - * registers over the course of the reset without the potential recursive > - * of mutexes between the pagefault handler and reset. > - * > - * See igt/gem_mmap_gtt/hang > - */ > -#define __do_reset(fn, arg) stop_machine(fn, arg, NULL) > -#else > -#define __do_reset(fn, arg) fn(arg) > -#endif > - > static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask) > { > - struct __i915_reset arg = { i915, stalled_mask }; > int err, i; > > - err = __do_reset(__i915_reset__BKL, &arg); > + /* Flush everyone currently using a resource about to be clobbered */ > + synchronize_srcu(&i915->gpu_error.srcu); > + > + err = intel_gpu_reset(i915, ALL_ENGINES); > for (i = 0; err && i < RESET_MAX_RETRIES; i++) { > - msleep(100); > - err = __do_reset(__i915_reset__BKL, &arg); > + msleep(10 * (i + 1)); > + err = intel_gpu_reset(i915, ALL_ENGINES); > } > + if (err) > + return err; > > - return err; > + return gt_reset(i915, stalled_mask); > } > > /** > @@ -1274,9 +1272,12 @@ void i915_handle_error(struct drm_i915_private *i915, > wait_event(i915->gpu_error.reset_queue, > !test_bit(I915_RESET_BACKOFF, > &i915->gpu_error.flags)); > - goto out; > + goto out; /* piggy-back on the other reset */ > } > > + /* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */ > + synchronize_rcu_expedited(); Is the expedite here to minimize the time the faulted client can try to reaquire? > + > /* Prevent any other reset-engine attempt. */ > for_each_engine(engine, i915, tmp) { > while (test_and_set_bit(I915_RESET_ENGINE + engine->id, > @@ -1300,6 +1301,36 @@ void i915_handle_error(struct drm_i915_private *i915, > intel_runtime_pm_put(i915, wakeref); > } > > +int i915_reset_trylock(struct drm_i915_private *i915) > +{ > + struct i915_gpu_error *error = &i915->gpu_error; > + int srcu; > + > + rcu_read_lock(); > + while (test_bit(I915_RESET_BACKOFF, &error->flags)) { > + rcu_read_unlock(); > + > + if (wait_event_interruptible(error->reset_queue, > + !test_bit(I915_RESET_BACKOFF, > + &error->flags))) > + return -EINTR; > + > + rcu_read_lock(); > + } > + srcu = srcu_read_lock(&error->srcu); In here are we piggybacking the graze period from srcu into the rcu domain by nesting? The srcu is ours, but the rcu is everyone. So this bothers me. -Mika > + rcu_read_unlock(); > + > + return srcu; > +} > + > +void i915_reset_unlock(struct drm_i915_private *i915, int tag) > +__releases(&i915->gpu_error.srcu) > +{ > + struct i915_gpu_error *error = &i915->gpu_error; > + > + srcu_read_unlock(&error->srcu, tag); > +} > + > bool i915_reset_flush(struct drm_i915_private *i915) > { > int err; > diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h > index f2d347f319df..893c5d1c2eb8 100644 > --- a/drivers/gpu/drm/i915/i915_reset.h > +++ b/drivers/gpu/drm/i915/i915_reset.h > @@ -9,6 +9,7 @@ > > #include <linux/compiler.h> > #include <linux/types.h> > +#include <linux/srcu.h> > > struct drm_i915_private; > struct intel_engine_cs; > @@ -32,6 +33,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, > void i915_reset_request(struct i915_request *rq, bool guilty); > bool i915_reset_flush(struct drm_i915_private *i915); > > +int __must_check i915_reset_trylock(struct drm_i915_private *i915); > +void i915_reset_unlock(struct drm_i915_private *i915, int tag); > + > bool intel_has_gpu_reset(struct drm_i915_private *i915); > bool intel_has_reset_engine(struct drm_i915_private *i915); > > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > index 7b6f3bea9ef8..4886fac12628 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > @@ -1039,8 +1039,6 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, > > /* Check that we can recover an unbind stuck on a hanging request */ > > - igt_global_reset_lock(i915); > - > mutex_lock(&i915->drm.struct_mutex); > err = hang_init(&h, i915); > if (err) > @@ -1138,7 +1136,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, > } > > out_reset: > + igt_global_reset_lock(i915); > fake_hangcheck(rq->i915, intel_engine_flag(rq->engine)); > + igt_global_reset_unlock(i915); > > if (tsk) { > struct igt_wedge_me w; > @@ -1159,7 +1159,6 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, > hang_fini(&h); > unlock: > mutex_unlock(&i915->drm.struct_mutex); > - igt_global_reset_unlock(i915); > > if (i915_terminally_wedged(&i915->gpu_error)) > return -EIO; > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > index 14ae46fda49f..074a0d9cbf26 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > @@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void) > > init_waitqueue_head(&i915->gpu_error.wait_queue); > init_waitqueue_head(&i915->gpu_error.reset_queue); > + init_srcu_struct(&i915->gpu_error.srcu); > mutex_init(&i915->gpu_error.wedge_mutex); > > i915->wq = alloc_ordered_workqueue("mock", 0); > -- > 2.20.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx