Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> writes: > "Zanoni, Paulo R" <paulo.r.zanoni@xxxxxxxxx> writes: > >> Hi >> >> Em Sex, 2016-09-09 às 14:11 +0100, Chris Wilson escreveu: >>> In the next patch we want to handle reset directly by a locked waiter >>> in >>> order to avoid issues with returning before the reset is handled. To >>> handle the reset, we must first know whether we hold the >>> struct_mutex. >>> If we do not hold the struct_mtuex we can not perform the reset, but >>> we do >>> not block the reset worker either (and so we can just continue to >>> wait for >>> request completion) - otherwise we must relinquish the mutex. >>> >>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >>> Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> >>> --- >>> drivers/gpu/drm/i915/i915_debugfs.c | 4 +++- >>> drivers/gpu/drm/i915/i915_gem.c | 7 +++++-- >>> drivers/gpu/drm/i915/i915_gem_evict.c | 8 ++++++-- >>> drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- >>> drivers/gpu/drm/i915/i915_gem_request.c | 15 ++++++++++++--- >>> drivers/gpu/drm/i915/i915_gem_request.h | 11 ++++++++--- >>> drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- >>> drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- >>> 8 files changed, 38 insertions(+), 14 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c >>> b/drivers/gpu/drm/i915/i915_debugfs.c >>> index 620e7daa133b..64702cc68e3a 100644 >>> --- a/drivers/gpu/drm/i915/i915_debugfs.c >>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >>> @@ -4803,7 +4803,9 @@ i915_drop_caches_set(void *data, u64 val) >>> return ret; >>> >>> if (val & DROP_ACTIVE) { >>> - ret = i915_gem_wait_for_idle(dev_priv, >>> I915_WAIT_INTERRUPTIBLE); >>> + ret = i915_gem_wait_for_idle(dev_priv, >>> + I915_WAIT_INTERRUPTIBLE >>> | >>> + I915_WAIT_LOCKED); >>> if (ret) >>> goto unlock; >>> } >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c >>> b/drivers/gpu/drm/i915/i915_gem.c >>> index 4617250c3000..23069a2d2850 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem.c >>> +++ b/drivers/gpu/drm/i915/i915_gem.c >>> @@ -2802,7 +2802,8 @@ __i915_gem_object_sync(struct >>> drm_i915_gem_request *to, >>> >>> if (!i915.semaphores) { >>> ret = i915_wait_request(from, >>> - from->i915- >>> >mm.interruptible, >>> + from->i915->mm.interruptible >>> | >>> + I915_WAIT_LOCKED, >>> NULL, >>> NO_WAITBOOST); >>> if (ret) >>> @@ -4304,7 +4305,9 @@ int i915_gem_suspend(struct drm_device *dev) >>> if (ret) >>> goto err; >>> >>> - ret = i915_gem_wait_for_idle(dev_priv, >>> I915_WAIT_INTERRUPTIBLE); >>> + ret = i915_gem_wait_for_idle(dev_priv, >>> + I915_WAIT_INTERRUPTIBLE | >>> + I915_WAIT_LOCKED); >>> if (ret) >>> goto err; >>> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c >>> b/drivers/gpu/drm/i915/i915_gem_evict.c >>> index 103085246975..5b6f81c1dbca 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c >>> @@ -170,7 +170,9 @@ search_again: >>> if (ret) >>> return ret; >>> >>> - ret = i915_gem_wait_for_idle(dev_priv, >>> I915_WAIT_INTERRUPTIBLE); >>> + ret = i915_gem_wait_for_idle(dev_priv, >>> + I915_WAIT_INTERRUPTIBLE | >>> + I915_WAIT_LOCKED); >>> if (ret) >>> return ret; >>> >>> @@ -275,7 +277,9 @@ int i915_gem_evict_vm(struct i915_address_space >>> *vm, bool do_idle) >>> return ret; >>> } >>> >>> - ret = i915_gem_wait_for_idle(dev_priv, >>> I915_WAIT_INTERRUPTIBLE); >>> + ret = i915_gem_wait_for_idle(dev_priv, >>> + I915_WAIT_INTERRUPTIBLE >>> | >>> + I915_WAIT_LOCKED); >>> if (ret) >>> return ret; >>> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c >>> b/drivers/gpu/drm/i915/i915_gem_gtt.c >>> index 9bcac52b8268..f3c6876da521 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >>> @@ -2683,7 +2683,7 @@ void i915_gem_gtt_finish_object(struct >>> drm_i915_gem_object *obj) >>> struct i915_ggtt *ggtt = &dev_priv->ggtt; >>> >>> if (unlikely(ggtt->do_idle_maps)) { >>> - if (i915_gem_wait_for_idle(dev_priv, 0)) { >>> + if (i915_gem_wait_for_idle(dev_priv, >>> I915_WAIT_LOCKED)) { >>> DRM_ERROR("Failed to wait for idle; VT'd may >>> hang.\n"); >>> /* Wait a bit, in hopes it avoids the hang >>> */ >>> udelay(10); >>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c >>> b/drivers/gpu/drm/i915/i915_gem_request.c >>> index f4c15f319d08..5f89801e6a16 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_request.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c >>> @@ -260,7 +260,9 @@ static int i915_gem_init_seqno(struct >>> drm_i915_private *dev_priv, u32 seqno) >>> >>> /* Carefully retire all requests without writing to the >>> rings */ >>> for_each_engine(engine, dev_priv) { >>> - ret = intel_engine_idle(engine, >>> I915_WAIT_INTERRUPTIBLE); >>> + ret = intel_engine_idle(engine, >>> + I915_WAIT_INTERRUPTIBLE | >>> + I915_WAIT_LOCKED); >>> if (ret) >>> return ret; >>> } >>> @@ -625,6 +627,10 @@ int i915_wait_request(struct >>> drm_i915_gem_request *req, >>> int ret = 0; >>> >>> might_sleep(); >>> +#if IS_ENABLED(CONFIG_LOCKDEP) >>> + GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) >>> != >>> + !!(flags & I915_WAIT_LOCKED)); >>> +#endif >> >> I'm hitting this on my SKL. It usually happens right after the login, >> when I click the terminal icon on the toolbar in order to open it (on >> Cinnamon). When it doesn't happen at that time, I just open a browser >> and browse for like 30 seconds until it happens. >> >> I did manual reverts of this series up to this patch and obviously the >> problem goes away (no more GEM_BUG_ON to hit). I didn't try to do a >> real bisect to check if this is the first bad commit or if it's >> something later on the series. It could even be an older problem that >> just got exposed by the new BUG(). I'm available to test patches, just >> send them to me. > > No patches yet but there is a comment on > intel_prepare_plane_fb() that says that it must be called with mutex > held. > > Quite likely the new GEM_BUG_ON catches this not being true. > As Chris pointed out in irc, its about the reverse. Waiting with mutex when you shouldn't. -Mika > -Mika > >> >> Here's the backtrace: >> >> [ 71.341272] ------------[ cut here ]------------ >> [ 71.341305] kernel BUG at >> drivers/gpu/drm/i915/i915_gem_request.c:763! >> [ 71.341331] invalid opcode: 0000 [#1] SMP >> [ 71.341348] Modules linked in: snd_hda_codec_hdmi >> snd_hda_codec_realtek snd_hda_codec_generic snd_soc_skl snd_soc_skl_ipc >> snd_soc_sst_match snd_hda_ext_core snd_soc_sst_dsp snd_soc_sst_ipc >> nls_utf8 nls_cp437 vfat fat efi_pstore efivars iwlmvm sg snd_hda_intel >> i915 iwlwifi snd_hda_codec snd_hwdep snd_hda_core drm_kms_helper idma64 >> virt_dma syscopyarea sysfillrect sysimgblt fb_sys_fops drm >> intel_lpss_pci intel_pch_thermal wmi intel_lpss_acpi intel_lpss >> intel_hid acpi_pad sparse_keymap sdhci_pci pinctrl_sunrisepoint >> pinctrl_intel >> [ 71.341608] CPU: 3 PID: 3025 Comm: Xorg Tainted: >> G U 4.8.0-rc5pz+ #52 >> [ 71.341637] Hardware name: Intel Corporation Skylake Client >> platform/Skylake Y LPDDR3 RVP3, BIOS SKLSE2R1.86C.B074.R00.1502240607 >> 02/24/2015 >> [ 71.341684] task: ffff880165006140 task.stack: ffff880164394000 >> [ 71.341708] RIP: 0010:[<ffffffffa01d0377>] [<ffffffffa01d0377>] >> i915_wait_request+0x417/0x8e0 [i915] >> [ 71.341775] RSP: 0018:ffff880164397928 EFLAGS: 00010202 >> [ 71.341795] RAX: 0000000000000000 RBX: 0000000000000000 RCX: >> ffffffffffffffff >> [ 71.341821] RDX: 0000000000000401 RSI: ffff8801658900d0 RDI: >> ffff8801650068d8 >> [ 71.341849] RBP: ffff8801643979d8 R08: 0000000000000003 R09: >> 0000000000000000 >> [ 71.341875] R10: 0000000000000001 R11: 0000000000000000 R12: >> 0000000000000001 >> [ 71.341913] R13: 0000000000000000 R14: 0000000000000001 R15: >> ffff880159b125c0 >> [ 71.341940] FS: 00007fa4922eca40(0000) GS:ffff88016d4c0000(0000) >> knlGS:0000000000000000 >> [ 71.341970] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> [ 71.341992] CR2: 0000560b99a07000 CR3: 00000001641eb000 CR4: >> 00000000003406e0 >> [ 71.342019] DR0: 0000000000000000 DR1: 0000000000000000 DR2: >> 0000000000000000 >> [ 71.342046] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: >> 0000000000000400 >> [ 71.342073] Stack: >> [ 71.342083] 0000000000000046 0000000000000004 ffff880100000020 >> ffffffff827b40e0 >> [ 71.342122] ffffffffffffffff ffff880165006888 0000000000000001 >> 0000000000000000 >> [ 71.342156] ffff880165006140 ffffffff810d5560 ffff880164397978 >> ffff880164397978 >> [ 71.342190] Call Trace: >> [ 71.342205] [<ffffffff810d5560>] ? prepare_to_wait_event+0xf0/0xf0 >> [ 71.342267] [<ffffffffa01d0898>] i915_fence_wait+0x58/0xb0 [i915] >> [ 71.342294] [<ffffffff8155e6cf>] fence_wait_timeout+0x8f/0x2a0 >> [ 71.342317] [<ffffffff815601cc>] >> reservation_object_wait_timeout_rcu+0x1bc/0x570 >> [ 71.342346] [<ffffffff815600af>] ? >> reservation_object_wait_timeout_rcu+0x9f/0x570 >> [ 71.342399] [<ffffffffa0218fca>] intel_prepare_plane_fb+0xca/0x2f0 >> [i915] >> [ 71.342426] [<ffffffff81121247>] ? __module_address+0x27/0xf0 >> [ 71.342455] [<ffffffffa00e3521>] >> drm_atomic_helper_prepare_planes+0x61/0xf0 [drm_kms_helper] >> [ 71.342510] [<ffffffffa02115ae>] intel_atomic_commit+0x25e/0x4e0 >> [i915] >> [ 71.342549] [<ffffffffa00929d2>] drm_atomic_commit+0x52/0x60 [drm] >> [ 71.342592] [<ffffffffa00e4487>] >> drm_atomic_helper_update_plane+0xe7/0x130 [drm_kms_helper] >> [ 71.342633] [<ffffffffa0084e19>] __setplane_internal+0x1b9/0x2a0 >> [drm] >> [ 71.342667] [<ffffffffa0085017>] >> drm_mode_cursor_universal+0x117/0x200 [drm] >> [ 71.342703] [<ffffffffa0085184>] drm_mode_cursor_common+0x84/0x180 >> [drm] >> [ 71.342738] [<ffffffffa0086a99>] drm_mode_cursor2_ioctl+0x9/0x10 >> [drm] >> [ 71.342793] [<ffffffffa007be49>] drm_ioctl+0x389/0x4e0 [drm] >> [ 71.342823] [<ffffffffa0086a90>] ? drm_mode_cursor_ioctl+0x40/0x40 >> [drm] >> [ 71.342850] [<ffffffff8124ea49>] ? mntput_no_expire+0x89/0x3c0 >> [ 71.342873] [<ffffffff8124e9c0>] ? mnt_get_count+0x60/0x60 >> [ 71.342896] [<ffffffff8123e81f>] do_vfs_ioctl+0x8f/0x690 >> [ 71.342926] [<ffffffff810ad784>] ? task_work_run+0x94/0xb0 >> [ 71.342948] [<ffffffff810df24d>] ? >> trace_hardirqs_on_caller+0xed/0x1b0 >> [ 71.342974] [<ffffffff8123ee94>] SyS_ioctl+0x74/0x80 >> [ 71.342995] [<ffffffff8193f8e5>] >> entry_SYSCALL_64_fastpath+0x18/0xa8 >> [ 71.343019] Code: f0 e0 48 85 db 74 18 48 8b 03 48 8b 7b 08 48 83 c3 >> 18 4c 89 fe ff d0 48 8b 03 48 85 c0 75 eb 65 ff 0d 5e c1 e3 5f e9 3a fe >> ff ff <0f> 0b 65 48 8b 04 25 40 c5 00 00 48 c7 80 a8 11 00 00 79 03 1d >> [ 71.343195] RIP [<ffffffffa01d0377>] i915_wait_request+0x417/0x8e0 >> [i915] >> [ 71.343246] RSP <ffff880164397928> >> [ 71.366786] ---[ end trace 715dbae717d2187f ]--- >> >> Remember that this makes the desktop 100% unusable. >> >> Thanks, >> Paulo >> >>> >>> if (i915_gem_request_completed(req)) >>> return 0; >>> @@ -667,7 +673,8 @@ int i915_wait_request(struct drm_i915_gem_request >>> *req, >>> goto complete; >>> >>> set_current_state(state); >>> - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); >>> + if (flags & I915_WAIT_LOCKED) >>> + add_wait_queue(&req->i915->gpu_error.wait_queue, >>> &reset); >>> >>> intel_wait_init(&wait, req->fence.seqno); >>> if (intel_engine_add_wait(req->engine, &wait)) >>> @@ -707,10 +714,12 @@ wakeup: >>> if (i915_spin_request(req, state, 2)) >>> break; >>> } >>> - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); >>> >>> intel_engine_remove_wait(req->engine, &wait); >>> + if (flags & I915_WAIT_LOCKED) >>> + remove_wait_queue(&req->i915->gpu_error.wait_queue, >>> &reset); >>> __set_current_state(TASK_RUNNING); >>> + >>> complete: >>> trace_i915_gem_request_wait_end(req); >>> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h >>> b/drivers/gpu/drm/i915/i915_gem_request.h >>> index 479896ef791e..def35721e9ed 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_request.h >>> +++ b/drivers/gpu/drm/i915/i915_gem_request.h >>> @@ -222,7 +222,8 @@ int i915_wait_request(struct drm_i915_gem_request >>> *req, >>> s64 *timeout, >>> struct intel_rps_client *rps) >>> __attribute__((nonnull(1))); >>> -#define I915_WAIT_INTERRUPTIBLE BIT(0) >>> +#define I915_WAIT_INTERRUPTIBLE BIT(0) >>> +#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle >>> GPU reset */ >>> >>> static inline u32 intel_engine_get_seqno(struct intel_engine_cs >>> *engine); >>> >>> @@ -576,7 +577,9 @@ i915_gem_active_wait(const struct i915_gem_active >>> *active, struct mutex *mutex) >>> if (!request) >>> return 0; >>> >>> - return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, >>> NULL, NULL); >>> + return i915_wait_request(request, >>> + I915_WAIT_INTERRUPTIBLE | >>> I915_WAIT_LOCKED, >>> + NULL, NULL); >>> } >>> >>> /** >>> @@ -639,7 +642,9 @@ i915_gem_active_retire(struct i915_gem_active >>> *active, >>> if (!request) >>> return 0; >>> >>> - ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, >>> NULL, NULL); >>> + ret = i915_wait_request(request, >>> + I915_WAIT_INTERRUPTIBLE | >>> I915_WAIT_LOCKED, >>> + NULL, NULL); >>> if (ret) >>> return ret; >>> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c >>> b/drivers/gpu/drm/i915/i915_gem_shrinker.c >>> index 35a05f4c51c1..1c237d02f30b 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c >>> @@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, >>> unsigned long event, void *ptr >>> return NOTIFY_DONE; >>> >>> /* Force everything onto the inactive lists */ >>> - ret = i915_gem_wait_for_idle(dev_priv, 0); >>> + ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); >>> if (ret) >>> goto out; >>> >>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c >>> b/drivers/gpu/drm/i915/intel_ringbuffer.c >>> index a5bf18877068..8687a84a7ff3 100644 >>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c >>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c >>> @@ -2223,7 +2223,8 @@ static int wait_for_space(struct >>> drm_i915_gem_request *req, int bytes) >>> if (WARN_ON(&target->ring_link == &ring->request_list)) >>> return -ENOSPC; >>> >>> - ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, >>> + ret = i915_wait_request(target, >>> + I915_WAIT_INTERRUPTIBLE | >>> I915_WAIT_LOCKED, >>> NULL, NO_WAITBOOST); >>> if (ret) >>> return ret; >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx