Quoting Daniel Vetter (2017-10-05 14:22:06) > 4.14-rc1 gained the fancy new cross-release support in lockdep, which > seems to have uncovered a few more rules about what is allowed and > isn't. > > This one here seems to indicate that allocating a work-queue while > holding mmap_sem is a no-go, so let's try to preallocate it. > > Of course another way to break this chain would be somewhere in the > cpu hotplug code, since this isn't the only trace we're finding now > which goes through msr_create_device. > > Full lockdep splat: > > ====================================================== > WARNING: possible circular locking dependency detected > 4.14.0-rc3-CI-CI_DRM_3179+ #1 Tainted: G U > ------------------------------------------------------ > kworker/3:4/562 is trying to acquire lock: > (cpu_hotplug_lock.rw_sem){++++}, at: [<ffffffff8113d4bc>] stop_machine+0x1c/0x40 > > but task is already holding lock: > (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915] > > which lock already depends on the new lock. 
> > the existing dependency chain (in reverse order) is: > > -> #6 (&dev->struct_mutex){+.+.}: > __lock_acquire+0x1420/0x15e0 > lock_acquire+0xb0/0x200 > __mutex_lock+0x86/0x9b0 > mutex_lock_interruptible_nested+0x1b/0x20 > i915_mutex_lock_interruptible+0x51/0x130 [i915] > i915_gem_fault+0x209/0x650 [i915] > __do_fault+0x1e/0x80 > __handle_mm_fault+0xa08/0xed0 > handle_mm_fault+0x156/0x300 > __do_page_fault+0x2c5/0x570 > do_page_fault+0x28/0x250 > page_fault+0x22/0x30 > > -> #5 (&mm->mmap_sem){++++}: > __lock_acquire+0x1420/0x15e0 > lock_acquire+0xb0/0x200 > __might_fault+0x68/0x90 > _copy_to_user+0x23/0x70 > filldir+0xa5/0x120 > dcache_readdir+0xf9/0x170 > iterate_dir+0x69/0x1a0 > SyS_getdents+0xa5/0x140 > entry_SYSCALL_64_fastpath+0x1c/0xb1 > > -> #4 (&sb->s_type->i_mutex_key#5){++++}: > down_write+0x3b/0x70 > handle_create+0xcb/0x1e0 > devtmpfsd+0x139/0x180 > kthread+0x152/0x190 > ret_from_fork+0x27/0x40 > > -> #3 ((complete)&req.done){+.+.}: > __lock_acquire+0x1420/0x15e0 > lock_acquire+0xb0/0x200 > wait_for_common+0x58/0x210 > wait_for_completion+0x1d/0x20 > devtmpfs_create_node+0x13d/0x160 > device_add+0x5eb/0x620 > device_create_groups_vargs+0xe0/0xf0 > device_create+0x3a/0x40 > msr_device_create+0x2b/0x40 > cpuhp_invoke_callback+0xc9/0xbf0 > cpuhp_thread_fun+0x17b/0x240 > smpboot_thread_fn+0x18a/0x280 > kthread+0x152/0x190 > ret_from_fork+0x27/0x40 > > -> #2 (cpuhp_state-up){+.+.}: > __lock_acquire+0x1420/0x15e0 > lock_acquire+0xb0/0x200 > cpuhp_issue_call+0x133/0x1c0 > __cpuhp_setup_state_cpuslocked+0x139/0x2a0 > __cpuhp_setup_state+0x46/0x60 > page_writeback_init+0x43/0x67 > pagecache_init+0x3d/0x42 > start_kernel+0x3a8/0x3fc > x86_64_start_reservations+0x2a/0x2c > x86_64_start_kernel+0x6d/0x70 > verify_cpu+0x0/0xfb > > -> #1 (cpuhp_state_mutex){+.+.}: > __lock_acquire+0x1420/0x15e0 > lock_acquire+0xb0/0x200 > __mutex_lock+0x86/0x9b0 > mutex_lock_nested+0x1b/0x20 > __cpuhp_setup_state_cpuslocked+0x53/0x2a0 > __cpuhp_setup_state+0x46/0x60 > 
page_alloc_init+0x28/0x30 > start_kernel+0x145/0x3fc > x86_64_start_reservations+0x2a/0x2c > x86_64_start_kernel+0x6d/0x70 > verify_cpu+0x0/0xfb > > -> #0 (cpu_hotplug_lock.rw_sem){++++}: > check_prev_add+0x430/0x840 > __lock_acquire+0x1420/0x15e0 > lock_acquire+0xb0/0x200 > cpus_read_lock+0x3d/0xb0 > stop_machine+0x1c/0x40 > i915_gem_set_wedged+0x1a/0x20 [i915] > i915_reset+0xb9/0x230 [i915] > i915_reset_device+0x1f6/0x260 [i915] > i915_handle_error+0x2d8/0x430 [i915] > hangcheck_declare_hang+0xd3/0xf0 [i915] > i915_hangcheck_elapsed+0x262/0x2d0 [i915] > process_one_work+0x233/0x660 > worker_thread+0x4e/0x3b0 > kthread+0x152/0x190 > ret_from_fork+0x27/0x40 > > other info that might help us debug this: > > Chain exists of: > cpu_hotplug_lock.rw_sem --> &mm->mmap_sem --> &dev->struct_mutex > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&dev->struct_mutex); > lock(&mm->mmap_sem); > lock(&dev->struct_mutex); > lock(cpu_hotplug_lock.rw_sem); > > *** DEADLOCK *** > > 3 locks held by kworker/3:4/562: > #0: ("events_long"){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660 > #1: ((&(&i915->gpu_error.hangcheck_work)->work)){+.+.}, at: [<ffffffff8109c64a>] process_one_work+0x1aa/0x660 > #2: (&dev->struct_mutex){+.+.}, at: [<ffffffffa0136588>] i915_reset_device+0x1e8/0x260 [i915] > > stack backtrace: > CPU: 3 PID: 562 Comm: kworker/3:4 Tainted: G U 4.14.0-rc3-CI-CI_DRM_3179+ #1 > Hardware name: /NUC7i5BNB, BIOS BNKBL357.86A.0048.2017.0704.1415 07/04/2017 > Workqueue: events_long i915_hangcheck_elapsed [i915] > Call Trace: > dump_stack+0x68/0x9f > print_circular_bug+0x235/0x3c0 > ? lockdep_init_map_crosslock+0x20/0x20 > check_prev_add+0x430/0x840 > ? irq_work_queue+0x86/0xe0 > ? wake_up_klogd+0x53/0x70 > __lock_acquire+0x1420/0x15e0 > ? __lock_acquire+0x1420/0x15e0 > ? lockdep_init_map_crosslock+0x20/0x20 > lock_acquire+0xb0/0x200 > ? stop_machine+0x1c/0x40 > ? i915_gem_object_truncate+0x50/0x50 [i915] > cpus_read_lock+0x3d/0xb0 > ? 
stop_machine+0x1c/0x40 > stop_machine+0x1c/0x40 > i915_gem_set_wedged+0x1a/0x20 [i915] > i915_reset+0xb9/0x230 [i915] > i915_reset_device+0x1f6/0x260 [i915] > ? gen8_gt_irq_ack+0x170/0x170 [i915] > ? work_on_cpu_safe+0x60/0x60 > i915_handle_error+0x2d8/0x430 [i915] > ? vsnprintf+0xd1/0x4b0 > ? scnprintf+0x3a/0x70 > hangcheck_declare_hang+0xd3/0xf0 [i915] > ? intel_runtime_pm_put+0x56/0xa0 [i915] > i915_hangcheck_elapsed+0x262/0x2d0 [i915] > process_one_work+0x233/0x660 > worker_thread+0x4e/0x3b0 > kthread+0x152/0x190 > ? process_one_work+0x660/0x660 > ? kthread_create_on_node+0x40/0x40 > ret_from_fork+0x27/0x40 > Setting dangerous option reset - tainting kernel > i915 0000:00:02.0: Resetting chip after gpu hang > Setting dangerous option reset - tainting kernel > i915 0000:00:02.0: Resetting chip after gpu hang > > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Sasha Levin <alexander.levin@xxxxxxxxxxx> > Signed-off-by: Daniel Vetter <daniel.vetter@xxxxxxxxx> > > -- > > Patch itself pretty much untested, I just want to figure out whether > we should fix this (and similar backtraces going through > msr_create_device) in i915, or whether the cpu hotplug folks will take > care of them all. Looking at the patch, we shrink the time under mmap_sem/mm_lock so seems sensible (just from that pov). > Afaict this is not because of changes in 4.14-rc1, but really only due > to the new cross-release checks. > > Note that the above trace is on top of -rc3 (plus plenty of drm/i915 > stuff), so should have Peter's recent lockdep fixes for cpu up vs. > down already. 
> -Daniel > --- > drivers/gpu/drm/i915/i915_gem_userptr.c | 65 +++++++++++++++------------------ > 1 file changed, 29 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c > index 2d4996de7331..afe166921572 100644 > --- a/drivers/gpu/drm/i915/i915_gem_userptr.c > +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c > @@ -160,36 +160,6 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = { > .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start, > }; > > -static struct i915_mmu_notifier * > -i915_mmu_notifier_create(struct mm_struct *mm) > -{ > - struct i915_mmu_notifier *mn; > - int ret; > - > - mn = kmalloc(sizeof(*mn), GFP_KERNEL); > - if (mn == NULL) > - return ERR_PTR(-ENOMEM); > - > - spin_lock_init(&mn->lock); > - mn->mn.ops = &i915_gem_userptr_notifier; > - mn->objects = RB_ROOT_CACHED; > - mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); > - if (mn->wq == NULL) { > - kfree(mn); > - return ERR_PTR(-ENOMEM); > - } > - > - /* Protected by mmap_sem (write-lock) */ > - ret = __mmu_notifier_register(&mn->mn, mm); > - if (ret) { > - destroy_workqueue(mn->wq); > - kfree(mn); > - return ERR_PTR(ret); > - } > - > - return mn; > -} > - > static void > i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) > { > @@ -210,23 +180,46 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) > static struct i915_mmu_notifier * > i915_mmu_notifier_find(struct i915_mm_struct *mm) > { > - struct i915_mmu_notifier *mn = mm->mn; > + struct i915_mmu_notifier *mn = mm->mn, *free_mn; > + int ret; > > mn = mm->mn; > if (mn) > return mn; > > + free_mn = kmalloc(sizeof(*free_mn), GFP_KERNEL); > + if (free_mn == NULL) > + return ERR_PTR(-ENOMEM); > + > + spin_lock_init(&free_mn->lock); > + free_mn->mn.ops = &i915_gem_userptr_notifier; > + free_mn->objects = RB_ROOT_CACHED; > + /* must allocate wq outside of mm->mmap_sem */ > + free_mn->wq 
= alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); > + if (free_mn->wq == NULL) { > + ret = -ENOMEM; > + goto err_kfree; > + } > + > down_write(&mm->mm->mmap_sem); > mutex_lock(&mm->i915->mm_lock); > - if ((mn = mm->mn) == NULL) { > - mn = i915_mmu_notifier_create(mm->mm); > - if (!IS_ERR(mn)) > - mm->mn = mn; > + if (mm->mn == NULL) { > + /* Protected by mmap_sem (write-lock) */ > + ret = __mmu_notifier_register(&free_mn->mn, mm->mm); > + if (ret == 0) > + mm->mn = mn =free_mn; > + } else { > + mn = mm->mn; > } > mutex_unlock(&mm->i915->mm_lock); > up_write(&mm->mm->mmap_sem); > > - return mn; > +err_wq: > + destroy_workqueue(free_mn->wq); > +err_kfree: > + kfree(free_mn); > + > + return ret == 0 ? mn : ERR_PTR(ret); > } I had a go, diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2d4996de7331..f9b3406401af 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -164,7 +164,6 @@ static struct i915_mmu_notifier * i915_mmu_notifier_create(struct mm_struct *mm) { struct i915_mmu_notifier *mn; - int ret; mn = kmalloc(sizeof(*mn), GFP_KERNEL); if (mn == NULL) @@ -179,14 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm) return ERR_PTR(-ENOMEM); } - /* Protected by mmap_sem (write-lock) */ - ret = __mmu_notifier_register(&mn->mn, mm); - if (ret) { - destroy_workqueue(mn->wq); - kfree(mn); - return ERR_PTR(ret); - } - return mn; } @@ -210,23 +201,37 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) static struct i915_mmu_notifier * i915_mmu_notifier_find(struct i915_mm_struct *mm) { - struct i915_mmu_notifier *mn = mm->mn; + struct i915_mmu_notifier *mn; + int err; mn = mm->mn; if (mn) return mn; + mn = i915_mmu_notifier_create(mm->mm); + if (IS_ERR(mn)) + return mn; + + err = 0; down_write(&mm->mm->mmap_sem); mutex_lock(&mm->i915->mm_lock); - if ((mn = mm->mn) == NULL) { - mn = i915_mmu_notifier_create(mm->mm); - if (!IS_ERR(mn)) - 
mm->mn = mn; + if (mm->mn == NULL) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } } mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - return mn; + if (mn) { + destroy_workqueue(mn->wq); + kfree(mn); + } + + return err ? ERR_PTR(err) : mm->mn; } So not sold that we want to keep the separate single use i915_mmu_notifier_create() (the onion unwind is pretty solid). But it's safe enough to return mm->mn (it's stable once we've proved it's set) and that allows us to avoid a second free_mn. I prefer the register above as I think it's that touch clearer. In particular, I believe your use of free_mn is buggy as you still free it after assigning it to mm->mn. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx