Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > igt_mmap_offset_exhaustion() wants to test what happens when the mmap > space is filled with zombie objects, objects discarded by userspace but > still active on the GPU. As they are only protected by the active > reference, we have to be certain that active reference is kept while we > peek into our dangling pointer. That active reference should not be > freed until we retire, but we do that retirement from a background > thread. This leaves us with a subtle timing problem, exacerbated and > highlighted by KASAN: > > <3>[ 132.380399] BUG: KASAN: use-after-free in drm_gem_create_mmap_offset+0x8c/0xd0 > <3>[ 132.380430] Read of size 8 at addr ffff8801e13245f8 by task drv_selftest/5822 > > <4>[ 132.380470] CPU: 0 PID: 5822 Comm: drv_selftest Tainted: G U 4.18.0-rc3-g7ae7763aa2be-kasan_48+ #1 > <4>[ 132.380473] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 > <4>[ 132.380475] Call Trace: > <4>[ 132.380481] dump_stack+0x7c/0xbb > <4>[ 132.380487] print_address_description+0x65/0x270 > <4>[ 132.380493] kasan_report+0x25b/0x380 > <4>[ 132.380497] ? drm_gem_create_mmap_offset+0x8c/0xd0 > <4>[ 132.380503] drm_gem_create_mmap_offset+0x8c/0xd0 > <4>[ 132.380584] i915_gem_object_create_mmap_offset+0x6d/0x100 [i915] > <4>[ 132.380650] igt_mmap_offset_exhaustion+0x462/0x940 [i915] > <4>[ 132.380714] ? i915_gem_close_object+0x740/0x740 [i915] > <4>[ 132.380784] ? igt_gem_huge+0x269/0x3d0 [i915] > <4>[ 132.380865] __i915_subtests+0x5a/0x160 [i915] > <4>[ 132.380936] __run_selftests+0x1a2/0x2f0 [i915] > <4>[ 132.381008] i915_live_selftests+0x4e/0x80 [i915] > <4>[ 132.381071] i915_pci_probe+0xd8/0x1b0 [i915] > <4>[ 132.381077] pci_device_probe+0x1c5/0x3a0 > <4>[ 132.381087] driver_probe_device+0x6b6/0xcb0 > <4>[ 132.381094] __driver_attach+0x22d/0x2c0 > <4>[ 132.381100] ? driver_probe_device+0xcb0/0xcb0 > <4>[ 132.381103] bus_for_each_dev+0x113/0x1a0 > <4>[ 132.381108] ? check_flags.part.24+0x450/0x450 > <4>[ 132.381112] ? subsys_dev_iter_exit+0x10/0x10 > <4>[ 132.381123] bus_add_driver+0x38b/0x6e0 > <4>[ 132.381131] driver_register+0x189/0x400 > <4>[ 132.381136] ? 0xffffffffc12d8000 > <4>[ 132.381140] do_one_initcall+0xa0/0x4c0 > <4>[ 132.381145] ? initcall_blacklisted+0x180/0x180 > <4>[ 132.381152] ? do_init_module+0x4a/0x54c > <4>[ 132.381156] ? rcu_lockdep_current_cpu_online+0xdc/0x130 > <4>[ 132.381161] ? kasan_unpoison_shadow+0x30/0x40 > <4>[ 132.381169] do_init_module+0x1b5/0x54c > <4>[ 132.381177] load_module+0x619e/0x9b70 > <4>[ 132.381202] ? module_frob_arch_sections+0x20/0x20 > <4>[ 132.381211] ? vfs_read+0x257/0x2f0 > <4>[ 132.381214] ? vfs_read+0x257/0x2f0 > <4>[ 132.381221] ? kernel_read+0x8b/0x130 > <4>[ 132.381231] ? copy_strings_kernel+0x120/0x120 > <4>[ 132.381244] ? __se_sys_finit_module+0x17c/0x1a0 > <4>[ 132.381248] __se_sys_finit_module+0x17c/0x1a0 > <4>[ 132.381252] ? __ia32_sys_init_module+0xa0/0xa0 > <4>[ 132.381261] ? __se_sys_newstat+0x77/0xd0 > <4>[ 132.381265] ? cp_new_stat+0x590/0x590 > <4>[ 132.381269] ? kmem_cache_free+0x2f0/0x340 > <4>[ 132.381285] do_syscall_64+0x97/0x400 > <4>[ 132.381292] entry_SYSCALL_64_after_hwframe+0x49/0xbe > <4>[ 132.381295] RIP: 0033:0x7eff4af46839 > <4>[ 132.381297] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 > <4>[ 132.381426] RSP: 002b:00007ffcd84f4cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 > <4>[ 132.381432] RAX: ffffffffffffffda RBX: 000055dfdeb429a0 RCX: 00007eff4af46839 > <4>[ 132.381435] RDX: 0000000000000000 RSI: 000055dfdeb43670 RDI: 0000000000000004 > <4>[ 132.381437] RBP: 000055dfdeb43670 R08: 0000000000000004 R09: 0000000000000000 > <4>[ 132.381440] R10: 00007ffcd84f4e60 R11: 0000000000000246 R12: 0000000000000000 > <4>[ 132.381442] R13: 000055dfdeb3bec0 R14: 0000000000000000 R15: 000000000000003b > > <3>[ 132.381466] Allocated by task 5822: > <4>[ 132.381485] kmem_cache_alloc+0xdf/0x2e0 > <4>[ 132.381546] i915_gem_object_create_internal+0x24/0x1e0 [i915] > <4>[ 132.381609] igt_mmap_offset_exhaustion+0x257/0x940 [i915] > <4>[ 132.381677] __i915_subtests+0x5a/0x160 [i915] > <4>[ 132.381742] __run_selftests+0x1a2/0x2f0 [i915] > <4>[ 132.381806] i915_live_selftests+0x4e/0x80 [i915] > <4>[ 132.381865] i915_pci_probe+0xd8/0x1b0 [i915] > <4>[ 132.381868] pci_device_probe+0x1c5/0x3a0 > <4>[ 132.381871] driver_probe_device+0x6b6/0xcb0 > <4>[ 132.381874] __driver_attach+0x22d/0x2c0 > <4>[ 132.381877] bus_for_each_dev+0x113/0x1a0 > <4>[ 132.381880] bus_add_driver+0x38b/0x6e0 > <4>[ 132.381884] driver_register+0x189/0x400 > <4>[ 132.381886] do_one_initcall+0xa0/0x4c0 > <4>[ 132.381889] do_init_module+0x1b5/0x54c > <4>[ 132.381892] load_module+0x619e/0x9b70 > <4>[ 132.381895] __se_sys_finit_module+0x17c/0x1a0 > <4>[ 132.381898] do_syscall_64+0x97/0x400 > <4>[ 132.381901] entry_SYSCALL_64_after_hwframe+0x49/0xbe > > <3>[ 132.381914] Freed by task 150: > <4>[ 132.381931] kmem_cache_free+0xb7/0x340 > <4>[ 132.381995] __i915_gem_free_objects+0x875/0xf50 [i915] > <4>[ 132.382054] __i915_gem_free_work+0x69/0xb0 [i915] > <4>[ 132.382058] process_one_work+0x78b/0x1740 > <4>[ 132.382061] worker_thread+0x82/0xb80 > <4>[ 132.382064] kthread+0x30c/0x3d0 > <4>[ 132.382067] ret_from_fork+0x3a/0x50 > > <3>[ 132.382081] The buggy address belongs to the object at ffff8801e1324500 > which belongs to the cache drm_i915_gem_object of size 1168 > <3>[ 132.382133] The buggy address is located 248 bytes inside of > 1168-byte region [ffff8801e1324500, ffff8801e1324990) > <3>[ 132.382179] The buggy address belongs to the page: > <0>[ 132.382202] page:ffffea000784c800 count:1 mapcount:0 mapping:ffff8801dedf6500 index:0xffff8801e1323ec0 compound_mapcount: 0 > <0>[ 132.382251] flags: 0x8000000000008100(slab|head) > <1>[ 132.382274] raw: 8000000000008100 ffff8801d6317440 ffff8801d6317440 ffff8801dedf6500 > <1>[ 132.382307] raw: ffff8801e1323ec0 0000000000140013 00000001ffffffff 0000000000000000 > <1>[ 132.382339] page dumped because: kasan: bad access detected > > <3>[ 132.382373] Memory state around the buggy address: > <3>[ 132.382395] ffff8801e1324480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > <3>[ 132.382426] ffff8801e1324500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > <3>[ 132.382457] >ffff8801e1324580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > <3>[ 132.382488] ^ > <3>[ 132.382517] ffff8801e1324600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > <3>[ 132.382548] ffff8801e1324680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > > This patch tricks the system into running without the background retire > thread, until after we finish the test. The only reaping should then be > performed by the mmap offset routine to reclaim the space as required. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > .../gpu/drm/i915/selftests/i915_gem_object.c | 19 ++++++++++++++++++- > 1 file changed, 18 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c > index f4a5099c75b5..d77acf4cc439 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c > @@ -507,6 +507,15 @@ static int igt_mmap_offset_exhaustion(void *arg) > u64 hole_start, hole_end; > int loop, err; > > + /* Disable background reaper */ disable|enable_reaper, but I have cream in my coffee and perhaps we do it if another test ever wants this. > + mutex_lock(&i915->drm.struct_mutex); > + if (!i915->gt.active_requests++) > + i915_gem_unpark(i915); > + mutex_unlock(&i915->drm.struct_mutex); > + cancel_delayed_work_sync(&i915->gt.retire_work); As the retire work can yield on mutex, you could lift it inside mutex and do GEM_BUG_ON(!cancel_delayed...), if you want to enforce the coupling. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > + cancel_delayed_work_sync(&i915->gt.idle_work); > + GEM_BUG_ON(!i915->gt.awake); > + > /* Trim the device mmap space to only a page */ > memset(&resv, 0, sizeof(resv)); > drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { > @@ -515,7 +524,7 @@ static int igt_mmap_offset_exhaustion(void *arg) > err = drm_mm_reserve_node(mm, &resv); > if (err) { > pr_err("Failed to trim VMA manager, err=%d\n", err); > - return err; > + goto out_park; > } > break; > } > @@ -576,6 +585,7 @@ static int igt_mmap_offset_exhaustion(void *arg) > goto err_obj; > } > > + /* NB we rely on the _active_ reference to access obj now */ > GEM_BUG_ON(!i915_gem_object_is_active(obj)); > err = i915_gem_object_create_mmap_offset(obj); > if (err) { > @@ -587,6 +597,13 @@ static int igt_mmap_offset_exhaustion(void *arg) > > out: > drm_mm_remove_node(&resv); > +out_park: > + mutex_lock(&i915->drm.struct_mutex); > + if (--i915->gt.active_requests) > + queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); > + else > + queue_delayed_work(i915->wq, &i915->gt.idle_work, 0); > + mutex_unlock(&i915->drm.struct_mutex); > return err; > err_obj: > i915_gem_object_put(obj); > -- > 2.18.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx