On Wed, 23 Dec 2015 11:37:39 +0100 Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote: > Hello, > > The following program triggers > WARN_ON_ONCE(!list_empty(&ctx->flc_posix)) warning in > locks_free_lock_context (run it in a loop): > > // autogenerated by syzkaller (http://github.com/google/syzkaller) > #include <unistd.h> > #include <sys/syscall.h> > #include <string.h> > #include <stdint.h> > #include <pthread.h> > > #ifndef SYS_memfd_create > #define SYS_memfd_create 319 > #endif > > long r[15]; > long done[14]; > > void *thr(void *arg) > { > if (rand()%2) > usleep(100); > > switch ((long)arg) { > case 0: > r[0] = syscall(SYS_mmap, 0x20000000ul, 0x5000ul, > 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul); > break; > case 1: > memcpy((void*)0x20000c49, > "\xb6\x70\x70\x70\x31\x73\x65\x63\x75\x72\x69\x74\x79\x9e\x00", 15); > r[2] = syscall(SYS_memfd_create, 0x20000c49ul, 0x3ul, > 0, 0, 0, 0); > break; > case 2: > r[3] = syscall(SYS_socketpair, 0x1ul, 0x1ul, 0x0ul, > 0x20001000ul, 0, 0); > if (r[3] != -1) > r[4] = *(uint32_t*)0x20001000; > if (r[3] != -1) > r[5] = *(uint32_t*)0x20001004; > break; > case 3: > *(uint16_t*)0x20000000 = (uint16_t)0x0; > *(uint16_t*)0x20000002 = (uint16_t)0x1; > *(uint64_t*)0x20000008 = (uint64_t)0x6; > *(uint64_t*)0x20000010 = (uint64_t)0xad; > *(uint32_t*)0x20000018 = (uint32_t)0x0; > r[11] = syscall(SYS_fcntl, r[5], 0x7ul, 0x20000000ul, 0, 0, 0); > break; > case 4: > r[12] = syscall(SYS_write, r[5], 0x200006cbul, > 0x1000ul, 0, 0, 0); > break; > case 5: > r[13] = syscall(SYS_close, r[5], 0, 0, 0, 0, 0); > break; > case 6: > r[14] = syscall(SYS_dup2, r[2], r[4], 0, 0, 0, 0); > break; > } > done[(long)arg] = 1; > return 0; > } > > int main() > { > long i, j; > pthread_t th[14]; > > srand(time(0)+getpid()); > memset(r, -1, sizeof(r)); > for (i = 0; i < 7; i++) { > pthread_create(&th[i], 0, thr, (void*)i); > for (j = 0; j < 10; j++) { > if (done[i]) > break; > usleep(100); > } > } > for (i = 0; i < 7; i++) > done[i] = 0; > for (i = 0; i < 7; i++) { > 
pthread_create(&th[7+i], 0, thr, (void*)i); > if (rand()%2) > continue; > for (j = 0; j < 10; j++) { > if (done[i]) > break; > usleep(100); > } > } > usleep(100); > return 0; > } > > > ------------[ cut here ]------------ > WARNING: CPU: 3 PID: 1975 at fs/locks.c:241 > locks_free_lock_context+0x118/0x180() > Modules linked in: > CPU: 3 PID: 1975 Comm: a.out Not tainted 4.4.0-rc6+ #173 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > 00000000ffffffff ffff880068e67bf8 ffffffff82899ffd 0000000000000000 > ffff88006130af00 ffffffff85e17d60 ffff880068e67c38 ffffffff812ebbb9 > ffffffff818162d8 ffffffff85e17d60 00000000000000f1 ffff8800685c2828 > Call Trace: > [< inline >] __dump_stack lib/dump_stack.c:15 > [<ffffffff82899ffd>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50 > [<ffffffff812ebbb9>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460 > [<ffffffff812ebde9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:493 > [<ffffffff818162d8>] locks_free_lock_context+0x118/0x180 fs/locks.c:241 > [<ffffffff81765783>] __destroy_inode+0x1d3/0x4d0 fs/inode.c:228 > [<ffffffff81765acb>] destroy_inode+0x4b/0x120 fs/inode.c:253 > [<ffffffff81765ec0>] evict+0x320/0x4f0 fs/inode.c:559 > [< inline >] iput_final fs/inode.c:1477 > [<ffffffff817665dc>] iput+0x45c/0x850 fs/inode.c:1504 > [< inline >] dentry_iput fs/dcache.c:358 > [<ffffffff81757237>] __dentry_kill+0x457/0x620 fs/dcache.c:543 > [< inline >] dentry_kill fs/dcache.c:587 > [<ffffffff8175c499>] dput+0x659/0x740 fs/dcache.c:796 > [<ffffffff817162fc>] __fput+0x42c/0x780 fs/file_table.c:226 > [<ffffffff817166d5>] ____fput+0x15/0x20 fs/file_table.c:244 > [<ffffffff8134679b>] task_work_run+0x16b/0x200 kernel/task_work.c:115 > [< inline >] tracehook_notify_resume include/linux/tracehook.h:191 > [<ffffffff81003990>] exit_to_usermode_loop+0x180/0x1a0 > arch/x86/entry/common.c:251 > [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:282 > [<ffffffff8100631f>] syscall_return_slowpath+0x19f/0x210 > 
arch/x86/entry/common.c:344 > [<ffffffff85ccea22>] int_ret_from_sys_call+0x25/0x9f > arch/x86/entry/entry_64.S:281 > ---[ end trace 2dde0624dd974a19 ]--- > > > On commit 4ef7675344d687a0ef5b0d7c0cee12da005870c0 (Dec 20). Ooh, nice catch...and just in time for Christmas. filp_close does this after the fd has been detached from the file table in __close_fd: if (likely(!(filp->f_mode & FMODE_PATH))) { dnotify_flush(filp, id); locks_remove_posix(filp, id); } fput(filp); ...and fcntl_setlk does this: /* * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ /* * we need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in close(). * rcu_read_lock() wouldn't do. */ spin_lock(&current->files->file_lock); f = fcheck(fd); spin_unlock(&current->files->file_lock); if (!error && f != filp && flock.l_type != F_UNLCK) { flock.l_type = F_UNLCK; goto again; } ...so in principle that should keep new locks from racing onto the list just after we call filp_close. Hmm...I'll see if I can reproduce and figure out how this could happen. Thanks, -- Jeff Layton <jlayton@xxxxxxxxxxxxxxx> -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html