On Mon, Jul 09, 2018 at 01:31:09PM +0100, David Howells wrote: > Eric Biggers <ebiggers3@xxxxxxxxx> wrote: > > > sys_fsmount() calls fc->ops->free() to free the data, zeroes > > ->fs_private, then proceeds to reuse the context. But legacy_fs_context > > doesn't use ->fs_private, so we need to handle zeroing it too; otherwise > > there's a double free of legacy_fs_context::{legacy_data,secdata}. > > I think the attached is better. I stopped embedding the fs_context in the > xxx_fs_context to make certain things simpler, but I missed the legacy > wrapper. > > David > --- > diff --git a/fs/fs_context.c b/fs/fs_context.c > index f91facc769f7..ab93a0b73dc6 100644 > --- a/fs/fs_context.c > +++ b/fs/fs_context.c > @@ -34,7 +34,6 @@ enum legacy_fs_param { > }; > > struct legacy_fs_context { > - struct fs_context fc; > char *legacy_data; /* Data page for legacy filesystems */ > char *secdata; > size_t data_size; > @@ -239,12 +238,21 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, > enum fs_context_purpose purpose) > { > struct fs_context *fc; > - int ret; > + int ret = -ENOMEM; > > - fc = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); > + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); > if (!fc) > return ERR_PTR(-ENOMEM); > > + if (!fs_type->init_fs_context) { > + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), > + GFP_KERNEL); > + if (!fc->fs_private) > + goto err_fc; > + > + fc->ops = &legacy_fs_context_ops; > + } > + Why isn't this done in the same place that ->init_fs_context() would otherwise be called? It logically does the same thing, right? 
> fc->purpose = purpose; > fc->sb_flags = sb_flags; > fc->fs_type = get_filesystem(fs_type); > @@ -277,8 +285,6 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, > ret = fc->fs_type->init_fs_context(fc, reference); > if (ret < 0) > goto err_fc; > - } else { > - fc->ops = &legacy_fs_context_ops; > } > > /* Do the security check last because ->init_fs_context may change the > @@ -395,7 +401,7 @@ EXPORT_SYMBOL(put_fs_context); > */ > static void legacy_fs_context_free(struct fs_context *fc) > { > - struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc); > + struct legacy_fs_context *ctx = fc->fs_private; > > free_secdata(ctx->secdata); > switch (ctx->param_type) { > @@ -408,6 +414,8 @@ static void legacy_fs_context_free(struct fs_context *fc) > kfree(ctx->legacy_data); > break; > } > + > + kfree(ctx); > } Okay, but now there's a NULL pointer dereference, because fc->ops->free() can be called with a NULL fc->fs_private: e.g. once sys_fsmount() has called fc->ops->free() and zeroed ->fs_private, the later put_fs_context() calls fc->ops->free() again and legacy_fs_context_free() dereferences the NULL ctx. Probably fc->ops->free() shouldn't be called in that case. The following reproducer triggers it: 
int main() { int fd = syscall(__NR_fsopen, "tmpfs", 0); write(fd, "x create", 8); syscall(__NR_fsmount, fd, 0, 0); } BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 1 PID: 186 Comm: fsopen Not tainted 4.18.0-rc1-00001-g0f067bdbfeca0 #29 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 RIP: 0010:legacy_fs_context_free+0xc/0x40 fs/fs_context.c:500 Code: 02 75 08 48 c7 42 08 01 00 00 00 31 c0 c3 c7 42 18 01 00 00 00 31 c0 c3 66 0f 1f 44 00 00 55 48 89 e5 53 48 8b 9f 90 00 00 00 <8b> 4b 18 83 f9 04 77 0c b8 01 00 00 00 48 d3 e0 a8 13 75 08 48 8b RSP: 0018:ffffc9000079bd88 EFLAGS: 00010282 RAX: ffffffff8118fbe0 RBX: 0000000000000000 RCX: 0000000000000001 RDX: ffff88007c82c0f4 RSI: 0000000000000001 RDI: ffff88007be77700 RBP: ffffc9000079bd90 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88007c82c000 R13: 0000000000060003 R14: ffff88007d34d020 R15: ffff88007ab8aea8 FS: 00007fee62b79740(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000001c0f000 CR4: 00000000003406e0 Call Trace: put_fs_context+0x4c/0x180 fs/fs_context.c:479 fscontext_release+0x20/0x30 fs/fsopen.c:196 __fput+0xbb/0x210 fs/file_table.c:210 ____fput+0x9/0x10 fs/file_table.c:246 task_work_run+0x86/0xc0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x27a/0xa30 kernel/exit.c:865 do_group_exit+0x3c/0xc0 kernel/exit.c:968 __do_sys_exit_group kernel/exit.c:979 [inline] __se_sys_exit_group kernel/exit.c:977 [inline] __x64_sys_exit_group+0x13/0x20 kernel/exit.c:977 do_syscall_64+0x4a/0x180 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fee6224eee8 Code: Bad RIP value. 
RSP: 002b:00007ffc3efc0cd8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fee6224eee8 RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000 RBP: 00007fee625386d8 R08: 00000000000000e7 R09: ffffffffffffff80 R10: 00007fee62745100 R11: 0000000000000246 R12: 00007fee625386d8 R13: 00007fee6253dbe0 R14: 0000000000000000 R15: 0000000000000000 CR2: 0000000000000018 ---[ end trace 8ac26865cb821d07 ]---